2
0
mirror of https://github.com/searx/searx synced 2025-09-02 07:25:50 +00:00

[fix] skip non-complete google news results

This commit is contained in:
Adam Tauber
2017-01-10 11:03:05 +01:00
parent 94327d67fc
commit 108392f8da

View File

@@ -66,11 +66,14 @@ def response(resp):
    # parse results
    for result in dom.xpath('//div[@class="g"]|//div[@class="g _cy"]'):
        try:
            r = {
                'url': result.xpath('.//div[@class="_cnc"]//a/@href')[0],
                'title': ''.join(result.xpath('.//div[@class="_cnc"]//h3//text()')),
                'content': ''.join(result.xpath('.//div[@class="st"]//text()')),
            }
        except:
            continue
        imgs = result.xpath('.//img/@src')
        if len(imgs) and not imgs[0].startswith('data'):