[fix] add missing scheme to duplicated results too ++ revert gigablast's handling

This commit is contained in:
Adam Tauber 2015-09-11 18:33:06 +02:00
parent e3df22b140
commit 37c3ace309
2 changed files with 4 additions and 5 deletions

View File

@@ -53,8 +53,6 @@ def response(resp):
# parse results
for result in dom.xpath(results_xpath):
url = result.xpath(url_xpath)[0].text
if not url.startswith('http://') and not url.startswith('https://'):
url = 'http://' + url
title = result.xpath(title_xpath)[0].text
content = escape(result.xpath(content_xpath)[0].text)

View File

@@ -143,6 +143,10 @@ def score_results(results):
res['parsed_url'] = urlparse(res['url'])
# if the result has no scheme, use http as default
if not res['parsed_url'].scheme:
res['parsed_url'] = res['parsed_url']._replace(scheme="http")
res['host'] = res['parsed_url'].netloc
if res['host'].startswith('www.'):
@@ -206,9 +210,6 @@ def score_results(results):
# if there is no duplicate found, append result
else:
res['score'] = score
# if the result has no scheme, use http as default
if res['parsed_url'].scheme == '':
res['parsed_url'] = res['parsed_url']._replace(scheme="http")
results.append(res)