diff --git a/searx/results.py b/searx/results.py index b3b87411..3acf1045 100644 --- a/searx/results.py +++ b/searx/results.py @@ -6,6 +6,7 @@ from urllib.parse import urlparse, unquote from searx import logger from searx.engines import engines from searx.metrology.error_recorder import record_error +from searx import settings CONTENT_LEN_IGNORED_CHARS_REGEX = re.compile(r'[,;:!?\./\\\\ ()-_]', re.M | re.U) @@ -129,13 +130,18 @@ def merge_two_infoboxes(infobox1, infobox2): infobox1['content'] = content2 -def result_score(result): +def result_score(result, language): weight = 1.0 for result_engine in result['engines']: if hasattr(engines[result_engine], 'weight'): weight *= float(engines[result_engine].weight) + if settings['search']['prefer_configured_language']: + domain_parts = result['parsed_url'].netloc.split('.') + if language in domain_parts: + weight *= 1.1 + occurences = len(result['positions']) return sum((occurences * weight) / position for position in result['positions']) @@ -145,9 +151,10 @@ class ResultContainer: """docstring for ResultContainer""" __slots__ = '_merged_results', 'infoboxes', 'suggestions', 'answers', 'corrections', '_number_of_results',\ - '_ordered', 'paging', 'unresponsive_engines', 'timings', 'redirect_url', 'engine_data' + '_ordered', 'paging', 'unresponsive_engines', 'timings', 'redirect_url', 'engine_data',\ + '_language' - def __init__(self): + def __init__(self, language): super().__init__() self._merged_results = [] self.infoboxes = [] @@ -161,6 +168,7 @@ class ResultContainer: self.unresponsive_engines = set() self.timings = [] self.redirect_url = None + self._language = language.lower().split('-')[0] def extend(self, engine_name, results): standard_result_count = 0 @@ -299,7 +307,7 @@ class ResultContainer: def order_results(self): for result in self._merged_results: - score = result_score(result) + score = result_score(result, self._language) result['score'] = score with RLock(): for result_engine in result['engines']: diff 
--git a/searx/search/__init__.py b/searx/search/__init__.py index a3c70866..f2f774bb 100644 --- a/searx/search/__init__.py +++ b/searx/search/__init__.py @@ -66,7 +66,7 @@ class Search: # init vars super().__init__() self.search_query = search_query - self.result_container = ResultContainer() + self.result_container = ResultContainer(search_query.lang) self.start_time = None self.actual_timeout = None diff --git a/searx/settings.yml b/searx/settings.yml index 591c819d..175a8656 100644 --- a/searx/settings.yml +++ b/searx/settings.yml @@ -19,6 +19,7 @@ search: default_lang : "" # Default search language - leave blank to detect from browser information or use codes from 'languages.py' ban_time_on_fail : 5 # ban time in seconds after engine errors max_ban_time_on_fail : 120 # max ban time in seconds after engine errors + prefer_configured_language: False # increase weight of results in configured language in ranking server: port : 8888 diff --git a/tests/unit/test_results.py b/tests/unit/test_results.py index 274b5b37..a1d9e673 100644 --- a/tests/unit/test_results.py +++ b/tests/unit/test_results.py @@ -20,22 +20,22 @@ def fake_result(url='https://aa.bb/cc?dd=ee#ff', class ResultContainerTestCase(SearxTestCase): def test_empty(self): - c = ResultContainer() + c = ResultContainer("en-US") self.assertEqual(c.get_ordered_results(), []) def test_one_result(self): - c = ResultContainer() + c = ResultContainer("en-US") c.extend('wikipedia', [fake_result()]) self.assertEqual(c.results_length(), 1) def test_one_suggestion(self): - c = ResultContainer() + c = ResultContainer("en-US") c.extend('wikipedia', [fake_result(suggestion=True)]) self.assertEqual(len(c.suggestions), 1) self.assertEqual(c.results_length(), 0) def test_result_merge(self): - c = ResultContainer() + c = ResultContainer("en-US") c.extend('wikipedia', [fake_result()]) c.extend('wikidata', [fake_result(), fake_result(url='https://example.com/')]) self.assertEqual(c.results_length(), 2)