From a825690804e2f144d57715a41193b036dd206d0f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?No=C3=A9mi=20V=C3=A1nyi?= Date: Sun, 31 Jul 2022 17:37:48 +0200 Subject: [PATCH] Add search operators plugin (#3311) ## What does this PR do? This PR adds search operator plugin to searx. By default it is disabled because it removes results from your result set. Thus, you might end up with an empty result page with the additional filtering. ## Why is this change important? With all of its shortcomings, still is a nifty plugin. ## How to test this PR locally? ``` batman -site:imdb.com ``` Co-authored-by: DiamondDemon669 <62653580+DiamondDemon669@users.noreply.github.com> --- searx/plugins/__init__.py | 3 +++ searx/plugins/search_operators.py | 32 +++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+) create mode 100644 searx/plugins/search_operators.py diff --git a/searx/plugins/__init__.py b/searx/plugins/__init__.py index 65174c21..60cf3e8a 100644 --- a/searx/plugins/__init__.py +++ b/searx/plugins/__init__.py @@ -34,6 +34,7 @@ from searx.plugins import (oa_doi_rewrite, self_info, hostname_replace, search_on_category_select, + search_operators, tracker_url_remover, vim_hotkeys) @@ -171,8 +172,10 @@ plugins.register(infinite_scroll) plugins.register(self_info) plugins.register(hostname_replace) plugins.register(search_on_category_select) +plugins.register(search_operators) plugins.register(tracker_url_remover) plugins.register(vim_hotkeys) + # load external plugins if 'plugins' in settings: plugins.register(*settings['plugins'], external=True) diff --git a/searx/plugins/search_operators.py b/searx/plugins/search_operators.py new file mode 100644 index 00000000..59125daf --- /dev/null +++ b/searx/plugins/search_operators.py @@ -0,0 +1,32 @@ +import shlex +import string + +from flask_babel import gettext + +name = gettext("Search operators") +description = gettext("""Filter results using hyphen, site: and -site:. 
default_on = False


def on_result(request, search, result):
    """Decide whether ``result`` passes the search operators of the query.

    The query text is read from ``search.search_query.query`` and tokenised
    with ``shlex.split``, so a double-quoted phrase stays a single token.
    All comparisons are case-insensitive substring tests:

    * ``"some phrase"`` -- keep only results whose title or content has it;
    * ``-term``         -- drop results whose title or content has ``term``;
    * ``site:host``     -- keep only results whose URL has one of the hosts;
    * ``-site:host``    -- drop results whose URL has ``host``.

    :param request: not used by this function.
    :param search: object exposing ``search_query.query`` (the query string).
    :param result: mapping read for ``url``, ``title`` and ``content``;
        missing or empty fields are treated as empty text.
    :returns: ``True`` to keep the result, ``False`` to filter it out.
    """
    query = search.search_query.query
    try:
        tokens = shlex.split(query)
    except ValueError:
        # shlex raises on an unbalanced quote; a half-typed phrase should not
        # break the search, so degrade to plain whitespace splitting.
        tokens = query.split()

    phrases, banned_terms, wanted_sites, banned_sites = [], [], [], []
    for token in tokens:
        token = token.lower()
        # Order matters: '-site:' also starts with '-', so test it first.
        if token.startswith('-site:'):
            bucket, value = banned_sites, token[len('-site:'):]
        elif token.startswith('site:'):
            bucket, value = wanted_sites, token[len('site:'):]
        elif token.startswith('-'):
            bucket, value = banned_terms, token[1:]
        elif ' ' in token:
            bucket, value = phrases, token
        else:
            continue
        # A bare operator ("-", "site:") carries nothing to match against.
        if value.strip():
            bucket.append(value)

    url = (result.get("url") or '').lower()
    title = (result.get("title") or '').lower()
    content = (result.get("content") or '').lower()

    # Each check only fires when its operator is present in the query: a test
    # over an empty operator list must never reject a result.
    if any(p not in title and p not in content for p in phrases):
        return False
    if any(t in title or t in content for t in banned_terms):
        return False
    if wanted_sites and not any(s in url for s in wanted_sites):
        return False
    if any(s in url for s in banned_sites):
        return False
    return True