diff options
Diffstat (limited to 'searx/webapp.py')
-rwxr-xr-x[-rw-r--r--] | searx/webapp.py | 669 |
1 files changed, 447 insertions, 222 deletions
diff --git a/searx/webapp.py b/searx/webapp.py index aadefe6..e37eaf7 100644..100755 --- a/searx/webapp.py +++ b/searx/webapp.py @@ -17,91 +17,83 @@ along with searx. If not, see < http://www.gnu.org/licenses/ >. (C) 2013- by Adam Tauber, <asciimoo@gmail.com> ''' +import sys +if sys.version_info[0] < 3: + print('\033[1;31m Python2 is no longer supported\033[0m') + exit(1) + if __name__ == '__main__': - from sys import path from os.path import realpath, dirname - path.append(realpath(dirname(realpath(__file__)) + '/../')) + sys.path.append(realpath(dirname(realpath(__file__)) + '/../')) import hashlib import hmac import json import os -import sys import requests from searx import logger logger = logger.getChild('webapp') -try: - from pygments import highlight - from pygments.lexers import get_lexer_by_name - from pygments.formatters import HtmlFormatter -except: - logger.critical("cannot import dependency: pygments") - from sys import exit - exit(1) -try: - from cgi import escape -except: - from html import escape from datetime import datetime, timedelta from time import time -from werkzeug.contrib.fixers import ProxyFix +from html import escape +from io import StringIO +import urllib +from urllib.parse import urlencode, urlparse + +from pygments import highlight +from pygments.lexers import get_lexer_by_name +from pygments.formatters import HtmlFormatter # pylint: disable=no-name-in-module + +from werkzeug.middleware.proxy_fix import ProxyFix from flask import ( Flask, request, render_template, url_for, Response, make_response, redirect, send_from_directory ) +from babel.support import Translations +import flask_babel from flask_babel import Babel, gettext, format_date, format_decimal +from flask.ctx import has_request_context from flask.json import jsonify +from searx import brand, static_path from searx import settings, searx_dir, searx_debug from searx.exceptions import SearxParameterException from searx.engines import ( - categories, engines, engine_shortcuts, get_engines_stats, initialize_engines + categories, engines, engine_shortcuts, get_engines_stats ) -from searx.utils import ( - UnicodeWriter, highlight_content, html_to_text, get_resources_directory, - get_static_files, get_result_templates, get_themes, gen_useragent, - dict_subset, prettify_url, match_language +from searx.webutils import ( + UnicodeWriter, highlight_content, get_resources_directory, + get_static_files, get_result_templates, get_themes, + prettify_url, new_hmac, is_flask_run_cmdline ) +from searx.webadapter import get_search_query_from_webapp, get_selected_categories +from searx.utils import html_to_text, gen_useragent, dict_subset, match_language from searx.version import VERSION_STRING from searx.languages import language_codes as languages -from searx.search import SearchWithPlugins, get_search_query_from_webapp +from searx.search import SearchWithPlugins, initialize as search_initialize +from searx.search.checker import get_result as checker_get_result from searx.query import RawTextQuery -from searx.autocomplete import searx_bang, backends as autocomplete_backends +from searx.autocomplete import search_autocomplete, backends as autocomplete_backends from searx.plugins import plugins from searx.plugins.oa_doi_rewrite import get_doi_resolver from searx.preferences import Preferences, ValidationException, LANGUAGE_CODES from searx.answerers import answerers -from searx.url_utils import urlencode, urlparse, urljoin -from searx.utils import new_hmac - -# check if the pyopenssl package is installed. -# It is needed for SSL connection without trouble, see #298 -try: - import OpenSSL.SSL # NOQA -except ImportError: - logger.critical("The pyopenssl package has to be installed.\n" - "Some HTTPS connections will fail") - -try: - from cStringIO import StringIO -except: - from io import StringIO - - -if sys.version_info[0] == 3: - unicode = str - PY3 = True -else: - PY3 = False - logger.warning('\033[1;31m *** Deprecation Warning ***\033[0m') - logger.warning('\033[1;31m Python2 is deprecated\033[0m') +from searx.poolrequests import get_global_proxies +from searx.answerers import ask +from searx.metrology.error_recorder import errors_per_engines +from searx.settings_loader import get_default_settings_path # serve pages with HTTP/1.1 from werkzeug.serving import WSGIRequestHandler WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server'].get('http_protocol_version', '1.0')) +# check secret_key +if not searx_debug and settings['server']['secret_key'] == 'ultrasecretkey': + logger.error('server.secret_key is not changed. Please use something else instead of ultrasecretkey.') + exit(1) + # about static static_path = get_resources_directory(searx_dir, 'static', settings['ui']['static_path']) logger.debug('static directory is %s', static_path) @@ -129,18 +121,31 @@ app = Flask( app.jinja_env.trim_blocks = True app.jinja_env.lstrip_blocks = True -app.jinja_env.add_extension('jinja2.ext.loopcontrols') +app.jinja_env.add_extension('jinja2.ext.loopcontrols') # pylint: disable=no-member app.secret_key = settings['server']['secret_key'] -if not searx_debug \ - or os.environ.get("WERKZEUG_RUN_MAIN") == "true" \ - or os.environ.get('UWSGI_ORIGINAL_PROC_NAME') is not None: - initialize_engines(settings['engines']) +# see https://flask.palletsprojects.com/en/1.1.x/cli/ +# True if "FLASK_APP=searx/webapp.py FLASK_ENV=development flask run" +flask_run_development = \ + os.environ.get("FLASK_APP") is not None\ + and os.environ.get("FLASK_ENV") == 'development'\ + and is_flask_run_cmdline() + +# True if reload feature is activated of werkzeug, False otherwise (including uwsgi, etc..) +# __name__ != "__main__" if searx.webapp is imported (make test, make docs, uwsgi...) +# see run() at the end of this file : searx_debug activates the reload feature. +werkzeug_reloader = flask_run_development or (searx_debug and __name__ == "__main__") + +# initialize the engines except on the first run of the werkzeug server. +if not werkzeug_reloader\ + or (werkzeug_reloader and os.environ.get("WERKZEUG_RUN_MAIN") == "true"): + search_initialize(enable_checker=True) babel = Babel(app) -rtl_locales = ['ar', 'arc', 'bcc', 'bqi', 'ckb', 'dv', 'fa', 'glk', 'he', +rtl_locales = ['ar', 'arc', 'bcc', 'bqi', 'ckb', 'dv', 'fa', 'fa_IR', 'glk', 'he', 'ku', 'mzn', 'pnb', 'ps', 'sd', 'ug', 'ur', 'yi'] +ui_locale_codes = [l.replace('_', '-') for l in settings['locales'].keys()] # used when translating category names _category_names = (gettext('files'), @@ -152,25 +157,65 @@ _category_names = (gettext('files'), gettext('it'), gettext('news'), gettext('map'), + gettext('onions'), gettext('science')) -outgoing_proxies = settings['outgoing'].get('proxies') or None +_flask_babel_get_translations = flask_babel.get_translations + + +# monkey patch for flask_babel.get_translations +def _get_translations(): + if has_request_context() and request.form.get('use-translation') == 'oc': + babel_ext = flask_babel.current_app.extensions['babel'] + return Translations.load(next(babel_ext.translation_directories), 'oc') + + return _flask_babel_get_translations() + + +flask_babel.get_translations = _get_translations + + +def _get_browser_or_settings_language(request, lang_list): + for lang in request.headers.get("Accept-Language", "en").split(","): + if ';' in lang: + lang = lang.split(';')[0] + if '-' in lang: + lang_parts = lang.split('-') + lang = "{}-{}".format(lang_parts[0], lang_parts[-1].upper()) + locale = match_language(lang, lang_list, fallback=None) + if locale is not None: + return locale + return settings['search']['default_lang'] or 'en' @babel.localeselector def get_locale(): if 'locale' in request.form\ and request.form['locale'] in settings['locales']: - return request.form['locale'] - - if 'locale' in request.args\ - and request.args['locale'] in settings['locales']: - return request.args['locale'] - - if request.preferences.get_value('locale') != '': - return request.preferences.get_value('locale') + # use locale from the form + locale = request.form['locale'] + locale_source = 'form' + elif request.preferences.get_value('locale') != '': + # use locale from the preferences + locale = request.preferences.get_value('locale') + locale_source = 'preferences' + else: + # use local from the browser + locale = _get_browser_or_settings_language(request, ui_locale_codes) + locale = locale.replace('-', '_') + locale_source = 'browser' + + # see _get_translations function + # and https://github.com/searx/searx/pull/1863 + if locale == 'oc': + request.form['use-translation'] = 'oc' + locale = 'fr_FR' + + logger.debug( + "%s uses locale `%s` from %s", urllib.parse.quote(request.url), locale, locale_source + ) - return request.accept_languages.best_match(settings['locales'].keys()) + return locale # code-highlighter @@ -202,7 +247,8 @@ def code_highlighter(codelines, language=None): # highlight last codepart formatter = HtmlFormatter(linenos='inline', - linenostart=line_code_start) + linenostart=line_code_start, + cssclass="code-highlight") html_code = html_code + highlight(tmp_code, lexer, formatter) # reset conditions for next codepart @@ -216,7 +262,7 @@ def code_highlighter(codelines, language=None): last_line = line # highlight last codepart - formatter = HtmlFormatter(linenos='inline', linenostart=line_code_start) + formatter = HtmlFormatter(linenos='inline', linenostart=line_code_start, cssclass="code-highlight") html_code = html_code + highlight(tmp_code, lexer, formatter) return html_code @@ -229,14 +275,7 @@ def extract_domain(url): def get_base_url(): - if settings['server']['base_url']: - hostname = settings['server']['base_url'] - else: - scheme = 'http' - if request.is_secure: - scheme = 'https' - hostname = url_for('index', _external=True, _scheme=scheme) - return hostname + return url_for('index', _external=True) def get_current_theme_name(override=None): @@ -268,7 +307,8 @@ def url_for_theme(endpoint, override_theme=None, **values): filename_with_theme = "themes/{}/{}".format(theme_name, values['filename']) if filename_with_theme in static_files: values['filename'] = filename_with_theme - return url_for(endpoint, **values) + url = url_for(endpoint, **values) + return url def proxify(url): @@ -278,11 +318,11 @@ def proxify(url): if not settings.get('result_proxy'): return url - url_params = dict(mortyurl=url.encode('utf-8')) + url_params = dict(mortyurl=url.encode()) if settings['result_proxy'].get('key'): url_params['mortyhash'] = hmac.new(settings['result_proxy']['key'], - url.encode('utf-8'), + url.encode(), hashlib.sha256).hexdigest() return '{0}?{1}'.format(settings['result_proxy']['url'], @@ -297,16 +337,30 @@ def image_proxify(url): if not request.preferences.get_value('image_proxy'): return url - if url.startswith('data:image/jpeg;base64,'): - return url + if url.startswith('data:image/'): + # 50 is an arbitrary number to get only the beginning of the image. + partial_base64 = url[len('data:image/'):50].split(';') + if len(partial_base64) == 2 \ + and partial_base64[0] in ['gif', 'png', 'jpeg', 'pjpeg', 'webp', 'tiff', 'bmp']\ + and partial_base64[1].startswith('base64,'): + return url + else: + return None if settings.get('result_proxy'): return proxify(url) - h = new_hmac(settings['server']['secret_key'], url.encode('utf-8')) + h = new_hmac(settings['server']['secret_key'], url.encode()) return '{0}?{1}'.format(url_for('image_proxy'), - urlencode(dict(url=url.encode('utf-8'), h=h))) + urlencode(dict(url=url.encode(), h=h))) + + +def get_translations(): + return { + # when there is autocompletion + 'no_item_found': gettext('No item found') + } def render(template_name, override_theme=None, **kwargs): @@ -317,51 +371,32 @@ def render(template_name, override_theme=None, **kwargs): if (engine_name, category) not in disabled_engines) if 'categories' not in kwargs: - kwargs['categories'] = ['general'] - kwargs['categories'].extend(x for x in - sorted(categories.keys()) - if x != 'general' - and x in enabled_categories) - - if 'all_categories' not in kwargs: - kwargs['all_categories'] = ['general'] - kwargs['all_categories'].extend(x for x in - sorted(categories.keys()) - if x != 'general') - - if 'selected_categories' not in kwargs: - kwargs['selected_categories'] = [] - for arg in request.args: - if arg.startswith('category_'): - c = arg.split('_', 1)[1] - if c in categories: - kwargs['selected_categories'].append(c) - - if not kwargs['selected_categories']: - cookie_categories = request.preferences.get_value('categories') - for ccateg in cookie_categories: - kwargs['selected_categories'].append(ccateg) - - if not kwargs['selected_categories']: - kwargs['selected_categories'] = ['general'] + kwargs['categories'] = [x for x in + _get_ordered_categories() + if x in enabled_categories] if 'autocomplete' not in kwargs: kwargs['autocomplete'] = request.preferences.get_value('autocomplete') - if get_locale() in rtl_locales and 'rtl' not in kwargs: + locale = request.preferences.get_value('locale') + + if locale in rtl_locales and 'rtl' not in kwargs: kwargs['rtl'] = True kwargs['searx_version'] = VERSION_STRING kwargs['method'] = request.preferences.get_value('method') + kwargs['autofocus'] = request.preferences.get_value('autofocus') + + kwargs['archive_today'] = request.preferences.get_value('archive_today') + kwargs['safesearch'] = str(request.preferences.get_value('safesearch')) kwargs['language_codes'] = languages if 'current_language' not in kwargs: kwargs['current_language'] = match_language(request.preferences.get_value('language'), - LANGUAGE_CODES, - fallback=settings['search']['language']) + LANGUAGE_CODES) # override url_for function in templates kwargs['url_for'] = url_for_theme @@ -370,6 +405,9 @@ def render(template_name, override_theme=None, **kwargs): kwargs['proxify'] = proxify if settings.get('result_proxy', {}).get('url') else None + kwargs['opensearch_url'] = url_for('opensearch') + '?' \ + + urlencode({'method': kwargs['method'], 'autocomplete': kwargs['autocomplete']}) + kwargs['get_result_template'] = get_result_template kwargs['theme'] = get_current_theme_name(override=override_theme) @@ -384,11 +422,14 @@ def render(template_name, override_theme=None, **kwargs): kwargs['results_on_new_tab'] = request.preferences.get_value('results_on_new_tab') - kwargs['unicode'] = unicode - kwargs['preferences'] = request.preferences + kwargs['brand'] = brand + + kwargs['translations'] = json.dumps(get_translations(), separators=(',', ':')) + kwargs['scripts'] = set() + kwargs['endpoint'] = 'results' if 'q' in kwargs else request.endpoint for plugin in request.user_plugins: for script in plugin.js_dependencies: kwargs['scripts'].add(script) @@ -402,6 +443,17 @@ def render(template_name, override_theme=None, **kwargs): '{}/{}'.format(kwargs['theme'], template_name), **kwargs) +def _get_ordered_categories(): + ordered_categories = [] + if 'categories_order' not in settings['ui']: + ordered_categories = ['general'] + ordered_categories.extend(x for x in sorted(categories.keys()) if x != 'general') + return ordered_categories + ordered_categories = settings['ui']['categories_order'] + ordered_categories.extend(x for x in sorted(categories.keys()) if x not in ordered_categories) + return ordered_categories + + @app.before_request def pre_request(): request.start_time = time() @@ -409,6 +461,9 @@ def pre_request(): request.errors = [] preferences = Preferences(themes, list(categories.keys()), engines, plugins) + user_agent = request.headers.get('User-Agent', '').lower() + if 'webkit' in user_agent and 'android' in user_agent: + preferences.key_value_settings['method'].value = 'GET' request.preferences = preferences try: preferences.parse_dict(request.cookies) @@ -427,10 +482,16 @@ def pre_request(): else: try: preferences.parse_dict(request.form) - except Exception as e: + except Exception: logger.exception('invalid settings') request.errors.append(gettext('Invalid settings')) + # init search language and locale + if not preferences.get_value("language"): + preferences.parse_dict({"language": _get_browser_or_settings_language(request, LANGUAGE_CODES)}) + if not preferences.get_value("locale"): + preferences.parse_dict({"locale": get_locale()}) + # request.user_plugins request.user_plugins = [] allowed_plugins = preferences.plugins.get_enabled() @@ -442,6 +503,16 @@ def pre_request(): @app.after_request +def add_default_headers(response): + # set default http headers + for header, value in settings['server'].get('default_http_headers', {}).items(): + if header in response.headers: + continue + response.headers[header] = value + return response + + +@app.after_request def post_request(response): total_time = time() - request.start_time timings_all = ['total;dur=' + str(round(total_time * 1000, 3))] @@ -481,13 +552,37 @@ def index_error(output_format, error_message): request.errors.append(gettext('search error')) return render( 'index.html', + selected_categories=get_selected_categories(request.preferences, request.form), ) -@app.route('/search', methods=['GET', 'POST']) @app.route('/', methods=['GET', 'POST']) def index(): - """Render index page. + """Render index page.""" + + # UI + advanced_search = request.preferences.get_value('advanced_search') + + # redirect to search if there's a query in the request + if request.form.get('q'): + query = ('?' + request.query_string.decode()) if request.query_string else '' + return redirect(url_for('search') + query, 308) + + return render( + 'index.html', + selected_categories=get_selected_categories(request.preferences, request.form), + advanced_search=advanced_search, + ) + + +@app.route('/healthz', methods=['GET']) +def health(): + return Response('OK', mimetype='text/plain') + + +@app.route('/search', methods=['GET', 'POST']) +def search(): + """Search query in q and return results. Supported outputs: html, json, csv, rss. """ @@ -497,11 +592,13 @@ def index(): if output_format not in ['html', 'csv', 'json', 'rss']: output_format = 'html' - # check if there is query - if request.form.get('q') is None: + # check if there is query (not None and not an empty string) + if not request.form.get('q'): if output_format == 'html': return render( 'index.html', + advanced_search=request.preferences.get_value('advanced_search'), + selected_categories=get_selected_categories(request.preferences, request.form), ) else: return index_error(output_format, 'No query'), 400 @@ -511,19 +608,18 @@ def index(): raw_text_query = None result_container = None try: - search_query, raw_text_query = get_search_query_from_webapp(request.preferences, request.form) + search_query, raw_text_query, _, _ = get_search_query_from_webapp(request.preferences, request.form) # search = Search(search_query) # without plugins search = SearchWithPlugins(search_query, request.user_plugins, request) + result_container = search.search() + + except SearxParameterException as e: + logger.exception('search error: SearxParameterException') + return index_error(output_format, e.message), 400 except Exception as e: - # log exception logger.exception('search error') - - # is it an invalid input parameter or something else ? - if (issubclass(e.__class__, SearxParameterException)): - return index_error(output_format, e.message), 400 - else: - return index_error(output_format, gettext('search error')), 500 + return index_error(output_format, gettext('search error')), 500 # results results = result_container.get_ordered_results() @@ -531,8 +627,9 @@ def index(): if number_of_results < result_container.results_length(): number_of_results = 0 - # UI - advanced_search = request.form.get('advanced_search', None) + # checkin for a external bang + if result_container.redirect_url: + return redirect(result_container.redirect_url) # Server-Timing header request.timings = result_container.get_timings() @@ -543,7 +640,7 @@ def index(): if 'content' in result and result['content']: result['content'] = highlight_content(escape(result['content'][:1024]), search_query.query) if 'title' in result and result['title']: - result['title'] = highlight_content(escape(result['title'] or u''), search_query.query) + result['title'] = highlight_content(escape(result['title'] or ''), search_query.query) else: if result.get('content'): result['content'] = html_to_text(result['content']).strip() @@ -554,7 +651,7 @@ def index(): result['pretty_url'] = prettify_url(result['url']) # TODO, check if timezone is calculated right - if 'publishedDate' in result: + if result.get('publishedDate'): # do not try to get a date from an empty string or a None type try: # test if publishedDate >= 1900 (datetime module bug) result['pubdate'] = result['publishedDate'].strftime('%Y-%m-%d %H:%M:%S%z') except ValueError: @@ -565,39 +662,53 @@ def index(): minutes = int((timedifference.seconds / 60) % 60) hours = int(timedifference.seconds / 60 / 60) if hours == 0: - result['publishedDate'] = gettext(u'{minutes} minute(s) ago').format(minutes=minutes) + result['publishedDate'] = gettext('{minutes} minute(s) ago').format(minutes=minutes) else: - result['publishedDate'] = gettext(u'{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes) # noqa + result['publishedDate'] = gettext('{hours} hour(s), {minutes} minute(s) ago').format(hours=hours, minutes=minutes) # noqa else: result['publishedDate'] = format_date(result['publishedDate']) if output_format == 'json': - return Response(json.dumps({'query': search_query.query.decode('utf-8'), + return Response(json.dumps({'query': search_query.query, 'number_of_results': number_of_results, 'results': results, 'answers': list(result_container.answers), 'corrections': list(result_container.corrections), 'infoboxes': result_container.infoboxes, 'suggestions': list(result_container.suggestions), - 'unresponsive_engines': list(result_container.unresponsive_engines)}, + 'unresponsive_engines': __get_translated_errors(result_container.unresponsive_engines)}, # noqa default=lambda item: list(item) if isinstance(item, set) else item), mimetype='application/json') elif output_format == 'csv': csv = UnicodeWriter(StringIO()) - keys = ('title', 'url', 'content', 'host', 'engine', 'score') + keys = ('title', 'url', 'content', 'host', 'engine', 'score', 'type') csv.writerow(keys) for row in results: row['host'] = row['parsed_url'].netloc + row['type'] = 'result' + csv.writerow([row.get(key, '') for key in keys]) + for a in result_container.answers: + row = {'title': a, 'type': 'answer'} + csv.writerow([row.get(key, '') for key in keys]) + for a in result_container.suggestions: + row = {'title': a, 'type': 'suggestion'} + csv.writerow([row.get(key, '') for key in keys]) + for a in result_container.corrections: + row = {'title': a, 'type': 'correction'} csv.writerow([row.get(key, '') for key in keys]) csv.stream.seek(0) response = Response(csv.stream.read(), mimetype='application/csv') cont_disp = 'attachment;Filename=searx_-_{0}.csv'.format(search_query.query) response.headers.add('Content-Disposition', cont_disp) return response + elif output_format == 'rss': response_rss = render( 'opensearch_response_rss.xml', results=results, + answers=result_container.answers, + corrections=result_container.corrections, + suggestions=result_container.suggestions, q=request.form['q'], number_of_results=number_of_results, base_url=get_base_url(), @@ -609,13 +720,13 @@ def index(): # suggestions: use RawTextQuery to get the suggestion URLs with the same bang suggestion_urls = list(map(lambda suggestion: { - 'url': raw_text_query.changeSearchQuery(suggestion).getFullQuery(), + 'url': raw_text_query.changeQuery(suggestion).getFullQuery(), 'title': suggestion }, result_container.suggestions)) correction_urls = list(map(lambda correction: { - 'url': raw_text_query.changeSearchQuery(correction).getFullQuery(), + 'url': raw_text_query.changeQuery(correction).getFullQuery(), 'title': correction }, result_container.corrections)) @@ -628,16 +739,16 @@ def index(): pageno=search_query.pageno, time_range=search_query.time_range, number_of_results=format_decimal(number_of_results), - advanced_search=advanced_search, suggestions=suggestion_urls, answers=result_container.answers, corrections=correction_urls, infoboxes=result_container.infoboxes, + engine_data=result_container.engine_data, paging=result_container.paging, - unresponsive_engines=result_container.unresponsive_engines, + unresponsive_engines=__get_translated_errors(result_container.unresponsive_engines), current_language=match_language(search_query.lang, LANGUAGE_CODES, - fallback=settings['search']['language']), + fallback=request.preferences.get_value("language")), base_url=get_base_url(), theme=get_current_theme_name(), favicons=global_favicons[themes.index(get_current_theme_name())], @@ -645,6 +756,16 @@ def index(): ) +def __get_translated_errors(unresponsive_engines): + translated_errors = set() + for unresponsive_engine in unresponsive_engines: + error_msg = gettext(unresponsive_engine[1]) + if unresponsive_engine[2]: + error_msg = "{} {}".format(error_msg, unresponsive_engine[2]) + translated_errors.add((unresponsive_engine[0], error_msg)) + return translated_errors + + @app.route('/about', methods=['GET']) def about(): """Render about page""" @@ -657,55 +778,54 @@ def about(): def autocompleter(): """Return autocompleter results""" + # run autocompleter + results = [] + # set blocked engines disabled_engines = request.preferences.engines.get_disabled() # parse query - if PY3: - raw_text_query = RawTextQuery(request.form.get('q', b''), disabled_engines) - else: - raw_text_query = RawTextQuery(request.form.get('q', u'').encode('utf-8'), disabled_engines) - raw_text_query.parse_query() - - # check if search query is set - if not raw_text_query.getSearchQuery(): - return '', 400 - - # run autocompleter - completer = autocomplete_backends.get(request.preferences.get_value('autocomplete')) - - # parse searx specific autocompleter results like !bang - raw_results = searx_bang(raw_text_query) + raw_text_query = RawTextQuery(request.form.get('q', ''), disabled_engines) + sug_prefix = raw_text_query.getQuery() # normal autocompletion results only appear if no inner results returned - # and there is a query part besides the engine and language bangs - if len(raw_results) == 0 and completer and (len(raw_text_query.query_parts) > 1 or - (len(raw_text_query.languages) == 0 and - not raw_text_query.specific)): + # and there is a query part + if len(raw_text_query.autocomplete_list) == 0 and len(sug_prefix) > 0: + # get language from cookie language = request.preferences.get_value('language') if not language or language == 'all': language = 'en' else: language = language.split('-')[0] - # run autocompletion - raw_results.extend(completer(raw_text_query.getSearchQuery(), language)) - - # parse results (write :language and !engine back to result string) - results = [] - for result in raw_results: - raw_text_query.changeSearchQuery(result) - - # add parsed result - results.append(raw_text_query.getFullQuery()) - # return autocompleter results - if request.form.get('format') == 'x-suggestions': - return Response(json.dumps([raw_text_query.query, results]), - mimetype='application/json') + # run autocompletion + raw_results = search_autocomplete( + request.preferences.get_value('autocomplete'), sug_prefix, language + ) + for result in raw_results: + # attention: this loop will change raw_text_query object and this is + # the reason why the sug_prefix was stored before (see above) + results.append(raw_text_query.changeQuery(result).getFullQuery()) + + if len(raw_text_query.autocomplete_list) > 0: + for autocomplete_text in raw_text_query.autocomplete_list: + results.append(raw_text_query.get_autocomplete_full_query(autocomplete_text)) + + for answers in ask(raw_text_query): + for answer in answers: + results.append(str(answer['answer'])) + + if request.headers.get('X-Requested-With') == 'XMLHttpRequest': + # the suggestion request comes from the searx search form + suggestions = json.dumps(results) + mimetype = 'application/json' + else: + # the suggestion request comes from browser's URL bar + suggestions = json.dumps([sug_prefix, results]) + mimetype = 'application/x-suggestions+json' - return Response(json.dumps(results), - mimetype='application/json') + return Response(suggestions, mimetype=mimetype) @app.route('/preferences', methods=['GET', 'POST']) @@ -714,7 +834,7 @@ def preferences(): # save preferences if request.method == 'POST': - resp = make_response(redirect(urljoin(settings['server']['base_url'], url_for('index')))) + resp = make_response(redirect(url_for('index', _external=True))) try: request.preferences.parse_form(request.form) except ValidationException: @@ -724,35 +844,46 @@ def preferences(): # render preferences image_proxy = request.preferences.get_value('image_proxy') - lang = request.preferences.get_value('language') disabled_engines = request.preferences.engines.get_disabled() allowed_plugins = request.preferences.plugins.get_enabled() # stats for preferences page stats = {} + engines_by_category = {} for c in categories: + engines_by_category[c] = [] for e in categories[c]: + if not request.preferences.validate_token(e): + continue + stats[e.name] = {'time': None, 'warn_timeout': False, 'warn_time': False} if e.timeout > settings['outgoing']['request_timeout']: stats[e.name]['warn_timeout'] = True stats[e.name]['supports_selected_language'] = _is_selected_language_supported(e, request.preferences) + engines_by_category[c].append(e) # get first element [0], the engine time, # and then the second element [1] : the time (the first one is the label) - for engine_stat in get_engines_stats()[0][1]: + for engine_stat in get_engines_stats(request.preferences)[0][1]: stats[engine_stat.get('name')]['time'] = round(engine_stat.get('avg'), 3) if engine_stat.get('avg') > settings['outgoing']['request_timeout']: stats[engine_stat.get('name')]['warn_time'] = True # end of stats + locked_preferences = list() + if 'preferences' in settings and 'lock' in settings['preferences']: + locked_preferences = settings['preferences']['lock'] + return render('preferences.html', + selected_categories=get_selected_categories(request.preferences, request.form), + all_categories=_get_ordered_categories(), locales=settings['locales'], - current_locale=get_locale(), + current_locale=request.preferences.get_value("locale"), image_proxy=image_proxy, - engines_by_category=categories, + engines_by_category=engines_by_category, stats=stats, answerers=[{'info': a.self_info(), 'keywords': a.keywords} for a in answerers], disabled_engines=disabled_engines, @@ -766,6 +897,7 @@ def preferences(): theme=get_current_theme_name(), preferences_url_params=request.preferences.get_as_url_params(), base_url=get_base_url(), + locked_preferences=locked_preferences, preferences=True) @@ -779,7 +911,7 @@ def _is_selected_language_supported(engine, preferences): @app.route('/image_proxy', methods=['GET']) def image_proxy(): - url = request.args.get('url').encode('utf-8') + url = request.args.get('url').encode() if not url: return '', 400 @@ -789,14 +921,20 @@ def image_proxy(): if h != request.args.get('h'): return '', 400 + headers = { + 'User-Agent': gen_useragent(), + 'Accept': 'image/webp,*/*', + 'Accept-Encoding': 'gzip, deflate', + 'Sec-GPC': '1', + 'DNT': '1', + } headers = dict_subset(request.headers, {'If-Modified-Since', 'If-None-Match'}) - headers['User-Agent'] = gen_useragent() resp = requests.get(url, stream=True, timeout=settings['outgoing']['request_timeout'], headers=headers, - proxies=outgoing_proxies) + proxies=get_global_proxies()) if resp.status_code == 304: return '', resp.status_code @@ -828,13 +966,49 @@ def image_proxy(): @app.route('/stats', methods=['GET']) def stats(): """Render engine statistics page.""" - stats = get_engines_stats() + if not settings['general'].get('enable_stats'): + return page_not_found(None) + stats = get_engines_stats(request.preferences) return render( 'stats.html', stats=stats, ) +@app.route('/stats/errors', methods=['GET']) +def stats_errors(): + result = {} + engine_names = list(errors_per_engines.keys()) + engine_names.sort() + for engine_name in engine_names: + error_stats = errors_per_engines[engine_name] + sent_search_count = max(engines[engine_name].stats['sent_search_count'], 1) + sorted_context_count_list = sorted(error_stats.items(), key=lambda context_count: context_count[1]) + r = [] + percentage_sum = 0 + for context, count in sorted_context_count_list: + percentage = round(20 * count / sent_search_count) * 5 + percentage_sum += percentage + r.append({ + 'filename': context.filename, + 'function': context.function, + 'line_no': context.line_no, + 'code': context.code, + 'exception_classname': context.exception_classname, + 'log_message': context.log_message, + 'log_parameters': context.log_parameters, + 'percentage': percentage, + }) + result[engine_name] = sorted(r, reverse=True, key=lambda d: d['percentage']) + return jsonify(result) + + +@app.route('/stats/checker', methods=['GET']) +def stats_checker(): + result = checker_get_result() + return jsonify(result) + + @app.route('/robots.txt', methods=['GET']) def robots(): return Response("""User-agent: * @@ -857,15 +1031,15 @@ def opensearch(): if request.headers.get('User-Agent', '').lower().find('webkit') >= 0: method = 'get' - ret = render('opensearch.xml', - opensearch_method=method, - host=get_base_url(), - urljoin=urljoin, - override_theme='__common__') + ret = render( + 'opensearch.xml', + opensearch_method=method, + override_theme='__common__' + ) resp = Response(response=ret, status=200, - mimetype="text/xml") + mimetype="application/opensearchdescription+xml") return resp @@ -882,7 +1056,7 @@ def favicon(): @app.route('/clear_cookies') def clear_cookies(): - resp = make_response(redirect(urljoin(settings['server']['base_url'], url_for('index')))) + resp = make_response(redirect(url_for('index', _external=True))) for cookie_name in request.cookies: resp.delete_cookie(cookie_name) return resp @@ -890,34 +1064,53 @@ def clear_cookies(): @app.route('/config') def config(): - return jsonify({'categories': list(categories.keys()), - 'engines': [{'name': engine_name, - 'categories': engine.categories, - 'shortcut': engine.shortcut, - 'enabled': not engine.disabled, - 'paging': engine.paging, - 'language_support': engine.language_support, - 'supported_languages': - list(engine.supported_languages.keys()) - if isinstance(engine.supported_languages, dict) - else engine.supported_languages, - 'safesearch': engine.safesearch, - 'time_range_support': engine.time_range_support, - 'timeout': engine.timeout} - for engine_name, engine in engines.items()], - 'plugins': [{'name': plugin.name, - 'enabled': plugin.default_on} - for plugin in plugins], - 'instance_name': settings['general']['instance_name'], - 'locales': settings['locales'], - 'default_locale': settings['ui']['default_locale'], - 'autocomplete': settings['search']['autocomplete'], - 'safe_search': settings['search']['safe_search'], - 'default_theme': settings['ui']['default_theme'], - 'version': VERSION_STRING, - 'doi_resolvers': [r for r in settings['doi_resolvers']], - 'default_doi_resolver': settings['default_doi_resolver'], - }) + """Return configuration in JSON format.""" + _engines = [] + for name, engine in engines.items(): + if not request.preferences.validate_token(engine): + continue + + supported_languages = engine.supported_languages + if isinstance(engine.supported_languages, dict): + supported_languages = list(engine.supported_languages.keys()) + + _engines.append({ + 'name': name, + 'categories': engine.categories, + 'shortcut': engine.shortcut, + 'enabled': not engine.disabled, + 'paging': engine.paging, + 'language_support': engine.language_support, + 'supported_languages': supported_languages, + 'safesearch': engine.safesearch, + 'time_range_support': engine.time_range_support, + 'timeout': engine.timeout + }) + + _plugins = [] + for _ in plugins: + _plugins.append({'name': _.name, 'enabled': _.default_on}) + + return jsonify({ + 'categories': list(categories.keys()), + 'engines': _engines, + 'plugins': _plugins, + 'instance_name': settings['general']['instance_name'], + 'locales': settings['locales'], + 'default_locale': settings['ui']['default_locale'], + 'autocomplete': settings['search']['autocomplete'], + 'safe_search': settings['search']['safe_search'], + 'default_theme': settings['ui']['default_theme'], + 'version': VERSION_STRING, + 'brand': { + 'CONTACT_URL': brand.CONTACT_URL, + 'GIT_URL': brand.GIT_URL, + 'GIT_BRANCH': brand.GIT_BRANCH, + 'DOCS_URL': brand.DOCS_URL + }, + 'doi_resolvers': [r for r in settings['doi_resolvers']], + 'default_doi_resolver': settings['default_doi_resolver'], + }) @app.errorhandler(404) @@ -932,11 +1125,21 @@ def run(): use_debugger=searx_debug, port=settings['server']['port'], host=settings['server']['bind_address'], - threaded=True + threaded=True, + extra_files=[ + get_default_settings_path() + ], ) -class ReverseProxyPathFix(object): +def patch_application(app): + # serve pages with HTTP/1.1 + WSGIRequestHandler.protocol_version = "HTTP/{}".format(settings['server']['http_protocol_version']) + # patch app to handle non root url-s behind proxy & wsgi + app.wsgi_app = ReverseProxyPathFix(ProxyFix(app.wsgi_app)) + + +class ReverseProxyPathFix: '''Wrap the application in this middleware and configure the front-end server to add these headers, to let you quietly bind this to a URL other than / and to an HTTP scheme that is @@ -957,19 +1160,41 @@ class ReverseProxyPathFix(object): ''' def __init__(self, app): + self.app = app + self.script_name = None + self.scheme = None + self.server = None + + if settings['server']['base_url']: + + # If base_url is specified, then these values from are given + # preference over any Flask's generics. + + base_url = urlparse(settings['server']['base_url']) + self.script_name = base_url.path + if self.script_name.endswith('/'): + # remove trailing slash to avoid infinite redirect on the index + # see https://github.com/searx/searx/issues/2729 + self.script_name = self.script_name[:-1] + self.scheme = base_url.scheme + self.server = base_url.netloc def __call__(self, environ, start_response): - script_name = environ.get('HTTP_X_SCRIPT_NAME', '') + script_name = self.script_name or environ.get('HTTP_X_SCRIPT_NAME', '') if script_name: environ['SCRIPT_NAME'] = script_name path_info = environ['PATH_INFO'] if path_info.startswith(script_name): environ['PATH_INFO'] = path_info[len(script_name):] - scheme = environ.get('HTTP_X_SCHEME', '') + scheme = self.scheme or environ.get('HTTP_X_SCHEME', '') if scheme: environ['wsgi.url_scheme'] = scheme + + server = self.server or environ.get('HTTP_X_FORWARDED_HOST', '') + if server: + environ['HTTP_HOST'] = server return self.app(environ, start_response) |