diff options
author | Johannes 'josch' Schauer <josch@debian.org> | 2020-02-01 01:11:55 +0100 |
---|---|---|
committer | Johannes 'josch' Schauer <josch@debian.org> | 2020-02-01 01:11:55 +0100 |
commit | 61b98ca52e8d48a6ad3b4baed5feb4b38ee53804 (patch) | |
tree | 9f7ab24aabb0a3b25d125df81fff058ab5e63fe9 /searx/utils.py | |
parent | 3e49246c2e44159486ea66fed3757cdb4e4d0c50 (diff) |
New upstream version 0.16.0+dfsg1
Diffstat (limited to 'searx/utils.py')
-rw-r--r-- | searx/utils.py | 84 |
1 files changed, 75 insertions, 9 deletions
diff --git a/searx/utils.py b/searx/utils.py index dfa22c5..5ea9dc8 100644 --- a/searx/utils.py +++ b/searx/utils.py @@ -1,3 +1,4 @@ +# -*- coding: utf-8 -*- import csv import hashlib import hmac @@ -12,6 +13,7 @@ from numbers import Number from os.path import splitext, join from io import open from random import choice +from lxml.etree import XPath import sys import json @@ -44,9 +46,15 @@ logger = logger.getChild('utils') blocked_tags = ('script', 'style') +ecma_unescape4_re = re.compile(r'%u([0-9a-fA-F]{4})', re.UNICODE) +ecma_unescape2_re = re.compile(r'%([0-9a-fA-F]{2})', re.UNICODE) + useragents = json.loads(open(os.path.dirname(os.path.realpath(__file__)) + "/data/useragents.json", 'r', encoding='utf-8').read()) +xpath_cache = dict() +lang_to_lc_cache = dict() + def searx_useragent(): return 'searx/{searx_version} {suffix}'.format( @@ -183,7 +191,7 @@ def get_resources_directory(searx_directory, subdirectory, resources_directory): if not resources_directory: resources_directory = os.path.join(searx_directory, subdirectory) if not os.path.isdir(resources_directory): - raise Exception(directory + " is not a directory") + raise Exception(resources_directory + " is not a directory") return resources_directory @@ -302,18 +310,30 @@ def int_or_zero(num): def is_valid_lang(lang): is_abbr = (len(lang) == 2) + lang = lang.lower().decode('utf-8') if is_abbr: for l in language_codes: - if l[0][:2] == lang.lower(): + if l[0][:2] == lang: return (True, l[0][:2], l[3].lower()) return False else: for l in language_codes: - if l[1].lower() == lang.lower(): + if l[1].lower() == lang or l[3].lower() == lang: return (True, l[0][:2], l[3].lower()) return False +def _get_lang_to_lc_dict(lang_list): + key = str(lang_list) + value = lang_to_lc_cache.get(key, None) + if value is None: + value = dict() + for lc in lang_list: + value.setdefault(lc.split('-')[0], lc) + lang_to_lc_cache[key] = value + return value + + # auxiliary function to match lang_code in lang_list def _match_language(lang_code, lang_list=[], custom_aliases={}): # replace language code with a custom alias if necessary @@ -334,11 +354,7 @@ def _match_language(lang_code, lang_list=[], custom_aliases={}): return new_code # try to get the any supported country for this language - for lc in lang_list: - if lang_code == lc.split('-')[0]: - return lc - - return None + return _get_lang_to_lc_dict(lang_list).get(lang_code, None) # get the language code from lang_list that best matches locale_code @@ -384,10 +400,17 @@ def load_module(filename, module_dir): def new_hmac(secret_key, url): + try: + secret_key_bytes = bytes(secret_key, 'utf-8') + except TypeError as err: + if isinstance(secret_key, bytes): + secret_key_bytes = secret_key + else: + raise err if sys.version_info[0] == 2: return hmac.new(bytes(secret_key), url, hashlib.sha256).hexdigest() else: - return hmac.new(bytes(secret_key, 'utf-8'), url, hashlib.sha256).hexdigest() + return hmac.new(secret_key_bytes, url, hashlib.sha256).hexdigest() def to_string(obj): @@ -399,3 +422,46 @@ def to_string(obj): return obj.__str__() if hasattr(obj, '__repr__'): return obj.__repr__() + + +def ecma_unescape(s): + """ + python implementation of the unescape javascript function + + https://www.ecma-international.org/ecma-262/6.0/#sec-unescape-string + https://developer.mozilla.org/fr/docs/Web/JavaScript/Reference/Objets_globaux/unescape + """ + # s = unicode(s) + # "%u5409" becomes "吉" + s = ecma_unescape4_re.sub(lambda e: unichr(int(e.group(1), 16)), s) + # "%20" becomes " ", "%F3" becomes "ó" + s = ecma_unescape2_re.sub(lambda e: unichr(int(e.group(1), 16)), s) + return s + + +def get_engine_from_settings(name): + """Return engine configuration from settings.yml of a given engine name""" + + if 'engines' not in settings: + return {} + + for engine in settings['engines']: + if 'name' not in engine: + continue + if name == engine['name']: + return engine + + return {} + + +def get_xpath(xpath_str): + result = xpath_cache.get(xpath_str, None) + if result is None: + result = XPath(xpath_str) + xpath_cache[xpath_str] = result + return result + + +def eval_xpath(element, xpath_str): + xpath = get_xpath(xpath_str) + return xpath(element) |