summary | refs | log | tree | commit | diff
path: root/searx/utils.py
diff options
context:
space:
mode:
author: Johannes 'josch' Schauer <josch@debian.org> 2020-02-01 01:11:55 +0100
committer: Johannes 'josch' Schauer <josch@debian.org> 2020-02-01 01:11:55 +0100
commit: 61b98ca52e8d48a6ad3b4baed5feb4b38ee53804 (patch)
tree: 9f7ab24aabb0a3b25d125df81fff058ab5e63fe9 /searx/utils.py
parent: 3e49246c2e44159486ea66fed3757cdb4e4d0c50 (diff)
New upstream version 0.16.0+dfsg1
Diffstat (limited to 'searx/utils.py')
-rw-r--r-- searx/utils.py 84
1 files changed, 75 insertions, 9 deletions
diff --git a/searx/utils.py b/searx/utils.py
index dfa22c5..5ea9dc8 100644
--- a/searx/utils.py
+++ b/searx/utils.py
@@ -1,3 +1,4 @@
+# -*- coding: utf-8 -*-
import csv
import hashlib
import hmac
@@ -12,6 +13,7 @@ from numbers import Number
from os.path import splitext, join
from io import open
from random import choice
+from lxml.etree import XPath
import sys
import json
@@ -44,9 +46,15 @@ logger = logger.getChild('utils')
blocked_tags = ('script',
'style')
+ecma_unescape4_re = re.compile(r'%u([0-9a-fA-F]{4})', re.UNICODE)
+ecma_unescape2_re = re.compile(r'%([0-9a-fA-F]{2})', re.UNICODE)
+
useragents = json.loads(open(os.path.dirname(os.path.realpath(__file__))
+ "/data/useragents.json", 'r', encoding='utf-8').read())
+xpath_cache = dict()
+lang_to_lc_cache = dict()
+
def searx_useragent():
return 'searx/{searx_version} {suffix}'.format(
@@ -183,7 +191,7 @@ def get_resources_directory(searx_directory, subdirectory, resources_directory):
if not resources_directory:
resources_directory = os.path.join(searx_directory, subdirectory)
if not os.path.isdir(resources_directory):
- raise Exception(directory + " is not a directory")
+ raise Exception(resources_directory + " is not a directory")
return resources_directory
@@ -302,18 +310,30 @@ def int_or_zero(num):
def is_valid_lang(lang):
is_abbr = (len(lang) == 2)
+ lang = lang.lower().decode('utf-8')
if is_abbr:
for l in language_codes:
- if l[0][:2] == lang.lower():
+ if l[0][:2] == lang:
return (True, l[0][:2], l[3].lower())
return False
else:
for l in language_codes:
- if l[1].lower() == lang.lower():
+ if l[1].lower() == lang or l[3].lower() == lang:
return (True, l[0][:2], l[3].lower())
return False
+def _get_lang_to_lc_dict(lang_list):
+ key = str(lang_list)
+ value = lang_to_lc_cache.get(key, None)
+ if value is None:
+ value = dict()
+ for lc in lang_list:
+ value.setdefault(lc.split('-')[0], lc)
+ lang_to_lc_cache[key] = value
+ return value
+
+
# auxiliary function to match lang_code in lang_list
def _match_language(lang_code, lang_list=[], custom_aliases={}):
# replace language code with a custom alias if necessary
@@ -334,11 +354,7 @@ def _match_language(lang_code, lang_list=[], custom_aliases={}):
return new_code
# try to get the any supported country for this language
- for lc in lang_list:
- if lang_code == lc.split('-')[0]:
- return lc
-
- return None
+ return _get_lang_to_lc_dict(lang_list).get(lang_code, None)
# get the language code from lang_list that best matches locale_code
@@ -384,10 +400,17 @@ def load_module(filename, module_dir):
def new_hmac(secret_key, url):
+ try:
+ secret_key_bytes = bytes(secret_key, 'utf-8')
+ except TypeError as err:
+ if isinstance(secret_key, bytes):
+ secret_key_bytes = secret_key
+ else:
+ raise err
if sys.version_info[0] == 2:
return hmac.new(bytes(secret_key), url, hashlib.sha256).hexdigest()
else:
- return hmac.new(bytes(secret_key, 'utf-8'), url, hashlib.sha256).hexdigest()
+ return hmac.new(secret_key_bytes, url, hashlib.sha256).hexdigest()
def to_string(obj):
@@ -399,3 +422,46 @@ def to_string(obj):
return obj.__str__()
if hasattr(obj, '__repr__'):
return obj.__repr__()
+
+
+def ecma_unescape(s):
+ """
+ python implementation of the unescape javascript function
+
+ https://www.ecma-international.org/ecma-262/6.0/#sec-unescape-string
+ https://developer.mozilla.org/fr/docs/Web/JavaScript/Reference/Objets_globaux/unescape
+ """
+ # s = unicode(s)
+ # "%u5409" becomes "吉"
+ s = ecma_unescape4_re.sub(lambda e: unichr(int(e.group(1), 16)), s)
+ # "%20" becomes " ", "%F3" becomes "ó"
+ s = ecma_unescape2_re.sub(lambda e: unichr(int(e.group(1), 16)), s)
+ return s
+
+
+def get_engine_from_settings(name):
+ """Return engine configuration from settings.yml of a given engine name"""
+
+ if 'engines' not in settings:
+ return {}
+
+ for engine in settings['engines']:
+ if 'name' not in engine:
+ continue
+ if name == engine['name']:
+ return engine
+
+ return {}
+
+
+def get_xpath(xpath_str):
+ result = xpath_cache.get(xpath_str, None)
+ if result is None:
+ result = XPath(xpath_str)
+ xpath_cache[xpath_str] = result
+ return result
+
+
+def eval_xpath(element, xpath_str):
+ xpath = get_xpath(xpath_str)
+ return xpath(element)