summaryrefslogtreecommitdiff
path: root/searx/engines/google.py
diff options
context:
space:
mode:
authorJohannes 'josch' Schauer <josch@debian.org>2019-01-06 22:42:31 +0100
committerJohannes 'josch' Schauer <josch@debian.org>2019-01-06 23:42:19 +0100
commited48940eb3d6c12d9800bf7b490530d80c969e83 (patch)
treed8cb3d1506475576e85e5351a1e9b4e752326059 /searx/engines/google.py
parent728a7e0706c2d867a5de7247ea8c4c5f27bee346 (diff)
New upstream version 0.15.0+dfsg1
Diffstat (limited to 'searx/engines/google.py')
-rw-r--r--searx/engines/google.py34
1 files changed, 15 insertions, 19 deletions
diff --git a/searx/engines/google.py b/searx/engines/google.py
index 0a86783..62e7d11 100644
--- a/searx/engines/google.py
+++ b/searx/engines/google.py
@@ -14,6 +14,7 @@ from lxml import html, etree
from searx.engines.xpath import extract_text, extract_url
from searx import logger
from searx.url_utils import urlencode, urlparse, parse_qsl
+from searx.utils import match_language
logger = logger.getChild('google engine')
@@ -72,7 +73,7 @@ country_to_hostname = {
'RO': 'www.google.ro', # Romania
'RU': 'www.google.ru', # Russia
'SK': 'www.google.sk', # Slovakia
- 'SL': 'www.google.si', # Slovenia (SL -> si)
+ 'SI': 'www.google.si', # Slovenia
'SE': 'www.google.se', # Sweden
'TH': 'www.google.co.th', # Thailand
'TR': 'www.google.com.tr', # Turkey
@@ -90,7 +91,7 @@ url_map = 'https://www.openstreetmap.org/'\
search_path = '/search'
search_url = ('https://{hostname}' +
search_path +
- '?{query}&start={offset}&gws_rd=cr&gbv=1&lr={lang}&ei=x')
+ '?{query}&start={offset}&gws_rd=cr&gbv=1&lr={lang}&hl={lang_short}&ei=x')
time_range_search = "&tbs=qdr:{range}"
time_range_dict = {'day': 'd',
@@ -165,22 +166,16 @@ def extract_text_from_dom(result, xpath):
def request(query, params):
offset = (params['pageno'] - 1) * 10
- # temporary fix until a way of supporting en-US is found
- if params['language'] == 'en-US':
- params['language'] = 'en-GB'
-
- if params['language'][:2] == 'jv':
- language = 'jw'
- country = 'ID'
- url_lang = 'lang_jw'
+ language = match_language(params['language'], supported_languages)
+ language_array = language.split('-')
+ if params['language'].find('-') > 0:
+ country = params['language'].split('-')[1]
+ elif len(language_array) == 2:
+ country = language_array[1]
else:
- language_array = params['language'].lower().split('-')
- if len(language_array) == 2:
- country = language_array[1]
- else:
- country = 'US'
- language = language_array[0] + ',' + language_array[0] + '-' + country
- url_lang = 'lang_' + language_array[0]
+ country = 'US'
+
+ url_lang = 'lang_' + language
if use_locale_domain:
google_hostname = country_to_hostname.get(country.upper(), default_hostname)
@@ -192,11 +187,12 @@ def request(query, params):
params['url'] = search_url.format(offset=offset,
query=urlencode({'q': query}),
hostname=google_hostname,
- lang=url_lang)
+ lang=url_lang,
+ lang_short=language)
if params['time_range'] in time_range_dict:
params['url'] += time_range_search.format(range=time_range_dict[params['time_range']])
- params['headers']['Accept-Language'] = language
+ params['headers']['Accept-Language'] = language + ',' + language + '-' + country
params['headers']['Accept'] = 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
params['google_hostname'] = google_hostname