diff options
author | Johannes 'josch' Schauer <josch@debian.org> | 2020-02-01 01:09:45 +0100 |
---|---|---|
committer | Johannes 'josch' Schauer <josch@debian.org> | 2020-02-01 01:09:45 +0100 |
commit | 3e49246c2e44159486ea66fed3757cdb4e4d0c50 (patch) | |
tree | 73fecfac5dd7475f346d4bff59d78aec04a966a8 /searx/engines/qwant.py |
Import Upstream version 0.15.0+dfsg1
Diffstat (limited to 'searx/engines/qwant.py')
-rw-r--r-- | searx/engines/qwant.py | 132 |
1 files changed, 132 insertions, 0 deletions
# Origin: searx/engines/qwant.py -- new file (mode 100644, blob 4b0f1c8),
# added by the "Import Upstream version 0.15.0+dfsg1" commit.
"""
 Qwant (Web, Images, News, Social)

 @website     https://qwant.com/
 @provide-api not officially (https://api.qwant.com/api/search/)

 @using-api   yes
 @results     JSON
 @stable      yes
 @parse       url, title, content
"""

from datetime import datetime
from json import loads
from searx.utils import html_to_text
from searx.url_utils import urlencode
from searx.utils import match_language

# engine dependent config
# NOTE(review): `categories` is None here and is indexed below, so it is
# presumably overwritten from the engine configuration before any request is
# made -- confirm against the engine loader.
categories = None
paging = True
language_support = True
supported_languages_url = 'https://qwant.com/region'

# maps a searx category name to the keyword used in the Qwant API URL
category_to_keyword = {'general': 'web',
                       'images': 'images',
                       'news': 'news',
                       'social media': 'social'}

# search-url
url = 'https://api.qwant.com/api/search/{keyword}?count=10&offset={offset}&f=&{query}&t={keyword}&uiv=4'


def _search_keyword():
    """Return the Qwant keyword for the first configured category.

    Falls back to ``'web'`` when no category is configured or when the first
    category has no entry in ``category_to_keyword``.  ``request`` and
    ``response`` both use this helper so that the results of a fallback
    ``web`` query are parsed as web results instead of being dropped.
    """
    if categories and categories[0] in category_to_keyword:
        return category_to_keyword[categories[0]]
    return 'web'


# do search-request
def request(query, params):
    """Fill ``params['url']`` with the Qwant API URL for *query*.

    Reads ``params['pageno']`` (1-based page number, 10 results per page) and
    ``params['language']``; returns the same *params* dict with ``'url'`` set.
    """
    offset = (params['pageno'] - 1) * 10

    params['url'] = url.format(keyword=_search_keyword(),
                               query=urlencode({'q': query}),
                               offset=offset)

    # add language tag
    # NOTE(review): `supported_languages` is not defined in this file; it is
    # presumably injected as a module global by the engine loader -- confirm.
    language = match_language(params['language'], supported_languages)
    params['url'] += '&locale=' + language.replace('-', '_').lower()

    return params


# get response from search-request
def response(resp):
    """Convert the JSON body in ``resp.text`` into a list of result dicts.

    The shape of each result dict depends on the keyword of the configured
    category (web, images, social or news).  Returns an empty list when the
    payload has no ``'data'`` entry or no items.
    """
    search_results = loads(resp.text)

    # return empty array if there are no results
    if 'data' not in search_results:
        return []

    # `or` guards against JSON null at any level of the nesting
    data = search_results['data'] or {}
    res = data.get('result') or {}
    items = res.get('items') or []

    # resolved once: it does not change between items
    keyword = _search_keyword()

    results = []

    # parse results
    for result in items:

        title = html_to_text(result['title'])
        res_url = result['url']
        content = html_to_text(result['desc'])

        if keyword == 'web':
            results.append({'title': title,
                            'content': content,
                            'url': res_url})

        elif keyword == 'images':
            thumbnail_src = result['thumbnail']
            img_src = result['media']
            results.append({'template': 'images.html',
                            'url': res_url,
                            'title': title,
                            'content': '',
                            'thumbnail_src': thumbnail_src,
                            'img_src': img_src})

        elif keyword == 'social':
            published_date = datetime.fromtimestamp(result['date'], None)
            img_src = result.get('img', None)
            results.append({'url': res_url,
                            'title': title,
                            'publishedDate': published_date,
                            'content': content,
                            'img_src': img_src})

        elif keyword == 'news':
            published_date = datetime.fromtimestamp(result['date'], None)
            # the picture URL, when present, is taken from the first media entry
            media = result.get('media') or []
            if media:
                img_src = media[0].get('pict', {}).get('url', None)
            else:
                img_src = None
            results.append({'url': res_url,
                            'title': title,
                            'publishedDate': published_date,
                            'content': content,
                            'img_src': img_src})

    return results


# get supported languages from their site
def _fetch_supported_languages(resp):
    """Extract ``'<lang>-<country>'`` codes from the Qwant region page.

    Returns a list with one entry per (language, country) pair found in the
    page; the language code ``'nb'`` is reported as ``'no'``.
    """
    # list of regions is embedded in page as a js object: cut from the first
    # '{' after the 'regionalisation' marker up to the following ');'
    response_text = resp.text
    response_text = response_text[response_text.find('regionalisation'):]
    response_text = response_text[response_text.find('{'):response_text.find(');')]

    regions_json = loads(response_text)

    languages = []
    for lang in regions_json['languages'].values():
        # use a local instead of rewriting the parsed JSON in place
        code = 'no' if lang['code'] == 'nb' else lang['code']
        for country in lang['countries']:
            languages.append(code + '-' + country)

    return languages