summaryrefslogtreecommitdiff
path: root/searx/engines/bing_images.py
diff options
context:
space:
mode:
Diffstat (limited to 'searx/engines/bing_images.py')
-rw-r--r--  searx/engines/bing_images.py  64
1 file changed, 23 insertions(+), 41 deletions(-)
diff --git a/searx/engines/bing_images.py b/searx/engines/bing_images.py
index 44e2c3b..4bee9bc 100644
--- a/searx/engines/bing_images.py
+++ b/searx/engines/bing_images.py
@@ -1,29 +1,31 @@
+# SPDX-License-Identifier: AGPL-3.0-or-later
"""
Bing (Images)
-
- @website https://www.bing.com/images
- @provide-api yes (http://datamarket.azure.com/dataset/bing/search),
- max. 5000 query/month
-
- @using-api no (because of query limit)
- @results HTML (using search portal)
- @stable no (HTML can change)
- @parse url, title, img_src
-
"""
+from urllib.parse import urlencode
from lxml import html
from json import loads
-import re
-from searx.url_utils import urlencode
from searx.utils import match_language
+from searx.engines.bing import language_aliases
+from searx.engines.bing import _fetch_supported_languages, supported_languages_url # NOQA # pylint: disable=unused-import
+
+# about
+about = {
+ "website": 'https://www.bing.com/images',
+ "wikidata_id": 'Q182496',
+ "official_api_documentation": 'https://www.microsoft.com/en-us/bing/apis/bing-image-search-api',
+ "use_official_api": False,
+ "require_api_key": False,
+ "results": 'HTML',
+}
+
# engine dependent config
categories = ['images']
paging = True
safesearch = True
time_range_support = True
-language_support = True
supported_languages_url = 'https://www.bing.com/account/general'
number_of_results = 28
@@ -33,7 +35,7 @@ search_string = 'images/search'\
'?{query}'\
'&count={count}'\
'&first={first}'\
- '&FORM=IBASEP'
+ '&tsc=ImageHoverTitle'
time_range_string = '&qft=+filterui:age-lt{interval}'
time_range_dict = {'day': '1440',
'week': '10080',
@@ -78,19 +80,18 @@ def response(resp):
# parse results
for result in dom.xpath('//div[@class="imgpt"]'):
-
- img_format = result.xpath('./div[contains(@class, "img_info")]/span/text()')[0]
- # Microsoft seems to experiment with this code so don't make the path too specific,
- # just catch the text section for the first anchor in img_info assuming this to be
- # the originating site.
- source = result.xpath('./div[contains(@class, "img_info")]//a/text()')[0]
-
try:
+ img_format = result.xpath('./div[contains(@class, "img_info")]/span/text()')[0]
+ # Microsoft seems to experiment with this code so don't make the path too specific,
+ # just catch the text section for the first anchor in img_info assuming this to be
+ # the originating site.
+ source = result.xpath('./div[contains(@class, "img_info")]//a/text()')[0]
+
m = loads(result.xpath('./a/@m')[0])
# strip 'Unicode private use area' highlighting, they render to Tux
# the Linux penguin and a standing diamond on my machine...
- title = m.get('t', '').replace(u'\ue000', '').replace(u'\ue001', '')
+ title = m.get('t', '').replace('\ue000', '').replace('\ue001', '')
results.append({'template': 'images.html',
'url': m['purl'],
'thumbnail_src': m['turl'],
@@ -103,22 +104,3 @@ def response(resp):
continue
return results
-
-
-# get supported languages from their site
-def _fetch_supported_languages(resp):
- supported_languages = []
- dom = html.fromstring(resp.text)
-
- regions_xpath = '//div[@id="region-section-content"]' \
- + '//ul[@class="b_vList"]/li/a/@href'
-
- regions = dom.xpath(regions_xpath)
- for region in regions:
- code = re.search('setmkt=[^\&]+', region).group()[7:]
- if code == 'nb-NO':
- code = 'no-NO'
-
- supported_languages.append(code)
-
- return supported_languages