diff options
author | Johannes 'josch' Schauer <josch@mister-muffin.de> | 2017-06-16 15:18:31 +0200 |
---|---|---|
committer | Johannes 'josch' Schauer <josch@mister-muffin.de> | 2017-06-16 15:18:31 +0200 |
commit | 7fe1a5ea5ff4aeecbbc2af673cbdc88fbbea18d5 (patch) | |
tree | fecfa8408befea37218807ea487e1f954afb356c /searx/engines/soundcloud.py |
New upstream version 0.12.0+dfsg1
Diffstat (limited to 'searx/engines/soundcloud.py')
-rw-r--r-- | searx/engines/soundcloud.py | 104 |
1 files changed, 104 insertions, 0 deletions
# diff --git a/searx/engines/soundcloud.py b/searx/engines/soundcloud.py
# new file mode 100644, index 0000000..41b40da (@@ -0,0 +1,104 @@)
"""
 Soundcloud (Music)

 @website     https://soundcloud.com
 @provide-api yes (https://developers.soundcloud.com/)

 @using-api   yes
 @results     JSON
 @stable      yes
 @parse       url, title, content, publishedDate, embedded
"""

import re
from json import loads
from lxml import html
from dateutil import parser
from searx import logger
from searx.poolrequests import get as http_get
from searx.url_utils import quote_plus, urlencode

# cStringIO only exists on Python 2; fall back to io on Python 3.
# Catch ImportError only, so unrelated failures are not silently hidden.
try:
    from cStringIO import StringIO
except ImportError:
    from io import StringIO

# engine dependent config
categories = ['music']
paging = True

# search-url
url = 'https://api.soundcloud.com/'
search_url = url + 'search?{query}'\
    '&facet=model'\
    '&limit=20'\
    '&offset={offset}'\
    '&linked_partitioning=1'\
    '&client_id={client_id}'   # noqa

# HTML snippet embedding the SoundCloud player; {uri} must be URL-quoted
embedded_url = '<iframe width="100%" height="166" ' +\
    'scrolling="no" frameborder="no" ' +\
    'data-src="https://w.soundcloud.com/player/?url={uri}"></iframe>'

# matches the client_id:"..." assignment inside SoundCloud's app javascript
cid_re = re.compile(r'client_id:"([^"]*)"', re.I | re.U)


def get_client_id():
    """Look up a guest client_id by scraping the SoundCloud web app.

    Fetches https://soundcloud.com, collects every <script> whose ``src``
    contains '/assets/app', downloads each of those scripts in document
    order and returns the first value captured by ``cid_re``.

    Returns the client id as a string, or "" (after logging a warning)
    when the homepage or scripts cannot be fetched or nothing matches.
    """
    homepage = http_get("https://soundcloud.com")

    if homepage.ok:
        tree = html.fromstring(homepage.content)
        script_tags = tree.xpath("//script[contains(@src, '/assets/app')]")
        # the XPath predicate already guarantees that every tag has a src
        app_js_urls = [script_tag.get('src') for script_tag in script_tags]

        # extracts valid app_js urls from soundcloud.com content
        for app_js_url in app_js_urls:
            # gets app_js and searches for the clientid
            app_js = http_get(app_js_url)
            if app_js.ok:
                match = cid_re.search(app_js.text)
                if match is not None:
                    return match.group(1)

    logger.warning("Unable to fetch guest client_id from SoundCloud, check parser!")
    return ""


# api-key
# NOTE: resolved once when the module is imported (performs HTTP requests)
guest_client_id = get_client_id()


# do search-request
def request(query, params):
    """Fill in ``params['url']`` for the requested result page.

    ``params['pageno']`` is 1-based. The offset advances in steps of 20,
    which has to stay in sync with the ``limit=20`` in ``search_url``.

    Returns the same ``params`` dict that was passed in.
    """
    offset = (params['pageno'] - 1) * 20

    params['url'] = search_url.format(query=urlencode({'q': query}),
                                      offset=offset,
                                      client_id=guest_client_id)

    return params


# get response from search-request
def response(resp):
    """Convert the JSON body of ``resp`` into a list of result dicts.

    Only entries of the 'collection' list whose kind is 'track' or
    'playlist' are kept. Each result carries the keys url, title,
    publishedDate, embedded and content.
    """
    results = []

    search_res = loads(resp.text)

    # parse results
    for result in search_res.get('collection', []):
        # entries without a 'kind' are skipped instead of raising KeyError
        if result.get('kind') in ('track', 'playlist'):
            title = result['title']
            # description may be null or absent; keep content a string
            content = result.get('description') or ''
            publishedDate = parser.parse(result['last_modified'])
            uri = quote_plus(result['uri'])
            embedded = embedded_url.format(uri=uri)

            # append result
            results.append({'url': result['permalink_url'],
                            'title': title,
                            'publishedDate': publishedDate,
                            'embedded': embedded,
                            'content': content})

    # return results
    return results