diff options
author | Johannes 'josch' Schauer <josch@debian.org> | 2020-11-10 13:17:03 +0100 |
---|---|---|
committer | Johannes 'josch' Schauer <josch@debian.org> | 2020-11-10 13:17:03 +0100 |
commit | daeb2c0913653d197fad2a75010cfc6034c6a9e8 (patch) | |
tree | d25328f98ca39a5ac8abad156b5e8f5781505a3c | |
parent | 432eb165b83d4483780a279b02929b05b3e09fa5 (diff) |
New upstream version 2020-11-10+dfsg1
80 files changed, 4052 insertions, 735 deletions
@@ -65,6 +65,7 @@ RSS-Bridge requires PHP 5.6 or higher with following extensions enabled: - [`simplexml`](https://secure.php.net/manual/en/book.simplexml.php) - [`curl`](https://secure.php.net/manual/en/book.curl.php) - [`json`](https://secure.php.net/manual/en/book.json.php) + - [`filter`](https://secure.php.net/manual/en/book.filter.php) - [`sqlite3`](http://php.net/manual/en/book.sqlite3.php) (only when using SQLiteCache) Find more information on our [Wiki](https://github.com/rss-bridge/rss-bridge/wiki) @@ -109,8 +110,8 @@ We are RSS-Bridge community, a group of developers continuing the project initia Use this script to generate the list automatically (using the GitHub API): https://gist.github.com/LogMANOriginal/da00cd1e5f0ca31cef8e193509b17fd8 --> + * [16mhz](https://github.com/16mhz) -* [86423355844265459587182778](https://github.com/86423355844265459587182778) * [adamchainz](https://github.com/adamchainz) * [Ahiles3005](https://github.com/Ahiles3005) * [Albirew](https://github.com/Albirew) @@ -119,9 +120,12 @@ https://gist.github.com/LogMANOriginal/da00cd1e5f0ca31cef8e193509b17fd8 * [alexAubin](https://github.com/alexAubin) * [AmauryCarrade](https://github.com/AmauryCarrade) * [AntoineTurmel](https://github.com/AntoineTurmel) +* [arnd-s](https://github.com/arnd-s) * [ArthurHoaro](https://github.com/ArthurHoaro) * [Astalaseven](https://github.com/Astalaseven) * [Astyan-42](https://github.com/Astyan-42) +* [AxorPL](https://github.com/AxorPL) +* [ayacoo](https://github.com/ayacoo) * [az5he6ch](https://github.com/az5he6ch) * [azdkj532](https://github.com/azdkj532) * [b1nj](https://github.com/b1nj) @@ -133,6 +137,7 @@ https://gist.github.com/LogMANOriginal/da00cd1e5f0ca31cef8e193509b17fd8 * [cnlpete](https://github.com/cnlpete) * [corenting](https://github.com/corenting) * [couraudt](https://github.com/couraudt) +* [csisoap](https://github.com/csisoap) * [cyberjacob](https://github.com/cyberjacob) * [da2x](https://github.com/da2x) * 
[Daiyousei](https://github.com/Daiyousei) @@ -147,29 +152,36 @@ https://gist.github.com/LogMANOriginal/da00cd1e5f0ca31cef8e193509b17fd8 * [em92](https://github.com/em92) * [eMerzh](https://github.com/eMerzh) * [EtienneM](https://github.com/EtienneM) +* [fanch317](https://github.com/fanch317) * [floviolleau](https://github.com/floviolleau) * [fluffy-critter](https://github.com/fluffy-critter) * [Frenzie](https://github.com/Frenzie) * [fulmeek](https://github.com/fulmeek) +* [ggiessen](https://github.com/ggiessen) * [Ginko-Aloe](https://github.com/Ginko-Aloe) * [Glandos](https://github.com/Glandos) * [gloony](https://github.com/gloony) * [GregThib](https://github.com/GregThib) * [griffaurel](https://github.com/griffaurel) * [Grummfy](https://github.com/Grummfy) +* [gsantner](https://github.com/gsantner) * [hunhejj](https://github.com/hunhejj) * [husim0](https://github.com/husim0) * [IceWreck](https://github.com/IceWreck) * [j0k3r](https://github.com/j0k3r) * [JackNUMBER](https://github.com/JackNUMBER) +* [jannyba](https://github.com/jannyba) +* [JasonGhent](https://github.com/JasonGhent) * [jdesgats](https://github.com/jdesgats) * [jdigilio](https://github.com/jdigilio) * [JeremyRand](https://github.com/JeremyRand) * [Jocker666z](https://github.com/Jocker666z) * [johnnygroovy](https://github.com/johnnygroovy) * [johnpc](https://github.com/johnpc) -* [killruana](https://github.com/killruana) +* [joni1993](https://github.com/joni1993) +* [joshcoales](https://github.com/joshcoales) * [klimplant](https://github.com/klimplant) +* [kolarcz](https://github.com/kolarcz) * [kranack](https://github.com/kranack) * [kraoc](https://github.com/kraoc) * [l1n](https://github.com/l1n) @@ -178,6 +190,7 @@ https://gist.github.com/LogMANOriginal/da00cd1e5f0ca31cef8e193509b17fd8 * [lalannev](https://github.com/lalannev) * [ldidry](https://github.com/ldidry) * [Leomaradan](https://github.com/Leomaradan) +* [liamka](https://github.com/liamka) * [Limero](https://github.com/Limero) * 
[LogMANOriginal](https://github.com/LogMANOriginal) * [lorenzos](https://github.com/lorenzos) @@ -188,17 +201,25 @@ https://gist.github.com/LogMANOriginal/da00cd1e5f0ca31cef8e193509b17fd8 * [mdemoss](https://github.com/mdemoss) * [melangue](https://github.com/melangue) * [metaMMA](https://github.com/metaMMA) +* [mibe](https://github.com/mibe) +* [mightymt](https://github.com/mightymt) * [mitsukarenai](https://github.com/mitsukarenai) * [MonsieurPoutounours](https://github.com/MonsieurPoutounours) +* [mr-flibble](https://github.com/mr-flibble) * [mro](https://github.com/mro) +* [mschwld](https://github.com/mschwld) * [mxmehl](https://github.com/mxmehl) * [nel50n](https://github.com/nel50n) * [niawag](https://github.com/niawag) +* [Niehztog](https://github.com/Niehztog) * [Nono-m0le](https://github.com/Nono-m0le) * [ObsidianWitch](https://github.com/ObsidianWitch) * [OliverParoczai](https://github.com/OliverParoczai) -* [oratosquilla-oratoria](https://github.com/oratosquilla-oratoria) +* [Ololbu](https://github.com/Ololbu) * [ORelio](https://github.com/ORelio) +* [otakuf](https://github.com/otakuf) +* [Park0](https://github.com/Park0) +* [Paroleen](https://github.com/Paroleen) * [PaulVayssiere](https://github.com/PaulVayssiere) * [pellaeon](https://github.com/pellaeon) * [Piranhaplant](https://github.com/Piranhaplant) @@ -208,18 +229,23 @@ https://gist.github.com/LogMANOriginal/da00cd1e5f0ca31cef8e193509b17fd8 * [Pofilo](https://github.com/Pofilo) * [prysme01](https://github.com/prysme01) * [quentinus95](https://github.com/quentinus95) +* [RawkBob](https://github.com/RawkBob) * [regisenguehard](https://github.com/regisenguehard) * [Riduidel](https://github.com/Riduidel) * [rogerdc](https://github.com/rogerdc) * [Roliga](https://github.com/Roliga) +* [ronansalmon](https://github.com/ronansalmon) +* [rremizov](https://github.com/rremizov) * [sebsauvage](https://github.com/sebsauvage) * [shutosg](https://github.com/shutosg) +* [Simounet](https://github.com/Simounet) * 
[somini](https://github.com/somini) * [squeek502](https://github.com/squeek502) * [stjohnjohnson](https://github.com/stjohnjohnson) * [Strubbl](https://github.com/Strubbl) * [sublimz](https://github.com/sublimz) * [sunchaserinfo](https://github.com/sunchaserinfo) +* [SuperSandro2000](https://github.com/SuperSandro2000) * [sysadminstory](https://github.com/sysadminstory) * [tameroski](https://github.com/tameroski) * [teromene](https://github.com/teromene) @@ -227,6 +253,7 @@ https://gist.github.com/LogMANOriginal/da00cd1e5f0ca31cef8e193509b17fd8 * [thefranke](https://github.com/thefranke) * [ThePadawan](https://github.com/ThePadawan) * [TheRadialActive](https://github.com/TheRadialActive) +* [theScrabi](https://github.com/theScrabi) * [TitiTestScalingo](https://github.com/TitiTestScalingo) * [triatic](https://github.com/triatic) * [VerifiedJoseph](https://github.com/VerifiedJoseph) @@ -234,8 +261,9 @@ https://gist.github.com/LogMANOriginal/da00cd1e5f0ca31cef8e193509b17fd8 * [wtuuju](https://github.com/wtuuju) * [xurxof](https://github.com/xurxof) * [yardenac](https://github.com/yardenac) +* [ymeister](https://github.com/ymeister) * [ZeNairolf](https://github.com/ZeNairolf) - + Licenses === @@ -243,6 +271,7 @@ The source code for RSS-Bridge is [Public Domain](UNLICENSE). 
RSS-Bridge uses third party libraries with their own license: + * [`Parsedown`](https://github.com/erusev/parsedown) licensed under the [MIT License](http://opensource.org/licenses/MIT) * [`PHP Simple HTML DOM Parser`](http://simplehtmldom.sourceforge.net/) licensed under the [MIT License](http://opensource.org/licenses/MIT) * [`php-urljoin`](https://github.com/fluffy-critter/php-urljoin) licensed under the [MIT License](http://opensource.org/licenses/MIT) diff --git a/bridges/ASRockNewsBridge.php b/bridges/ASRockNewsBridge.php new file mode 100644 index 0000000..1f3f4dd --- /dev/null +++ b/bridges/ASRockNewsBridge.php @@ -0,0 +1,57 @@ +<?php +class ASRockNewsBridge extends BridgeAbstract { + const NAME = 'ASRock News Bridge'; + const URI = 'https://www.asrock.com'; + const DESCRIPTION = 'Returns latest news articles'; + const MAINTAINER = 'VerifiedJoseph'; + const PARAMETERS = array(); + + const CACHE_TIMEOUT = 3600; // 1 hour + + public function collectData() { + + $html = getSimpleHTMLDOM(self::URI . '/news/index.asp') + or returnServerError('Could not request: ' . self::URI . '/news/index.asp'); + + $html = defaultLinkTo($html, self::URI . '/news/'); + + foreach($html->find('div.inner > a') as $index => $a) { + $item = array(); + + $articlePath = $a->href; + + $articlePageHtml = getSimpleHTMLDOMCached($articlePath, self::CACHE_TIMEOUT) + or returnServerError('Could not request: ' . 
$articlePath); + + $articlePageHtml = defaultLinkTo($articlePageHtml, self::URI); + + $contents = $articlePageHtml->find('div.Contents', 0); + + $item['uri'] = $articlePath; + $item['title'] = $contents->find('h5', 0)->innertext; + + $contents->find('h5', 0)->outertext = ''; + + $item['content'] = $contents->innertext; + $item['timestamp'] = $this->extractDate($a->plaintext); + $item['enclosures'][] = $a->find('img', 0)->src; + $this->items[] = $item; + + if (count($this->items) >= 10) { + break; + } + } + } + + private function extractDate($text) { + $dateRegex = '/^([0-9]{4}\/[0-9]{1,2}\/[0-9]{1,2})/'; + + $text = trim($text); + + if (preg_match($dateRegex, $text, $matches)) { + return $matches[1]; + } + + return ''; + } +} diff --git a/bridges/AirBreizhBridge.php b/bridges/AirBreizhBridge.php new file mode 100644 index 0000000..2d852da --- /dev/null +++ b/bridges/AirBreizhBridge.php @@ -0,0 +1,54 @@ +<?php +class AirBreizhBridge extends BridgeAbstract { + + const MAINTAINER = 'fanch317'; + const NAME = 'Air Breizh'; + const URI = 'https://www.airbreizh.asso.fr/'; + const DESCRIPTION = 'Returns newests publications on Air Breizh'; + const PARAMETERS = array( + 'Publications' => array( + 'theme' => array( + 'name' => 'Thematique', + 'type' => 'list', + 'values' => array( + 'Tout' => '', + 'Rapport d\'activite' => 'rapport-dactivite', + 'Etude' => 'etudes', + 'Information' => 'information', + 'Autres documents' => 'autres-documents', + 'Plan Régional de Surveillance de la qualité de l’air' => 'prsqa', + 'Transport' => 'transport' + ) + ) + ) + ); + + public function getIcon() { + return 'https://www.airbreizh.asso.fr/voy_content/uploads/2017/11/favicon.png'; + } + + public function collectData(){ + $html = ''; + $html = getSimpleHTMLDOM(static::URI . 'publications/?fwp_publications_thematiques=' . 
$this->getInput('theme')) + or returnClientError('No results for this query.'); + + foreach ($html->find('article') as $article) { + $item = array(); + // Title + $item['title'] = $article->find('h2', 0)->plaintext; + // Author + $item['author'] = 'Air Breizh'; + // Image + $imagelink = $article->find('.card__image', 0)->find('img', 0)->getAttribute('src'); + // Content preview + $item['content'] = '<img src="' . $imagelink . '" /> + <br/>' + . $article->find('.card__text', 0)->plaintext; + // URL + $item['uri'] = $article->find('.publi__buttons', 0)->find('a', 0)->getAttribute('href'); + // ID + $item['id'] = $article->find('.publi__buttons', 0)->find('a', 0)->getAttribute('href'); + $this->items[] = $item; + } + } +} diff --git a/bridges/AlbionOnlineBridge.php b/bridges/AlbionOnlineBridge.php new file mode 100644 index 0000000..0a93901 --- /dev/null +++ b/bridges/AlbionOnlineBridge.php @@ -0,0 +1,74 @@ +<?php +class AlbionOnlineBridge extends BridgeAbstract { + + const NAME = 'Albion Online Changelog'; + const MAINTAINER = 'otakuf'; + const URI = 'https://albiononline.com'; + const DESCRIPTION = 'Returns the changes made to the Albion Online'; + const CACHE_TIMEOUT = 3600; // 60min + + const PARAMETERS = array( array( + 'postcount' => array( + 'name' => 'Limit', + 'type' => 'number', + 'title' => 'Maximum number of items to return', + 'defaultValue' => 5, + ), + 'language' => array( + 'name' => 'Language', + 'type' => 'list', + 'values' => array( + 'English' => 'en', + 'Deutsch' => 'de', + 'Polski' => 'pl', + 'Français' => 'fr', + 'Русский' => 'ru', + 'Português' => 'pt', + 'Español' => 'es', + ), + 'title' => 'Language of changelog posts', + 'defaultValue' => 'en', + ), + 'full' => array( + 'name' => 'Full changelog', + 'type' => 'checkbox', + 'required' => false, + 'title' => 'Enable to receive the full changelog post for each item' + ), + )); + + public function collectData() { + $api = 'https://albiononline.com/'; + // Example: 
https://albiononline.com/en/changelog/1/5 + $url = $api . $this->getInput('language') . '/changelog/1/' . $this->getInput('postcount'); + + $html = getSimpleHTMLDOM($url) + or returnServerError('Unable to get changelog data from "' . $url . '"!'); + + foreach ($html->find('li') as $data) { + $item = array(); + $item['uri'] = self::URI . $data->find('a', 0)->getAttribute('href'); + $item['title'] = trim(explode('|', $data->find('span', 0)->plaintext)[0]); + // Time below work only with en lang. Need to think about solution. May be separate request like getFullChangelog, but to english list for all language + //print_r( date_parse_from_format( 'M j, Y' , 'Sep 9, 2020') ); + //$item['timestamp'] = $this->extractDate($a->plaintext); + $item['author'] = 'albiononline.com'; + if($this->getInput('full')) { + $item['content'] = $this->getFullChangelog($item['uri']); + } else { + //$item['content'] = trim(preg_replace('/\s+/', ' ', $data->find('span', 0)->plaintext)); + // Just use title, no info at all or use title and date, see above + $item['content'] = $item['title']; + } + $item['uid'] = hash('sha256', $item['title']); + $this->items[] = $item; + } + } + + private function getFullChangelog($url) { + $html = getSimpleHTMLDOMCached($url) + or returnServerError('Unable to load changelog post from "' . $url . 
'"!'); + $html = defaultLinkTo($html, self::URI); + return $html->find('div.small-12.columns', 1)->innertext; + } +} diff --git a/bridges/AllocineFRBridge.php b/bridges/AllocineFRBridge.php index 40ef9a9..00fd0e8 100644 --- a/bridges/AllocineFRBridge.php +++ b/bridges/AllocineFRBridge.php @@ -85,7 +85,7 @@ class AllocineFRBridge extends BridgeAbstract { self::PARAMETERS[$this->queriedContext]['category']['values'] ); - foreach($html->find('div[class=col-left]', 0)->find('div[class*=video-card]') as $element) { + foreach($html->find('div[class=gd-col-left]', 0)->find('div[class*=video-card]') as $element) { $item = array(); $title = $element->find('a[class*=meta-title-link]', 0); diff --git a/bridges/AnidexBridge.php b/bridges/AnidexBridge.php index ae387c9..ff9f5f9 100644 --- a/bridges/AnidexBridge.php +++ b/bridges/AnidexBridge.php @@ -3,7 +3,9 @@ class AnidexBridge extends BridgeAbstract { const MAINTAINER = 'ORelio'; const NAME = 'Anidex'; - const URI = 'https://anidex.info/'; + const URI = 'http://anidex.info/'; // anidex.info has ddos-guard so we need to use anidex.moe + const ALTERNATE_URI = 'https://anidex.moe/'; // anidex.moe returns 301 unless Host is set to anidex.info + const ALTERNATE_HOST = 'anidex.info'; // Correct host for requesting anidex.moe without 301 redirect const DESCRIPTION = 'Returns the newest torrents, with optional search criteria.'; const PARAMETERS = array( array( @@ -108,7 +110,7 @@ class AnidexBridge extends BridgeAbstract { public function collectData() { // Build Search URL from user-provided parameters - $search_url = self::URI . '?s=upload_timestamp&o=desc'; + $search_url = self::ALTERNATE_URI . '?s=upload_timestamp&o=desc'; foreach (array('id', 'lang_id', 'group_id') as $param_name) { $param = $this->getInput($param_name); if (!empty($param) && intval($param) != 0 && ctype_digit(str_replace(',', '', $param))) { @@ -131,8 +133,16 @@ class AnidexBridge extends BridgeAbstract { $opt[CURLOPT_COOKIE] = 'anidex_h_toggle=' . 
$h; } + // We need to use a different Host HTTP header to reach the correct page on ALTERNATE_URI + $headers = array('Host: ' . self::ALTERNATE_HOST); + + // The HTTPS certificate presented by anidex.moe is for anidex.info. We need to ignore this. + // As a consequence, the bridge is intentionally marked as insecure by setting self::URI to http:// + $opt[CURLOPT_SSL_VERIFYHOST] = 0; + $opt[CURLOPT_SSL_VERIFYPEER] = 0; + // Retrieve torrent listing from search results, which does not contain torrent description - $html = getSimpleHTMLDOM($search_url, array(), $opt) + $html = getSimpleHTMLDOM($search_url, $headers, $opt) or returnServerError('Could not request Anidex: ' . $search_url); $links = $html->find('a'); $results = array(); @@ -156,10 +166,11 @@ class AnidexBridge extends BridgeAbstract { if ($torrent_id != 0 && ctype_digit($torrent_id)) { //Retrieve data for this torrent ID - $item_uri = self::URI . 'torrent/' . $torrent_id; + $item_browse_uri = self::URI . 'torrent/' . $torrent_id; + $item_fetch_uri = self::ALTERNATE_URI . 'torrent/' . 
$torrent_id; - //Retrieve full description from torrent page - if ($item_html = getSimpleHTMLDOMCached($item_uri)) { + //Retrieve full description from torrent page (cached for 24 hours: 86400 seconds) + if ($item_html = getSimpleHTMLDOMCached($item_fetch_uri, 86400, $headers, $opt)) { //Retrieve data from page contents $item_title = str_replace(' (Torrent) - AniDex ', '', $item_html->find('title', 0)->plaintext); @@ -191,7 +202,7 @@ class AnidexBridge extends BridgeAbstract { //Build and add final item $item = array(); - $item['uri'] = $item_uri; + $item['uri'] = $item_browse_uri; $item['title'] = $item_title; $item['author'] = $item_author; $item['timestamp'] = $item_date; diff --git a/bridges/AnimeUltimeBridge.php b/bridges/AnimeUltimeBridge.php index bc1dd7b..c83d6dd 100644 --- a/bridges/AnimeUltimeBridge.php +++ b/bridges/AnimeUltimeBridge.php @@ -102,7 +102,6 @@ class AnimeUltimeBridge extends BridgeAbstract { $item_description = defaultLinkTo($item_description, self::URI); $item_description = str_replace("\r", '', $item_description); $item_description = str_replace("\n", '', $item_description); - $item_description = utf8_encode($item_description); //Build and add final item $item = array(); diff --git a/bridges/AutoJMBridge.php b/bridges/AutoJMBridge.php index 25fb2cb..b9825ca 100644 --- a/bridges/AutoJMBridge.php +++ b/bridges/AutoJMBridge.php @@ -77,110 +77,69 @@ class AutoJMBridge extends BridgeAbstract { $model_url = self::URI . $this->getInput('url'); - // Get the session cookies and the form token - $this->getInitialParameters($model_url); - - // Build the form - $post_data = array( - 'form[energy]' => $this->getInput('energy'), - 'form[transmission]' => $this->getInput('transmission'), - 'form[priceMin]' => $this->getInput('priceMin'), - 'form[priceMin]' => $this->getInput('priceMin'), - 'form[_token]' => $this->token - ); + // Build the GET data + $get_data = 'form[energy]=' . $this->getInput('energy') . + '&form[transmission]=' . 
$this->getInput('transmission') . + '&form[priceMin]=' . $this->getInput('priceMin') . + '&form[priceMin]=' . $this->getInput('priceMin'); - // Set the Form request content type + // Set the header 'X-Requested-With' like the website does it $header = array( - 'Content-Type: application/x-www-form-urlencoded; charset=UTF-8', - ); - - // Set the curl options (POST query and content, and session cookies - $curl_opts = array( - CURLOPT_POST => true, - CURLOPT_POSTFIELDS => http_build_query($post_data), - CURLOPT_COOKIE => $this->cookies + 'X-Requested-With: XMLHttpRequest' ); // Get the JSON content of the form - $json = getContents($model_url, $header, $curl_opts) + $json = getContents($model_url . '?' . $get_data, $header) or returnServerError('Could not request AutoJM.'); // Extract the HTML content from the JSON result $data = json_decode($json); - $html = str_get_html($data->content); - - // Go through every finisha of the model - $list = $html->find('h3'); - foreach ($list as $finish) { - $finish_name = $finish->plaintext; - $motorizations = $finish->next_sibling()->find('li'); - foreach ($motorizations as $element) { - $image = $element->find('div[class=block-product-image]', 0)->{'data-ga-banner'}; - $serie = $element->find('span[class=model]', 0)->plaintext; - $url = self::URI . substr($element->find('a', 0)->href, 1); - if ($element->find('span[class*=block-product-nbModel]', 0) != null) { - $availability = 'En Stock'; - } else { - $availability = 'Sur commande'; - } - $discount_html = $element->find('span[class*=tag--promo]', 0); - if ($discount_html != null) { - $discount = $discount_html->plaintext; - } else { - $discount = 'inconnue'; - } - $price = $element->find('span[class=price red h1]', 0)->plaintext; - $item = array(); - $item['title'] = $finish_name . ' ' . $serie; - $item['content'] = '<p><img style="vertical-align:middle ; padding: 10px" src="' . $image . '" />' - . $finish_name . ' ' . $serie . 
'</p>'; - $item['content'] .= '<ul><li>Disponibilité : ' . $availability . '</li>'; - $item['content'] .= '<li>Série : ' . $serie . '</li>'; - $item['content'] .= '<li>Remise : ' . $discount . '</li>'; - $item['content'] .= '<li>Prix : ' . $price . '</li></ul>'; - - // Add a fictionnal anchor to the RSS element URL, based on the item content ; - // As the URL could be identical even if the price change, some RSS reader will not show those offers as new items - $item['uri'] = $url . '#' . md5($item['content']); - - $this->items[] = $item; + $html = str_get_html($data->results); + + // Go through every car of the model + $list = $html->find('div[class=car-card]'); + foreach ($list as $car) { + + // Get the Finish name if this car is the first of a new finish + $prev_tag = $car->prev_sibling(); + if($prev_tag->tag == 'div' && $prev_tag->class == 'results-title') { + $finish_name = $prev_tag->plaintext; } - } - } - /** - * Gets the session cookie and the form token - * - * @param string $pageURL The URL from which to get the values - */ - private function getInitialParameters($pageURL) { - $ch = curl_init(); - curl_setopt($ch, CURLOPT_URL, $pageURL); - curl_setopt($ch, CURLOPT_HEADER, true); - curl_setopt($ch, CURLOPT_RETURNTRANSFER, true); - $data = curl_exec($ch); - - // Separate the response header and the content - $headerSize = curl_getinfo($ch, CURLINFO_HEADER_SIZE); - $header = substr($data, 0, $headerSize); - $content = substr($data, $headerSize); - curl_close($ch); - - // Extract the cookies from the headers - $cookies = ''; - $http_response_header = explode("\r\n", $header); - foreach ($http_response_header as $hdr) { - if (strpos($hdr, 'Set-Cookie') !== false) { - $cLine = explode(':', $hdr)[1]; - $cLine = explode(';', $cLine)[0]; - $cookies .= ';' . 
$cLine; + // Get the info about the car offer + $image = $car->find('div[class=car-card__visual]', 0)->find('img', 0)->src; + $serie = $car->find('div[class=car-card__title]', 0)->plaintext; + $url = $car->find('a', 0)->href; + // Check if the car model is in stock or available only on order + if($car->find('span[class*=tag--dispo]', 0) != null) { + $availability = 'En Stock'; + } else { + $availability = 'Sur commande'; } + $discount_html = $car->find('span[class=promo]', 0); + // Check if there is any discount dsiplayed + if ($discount_html != null) { + $discount = $discount_html->plaintext; + } else { + $discount = 'inconnue'; + } + $price = $car->find('span[class=price]', 0)->plaintext; + + // Construct the new item + $item = array(); + $item['title'] = $finish_name . ' ' . $serie; + $item['content'] = '<p><img style="vertical-align:middle ; padding: 10px" src="' . $image . '" />' + . $finish_name . ' ' . $serie . '</p>'; + $item['content'] .= '<ul><li>Disponibilité : ' . $availability . '</li>'; + $item['content'] .= '<li>Série : ' . $serie . '</li>'; + $item['content'] .= '<li>Remise : ' . $discount . '</li>'; + $item['content'] .= '<li>Prix : ' . $price . '</li></ul>'; + + // Add a fictionnal anchor to the RSS element URL, based on the item content ; + // As the URL could be identical even if the price change, some RSS reader will not show those offers as new items + $item['uri'] = $url . '#' . 
md5($item['content']); + + $this->items[] = $item; } - $this->cookies = trim(substr($cookies, 1)); - - // Get the token from the content - $html = str_get_html($content); - $token = $html->find('input[type=hidden][id=form__token]', 0); - $this->token = $token->value; } } diff --git a/bridges/AwwwardsBridge.php b/bridges/AwwwardsBridge.php new file mode 100644 index 0000000..c1d1d32 --- /dev/null +++ b/bridges/AwwwardsBridge.php @@ -0,0 +1,55 @@ +<?php +class AwwwardsBridge extends BridgeAbstract { + const NAME = 'Awwwards'; + const URI = 'https://www.awwwards.com/'; + const DESCRIPTION = 'Fetches the latest ten sites of the day from Awwwards'; + const MAINTAINER = 'Paroleen'; + const CACHE_TIMEOUT = 3600; + + const SITESURI = 'https://www.awwwards.com/websites/sites_of_the_day/'; + const SITEURI = 'https://www.awwwards.com/sites/'; + const ASSETSURI = 'https://assets.awwwards.com/awards/media/cache/thumb_417_299/'; + + private $sites = array(); + + public function getIcon() { + return 'https://www.awwwards.com/favicon.ico'; + } + + private function fetchSites() { + Debug::log('Fetching all sites'); + $sites = getSimpleHTMLDOM(self::SITESURI) + or returnServerError('Could not fetch JSON for sites.'); + + Debug::log('Parsing all JSON data'); + foreach($sites->find('li[data-model]') as $site) { + $decode = html_entity_decode($site->attr['data-model'], + ENT_QUOTES, 'utf-8'); + $decode = json_decode($decode, true); + $this->sites[] = $decode; + } + } + + public function collectData() { + $this->fetchSites(); + + Debug::log('Building RSS feed'); + foreach($this->sites as $site) { + $item = array(); + $item['title'] = $site['title']; + $item['timestamp'] = $site['createdAt']; + $item['categories'] = $site['tags']; + + $item['content'] = '<img src="' + . self::ASSETSURI + . $site['images']['thumbnail'] + . '">'; + $item['uri'] = self::SITEURI . 
$site['slug']; + + $this->items[] = $item; + + if(count($this->items) >= 10) + break; + } + } +} diff --git a/bridges/BleepingComputerBridge.php b/bridges/BleepingComputerBridge.php new file mode 100644 index 0000000..78ec312 --- /dev/null +++ b/bridges/BleepingComputerBridge.php @@ -0,0 +1,29 @@ +<?php +class BleepingComputerBridge extends FeedExpander { + + const MAINTAINER = 'csisoap'; + const NAME = 'Bleeping Computer'; + const URI = 'https://www.bleepingcomputer.com/'; + const DESCRIPTION = 'Returns the newest articles.'; + + protected function parseItem($item){ + $item = parent::parseItem($item); + + $article_html = getSimpleHTMLDOMCached($item['uri']); + if(!$article_html) { + $item['content'] .= '<p><em>Could not request ' . $this->getName() . ': ' . $item['uri'] . '</em></p>'; + return $item; + } + + $article_content = $article_html->find('div.articleBody', 0)->innertext; + $article_content = stripRecursiveHTMLSection($article_content, 'div', '<div class="cz-related-article-wrapp'); + $item['content'] = trim($article_content); + + return $item; + } + + public function collectData(){ + $feed = static::URI . 
'feed/'; + $this->collectExpandableDatas($feed); + } +} diff --git a/bridges/BlizzardNewsBridge.php b/bridges/BlizzardNewsBridge.php new file mode 100644 index 0000000..156dc29 --- /dev/null +++ b/bridges/BlizzardNewsBridge.php @@ -0,0 +1,60 @@ +<?php + +class BlizzardNewsBridge extends XPathAbstract { + + const NAME = 'Blizzard News'; + const URI = 'https://news.blizzard.com'; + const DESCRIPTION = 'Blizzard (game company) newsfeed'; + const MAINTAINER = 'Niehztog'; + const PARAMETERS = array( + '' => array( + 'locale' => array( + 'name' => 'Language', + 'type' => 'list', + 'values' => array( + 'Deutsch' => 'de-de', + 'English (EU)' => 'en-gb', + 'English (US)' => 'en-us', + 'Español (EU)' => 'es-es', + 'Español (AL)' => 'es-mx', + 'Français' => 'fr-fr', + 'Italiano' => 'it-it', + '日本語' => 'ja-jp', + '한국어' => 'ko-kr', + 'Polski' => 'pl-pl', + 'Português (AL)' => 'pt-br', + 'Русский' => 'ru-ru', + 'ภาษาไทย' => 'th-th', + '简体中文' => 'zh-cn', + '繁體中文' => 'zh-tw' + ), + 'defaultValue' => 'en-us', + 'title' => 'Select your language' + ) + ) + ); + const CACHE_TIMEOUT = 3600; + + const XPATH_EXPRESSION_ITEM = '/html/body/div/div[4]/div[2]/div[2]/div/div/section/ol/li/article'; + const XPATH_EXPRESSION_ITEM_TITLE = './/div/div[2]/h2'; + const XPATH_EXPRESSION_ITEM_CONTENT = './/div[@class="ArticleListItem-description"]/div[@class="h6"]'; + const XPATH_EXPRESSION_ITEM_URI = './/a[@class="ArticleLink ArticleLink"]/@href'; + const XPATH_EXPRESSION_ITEM_AUTHOR = ''; + const XPATH_EXPRESSION_ITEM_TIMESTAMP = './/time[@class="ArticleListItem-footerTimestamp"]/@timestamp'; + const XPATH_EXPRESSION_ITEM_ENCLOSURES = './/div[@class="ArticleListItem-image"]/@style'; + const XPATH_EXPRESSION_ITEM_CATEGORIES = './/div[@class="ArticleListItem-label"]'; + const SETTING_FIX_ENCODING = true; + + /** + * Source Web page URL (should provide either HTML or XML content) + * @return string + */ + protected function getSourceUrl(){ + + $locale = $this->getInput('locale'); + if('zh-cn' === 
$locale) { + return 'https://cn.news.blizzard.com'; + } + return 'https://news.blizzard.com/' . $locale; + } +} diff --git a/bridges/BrutBridge.php b/bridges/BrutBridge.php index 32265b6..d91ce97 100644 --- a/bridges/BrutBridge.php +++ b/bridges/BrutBridge.php @@ -16,6 +16,7 @@ class BrutBridge extends BridgeAbstract { 'Entertainment' => 'entertainment', 'Sports' => 'sport', 'Nature' => 'nature', + 'Health' => 'health', ), 'defaultValue' => 'news', ), @@ -26,6 +27,7 @@ class BrutBridge extends BridgeAbstract { 'United States' => 'us', 'United Kingdom' => 'uk', 'France' => 'fr', + 'Spain' => 'es', 'India' => 'in', 'Mexico' => 'mx', ), diff --git a/bridges/CeskaTelevizeBridge.php b/bridges/CeskaTelevizeBridge.php new file mode 100755 index 0000000..ea3a4bf --- /dev/null +++ b/bridges/CeskaTelevizeBridge.php @@ -0,0 +1,84 @@ +<?php + +class CeskaTelevizeBridge extends BridgeAbstract { + + const NAME = 'Česká televize Bridge'; + const URI = 'https://www.ceskatelevize.cz'; + const CACHE_TIMEOUT = 3600; + const DESCRIPTION = 'Return newest videos'; + const MAINTAINER = 'kolarcz'; + + const PARAMETERS = array( + array( + 'url' => array( + 'name' => 'url to the show', + 'required' => true, + 'exampleValue' => 'https://www.ceskatelevize.cz/porady/1097181328-udalosti/dily/' + ) + ) + ); + + private function fixChars($text) { + return html_entity_decode($text, ENT_QUOTES, 'UTF-8'); + } + + private function getUploadTimeFromString($string) { + if (strpos($string, 'dnes') !== false) { + return strtotime('today'); + } elseif (strpos($string, 'včera') !== false) { + return strtotime('yesterday'); + } elseif (!preg_match('/(\d+).\s(\d+).(\s(\d+))?/', $string, $match)) { + returnServerError('Could not get date from Česká televize string'); + } + + $date = sprintf('%04d-%02d-%02d', isset($match[3]) ? 
$match[3] : date('Y'), $match[2], $match[1]); + return strtotime($date); + } + + public function collectData() { + $url = $this->getInput('url'); + + $validUrl = '/^(https:\/\/www\.ceskatelevize\.cz\/porady\/\d+-[a-z0-9-]+\/)(dily\/((nove|vysilani)\/)?)?$/'; + if (!preg_match($validUrl, $url, $match)) { + returnServerError('Invalid url'); + } + + $category = isset($match[4]) ? $match[4] : 'nove'; + $fixedUrl = "{$match[1]}dily/{$category}/"; + + $html = getSimpleHTMLDOM($fixedUrl) + or returnServerError('Could not request Česká televize'); + + $this->feedUri = $fixedUrl; + $this->feedName = str_replace('Přehled dílů — ', '', $this->fixChars($html->find('title', 0)->plaintext)); + if ($category !== 'nove') { + $this->feedName .= " ({$category})"; + } + + foreach ($html->find('.episodes-broadcast-content a.episode_list_item') as $element) { + $itemTitle = $element->find('.episode_list_item-title', 0); + $itemContent = $element->find('.episode_list_item-desc', 0); + $itemDate = $element->find('.episode_list_item-date', 0); + $itemThumbnail = $element->find('img', 0); + $itemUri = self::URI . $element->getAttribute('href'); + + $item = array( + 'title' => $this->fixChars($itemTitle->plaintext), + 'uri' => $itemUri, + 'content' => '<img src="https:' . $itemThumbnail->getAttribute('src') . '" /><br />' + . $this->fixChars($itemContent->plaintext), + 'timestamp' => $this->getUploadTimeFromString($itemDate->plaintext) + ); + + $this->items[] = $item; + } + } + + public function getURI() { + return isset($this->feedUri) ? $this->feedUri : parent::getURI(); + } + + public function getName() { + return isset($this->feedName) ? 
$this->feedName : parent::getName(); + } +} diff --git a/bridges/DarkReadingBridge.php b/bridges/DarkReadingBridge.php index 3baaad7..6ab83e9 100644 --- a/bridges/DarkReadingBridge.php +++ b/bridges/DarkReadingBridge.php @@ -53,6 +53,8 @@ class DarkReadingBridge extends FeedExpander { protected function parseItem($newsItem){ $item = parent::parseItem($newsItem); + if (empty($item['content'])) + return null; //ignore dummy articles $article = getSimpleHTMLDOMCached($item['uri']) or returnServerError('Could not request Dark Reading: ' . $item['uri']); $item['content'] = $this->extractArticleContent($article); diff --git a/bridges/DevToBridge.php b/bridges/DevToBridge.php index c298d46..def7e76 100644 --- a/bridges/DevToBridge.php +++ b/bridges/DevToBridge.php @@ -45,24 +45,22 @@ apple-icon-5c6fa9f2bce280428589c6195b7f1924206a53b782b371cfe2d02da932c8c173.png' } public function collectData() { - $html = getSimpleHTMLDOMCached($this->getURI()) or returnServerError('Could not request ' . $this->getURI()); $html = defaultLinkTo($html, static::URI); - $articles = $html->find('div.single-article') + $articles = $html->find('div.crayons-story') or returnServerError('Could not find articles!'); foreach($articles as $article) { $item = array(); $item['uri'] = $article->find('a[id*=article-link]', 0)->href; - $item['title'] = $article->find('h3', 0)->plaintext; + $item['title'] = $article->find('h2 > a', 0)->plaintext; - // i.e. 
"Charlie Harrington・Sep 21" - $item['timestamp'] = strtotime(explode('・', $article->find('h4 a', 0)->plaintext, 2)[1]); - $item['author'] = explode('・', $article->find('h4 a', 0)->plaintext, 2)[0]; + $item['timestamp'] = $article->find('time', 0)->datetime; + $item['author'] = $article->find('a.crayons-story__secondary.fw-medium', 0)->plaintext; // Profile image $item['enclosures'] = array($article->find('img', 0)->src); @@ -70,7 +68,6 @@ apple-icon-5c6fa9f2bce280428589c6195b7f1924206a53b782b371cfe2d02da932c8c173.png' if($this->getInput('full')) { $fullArticle = $this->getFullArticle($item['uri']); $item['content'] = <<<EOD -<img src="{$item['enclosures'][0]}" alt="{$item['author']}"> <p>{$fullArticle}</p> EOD; } else { @@ -80,11 +77,13 @@ EOD; EOD; } - $item['categories'] = array_map(function($e){ return $e->plaintext; }, $article->find('div.tags span.tag')); + // categories + foreach ($article->find('a.crayons-tag') as $tag) { + $item['categories'][] = str_replace('#', '', $tag->plaintext); + } $this->items[] = $item; } - } public function getName() { @@ -101,6 +100,10 @@ EOD; $html = defaultLinkTo($html, static::URI); + if ($html->find('div.crayons-article__cover', 0)) { + return $html->find('div.crayons-article__cover', 0) . 
$html->find('[id="article-body"]', 0); + } + return $html->find('[id="article-body"]', 0); } } diff --git a/bridges/DiarioDeNoticiasBridge.php b/bridges/DiarioDeNoticiasBridge.php new file mode 100644 index 0000000..887eb11 --- /dev/null +++ b/bridges/DiarioDeNoticiasBridge.php @@ -0,0 +1,84 @@ +<?php +class DiarioDeNoticiasBridge extends BridgeAbstract { + const NAME = 'Diário de Notícias (PT)'; + const URI = 'https://dn.pt'; + const DESCRIPTION = 'Diário de Notícias (DN.PT)'; + const MAINTAINER = 'somini'; + const PARAMETERS = array( + 'Tag' => array( + 'n' => array( + 'name' => 'Tag Name', + 'exampleValue' => 'rogerio-casanova', + ) + ) + ); + + const MONPT = array( + 'jan', + 'fev', + 'mar', + 'abr', + 'mai', + 'jun', + 'jul', + 'ago', + 'set', + 'out', + 'nov', + 'dez', + ); + + public function getIcon() { + return 'https://static.globalnoticias.pt/dn/common/images/favicons/favicon-128.png'; + } + + public function getName() { + switch($this->queriedContext) { + case 'Tag': + $name = self::NAME . ' | Tag | ' . $this->getInput('n'); + break; + default: + $name = self::NAME; + } + return $name; + } + + public function getURI() { + switch($this->queriedContext) { + case 'Tag': + $url = self::URI . '/tag/' . $this->getInput('n') . '.html'; + break; + default: + $url = self::URI; + } + return $url; + } + + public function collectData() { + $archives = self::getURI(); + $html = getSimpleHTMLDOMCached($archives) + or returnServerError('Could not load content'); + + foreach($html->find('article') as $element) { + $item = array(); + + $title = $element->find('.t-am-title', 0); + $link = $element->find('a.t-am-text', 0); + + $item['title'] = $title->plaintext; + $item['uri'] = self::URI . 
$link->href; + + $snippet = $element->find('.t-am-lead', 0); + if ($snippet) { + $item['content'] = $snippet->plaintext; + } + preg_match('|edicao-do-dia\\/(?P<day>\d\d)-(?P<monpt>\w\w\w)-(?P<year>\d\d\d\d)|', $link->href, $d); + if ($d) { + $item['timestamp'] = sprintf('%s-%s-%s', $d['year'], array_search($d['monpt'], self::MONPT) + 1, $d['day']); + } + + $this->items[] = $item; + } + + } +} diff --git a/bridges/DownDetectorBridge.php b/bridges/DownDetectorBridge.php index 4aef372..bfbce69 100644 --- a/bridges/DownDetectorBridge.php +++ b/bridges/DownDetectorBridge.php @@ -6125,9 +6125,16 @@ class DownDetectorBridge extends BridgeAbstract { $table = $html->find('table.table-striped', 0); $maxCount = 10; - foreach ($table->find('tr') as $downEvent) { - $downLink = $downEvent->find('td', 1)->find('a', 1); - $item = $this->collectArticleData($downLink->getAttribute('href')); + foreach ($table->find('tr') as $event) { + $td = $event->find('td', 0); + + if (is_null($td)) { + continue; + } + + $link = $event->find('td', 0)->find('a', 0); + + $item = $this->collectArticleData($link->getAttribute('href')); $this->items[] = $item; if($maxCount == 0) break; $maxCount -= 1; diff --git a/bridges/DribbbleBridge.php b/bridges/DribbbleBridge.php index b1193c9..01cfb21 100644 --- a/bridges/DribbbleBridge.php +++ b/bridges/DribbbleBridge.php @@ -13,7 +13,7 @@ favicon-63b2904a073c89b52b19aa08cebc16a154bcf83fee8ecc6439968b1e6db569c7.ico'; } public function collectData(){ - $html = getSimpleHTMLDOM(self::URI . '/shots') + $html = getSimpleHTMLDOM(self::URI) or returnServerError('Error while downloading the website content'); $json = $this->loadEmbeddedJsonData($html); @@ -24,19 +24,19 @@ favicon-63b2904a073c89b52b19aa08cebc16a154bcf83fee8ecc6439968b1e6db569c7.ico'; $additional_data = $this->findJsonForShot($shot, $json); if ($additional_data === null) { $item['uri'] = self::URI . 
$shot->find('a', 0)->href; - $item['title'] = $shot->find('.dribbble-over strong', 0)->plaintext; + $item['title'] = $shot->find('.shot-title', 0)->plaintext; } else { $item['timestamp'] = strtotime($additional_data['published_at']); $item['uri'] = self::URI . $additional_data['path']; $item['title'] = $additional_data['title']; } - $item['author'] = trim($shot->find('.attribution-user a', 0)->plaintext); + $item['author'] = trim($shot->find('.user-information .display-name', 0)->plaintext); $description = $shot->find('.comment', 0); $item['content'] = $description === null ? '' : $description->plaintext; - $preview_path = $shot->find('picture source', 0)->attr['srcset']; + $preview_path = $shot->find('figure img', 1)->attr['data-srcset']; $item['content'] .= $this->getImageTag($preview_path, $item['title']); $item['enclosures'] = array($this->getFullSizeImagePath($preview_path)); @@ -51,10 +51,13 @@ favicon-63b2904a073c89b52b19aa08cebc16a154bcf83fee8ecc6439968b1e6db569c7.ico'; foreach($scripts as $script) { if(strpos($script->innertext, 'newestShots') !== false) { // fix single quotes - $script->innertext = str_replace('\'', '"', $script->innertext); + $script->innertext = preg_replace('/\'(.*)\'(,?)$/im', '"\1"\2', $script->innertext); // fix JavaScript JSON (why do they not adhere to the standard?) - $script->innertext = preg_replace('/(\w+):/i', '"\1":', $script->innertext); + $script->innertext = preg_replace('/^(\s*)(\w+):/im', '\1"\2":', $script->innertext); + + // fix relative dates, so they are recognized by strtotime + $script->innertext = preg_replace('/"about ([0-9]+ hours? 
ago)"(,?)$/im', '"\1"\2', $script->innertext); // find beginning of JSON array $start = strpos($script->innertext, '['); @@ -91,6 +94,6 @@ favicon-63b2904a073c89b52b19aa08cebc16a154bcf83fee8ecc6439968b1e6db569c7.ico'; } private function getFullSizeImagePath($preview_path){ - return str_replace('_1x', '', $preview_path); + return explode('?compress=1', $preview_path)[0]; } } diff --git a/bridges/EpicgamesBridge.php b/bridges/EpicgamesBridge.php new file mode 100644 index 0000000..e6ba542 --- /dev/null +++ b/bridges/EpicgamesBridge.php @@ -0,0 +1,93 @@ +<?php +class EpicgamesBridge extends BridgeAbstract { + + const NAME = 'Epic Games Store News'; + const MAINTAINER = 'otakuf'; + const URI = 'https://www.epicgames.com'; + const DESCRIPTION = 'Returns the latest posts from epicgames.com'; + const CACHE_TIMEOUT = 3600; // 60min + + const PARAMETERS = array( array( + 'postcount' => array( + 'name' => 'Limit', + 'type' => 'number', + 'title' => 'Maximum number of items to return', + 'defaultValue' => 10, + ), + 'language' => array( + 'name' => 'Language', + 'type' => 'list', + 'values' => array( + 'English' => 'en', + 'العربية' => 'ar', + 'Deutsch' => 'de', + 'Español (Spain)' => 'es-ES', + 'Español (LA)' => 'es-MX', + 'Français' => 'fr', + 'Italiano' => 'it', + '日本語' => 'ja', + '한국어' => 'ko', + 'Polski' => 'pl', + 'Português (Brasil)' => 'pt-BR', + 'Русский' => 'ru', + 'ไทย' => 'th', + 'Türkçe' => 'tr', + '简体中文' => 'zh-CN', + '繁體中文' => 'zh-Hant', + ), + 'title' => 'Language of blog posts', + 'defaultValue' => 'en', + ), + )); + + public function collectData() { + $api = 'https://store-content.ak.epicgames.com/api/'; + + // Get sticky posts first + // Example: https://store-content.ak.epicgames.com/api/ru/content/blog/sticky?locale=ru + $urlSticky = $api . $this->getInput('language') . '/content/blog/sticky'; + // Then get posts + // Example: https://store-content.ak.epicgames.com/api/ru/content/blog?limit=25 + $urlBlog = $api . $this->getInput('language') . 
'/content/blog?limit=' . $this->getInput('postcount'); + + $dataSticky = getContents($urlSticky) + or returnServerError('Unable to get the sticky posts from epicgames.com!'); + $dataBlog = getContents($urlBlog) + or returnServerError('Unable to get the news posts from epicgames.com!'); + + // Merge data + $decodedData = array_merge(json_decode($dataSticky), json_decode($dataBlog)); + + foreach($decodedData as $key => $value) { + $item = array(); + $item['uri'] = self::URI . $value->url; + $item['title'] = $value->title; + $item['timestamp'] = $value->date; + $item['author'] = 'Epic Games Store'; + if(!empty($value->author)) { + $item['author'] = $value->author; + } + if(!empty($value->content)) { + $item['content'] = defaultLinkTo($value->content, self::URI); + } + if(!empty($value->image)) { + $item['enclosures'][] = $value->image; + } + $item['uid'] = $value->_id; + $item['id'] = $value->_id; + + $this->items[] = $item; + } + + // Sort data + usort($this->items, function ($item1, $item2) { + if ($item2['timestamp'] == $item1['timestamp']) { + return 0; + } + return ($item2['timestamp'] < $item1['timestamp']) ? 
-1 : 1; + }); + + // Limit data + $this->items = array_slice($this->items, 0, $this->getInput('postcount')); + } +} diff --git a/bridges/FM4Bridge.php b/bridges/FM4Bridge.php new file mode 100644 index 0000000..e129c5c --- /dev/null +++ b/bridges/FM4Bridge.php @@ -0,0 +1,67 @@ +<?php + +class FM4Bridge extends BridgeAbstract +{ + const MAINTAINER = 'joni1993'; + const NAME = 'FM4 Bridge'; + const URI = 'https://fm4.orf.at'; + const CACHE_TIMEOUT = 1800; // 30min + const DESCRIPTION = 'Feed for FM4 articles by tags (authors)'; + const PARAMETERS = array( + array( + 'tag' => array( + 'name' => 'Tag (author, category, ...)', + 'title' => 'Tag to retrieve', + 'exampleValue' => 'musik' + ), + 'loadcontent' => array( + 'name' => 'Load Full Article Content', + 'title' => 'Retrieve full content of articles (may take longer)', + 'type' => 'checkbox' + ), + 'pages' => array( + 'name' => 'Pages', + 'title' => 'Amount of pages to load', + 'type' => 'number', + 'defaultValue' => 1 + ) + ) + ); + + private function getPageData($tag, $page) { + if($tag) + $uri = self::URI . '/tags/' . $tag; + else + $uri = self::URI; + + $uri = $uri . '?page=' . 
$page; + + $html = getSimpleHTMLDOM($uri) + or returnServerError('Error while downloading the website content'); + + $page_items = array(); + + foreach ($html->find('div[class*=listItem]') as $article) { + $item = array(); + + $item['uri'] = $article->find('a', 0)->href; + $item['title'] = $article->find('h2', 0)->plaintext; + $item['author'] = $article->find('p[class*=keyword]', 0)->plaintext; + $item['timestamp'] = strtotime($article->find('p[class*=time]', 0)->plaintext); + + if ($this->getInput('loadcontent')) { + $item['content'] = getSimpleHTMLDOM($item['uri'])->find('div[class=storyText]', 0)->innertext + or returnServerError('Error while downloading the full article'); + } + + $page_items[] = $item; + } + return $page_items; + } + + public function collectData() { + for ($cur_page = 1; $cur_page <= $this->getInput('pages'); $cur_page++) { + $this->items = array_merge($this->items, $this->getPageData($this->getInput('tag'), $cur_page)); + } + } +} diff --git a/bridges/FacebookBridge.php b/bridges/FacebookBridge.php index 13ccb27..c03de4e 100644 --- a/bridges/FacebookBridge.php +++ b/bridges/FacebookBridge.php @@ -30,7 +30,7 @@ class FacebookBridge extends BridgeAbstract { 'type' => 'checkbox', 'required' => false, 'defaultValue' => false, - 'title' => 'Feed includes reviews when checked' + 'title' => 'Feed includes reviews when unchecked' ) ), 'Group' => array( @@ -175,7 +175,13 @@ class FacebookBridge extends BridgeAbstract { $header = array(); } - $html = getSimpleHTMLDOM($this->getURI(), $header) + $touchURI = str_replace( + 'https://www.facebook', + 'https://touch.facebook', + $this->getURI() + ); + + $html = getSimpleHTMLDOM($touchURI, $header) or returnServerError('Failed loading facebook page: ' . 
$this->getURI()); if(!$this->isPublicGroup($html)) { @@ -186,19 +192,18 @@ class FacebookBridge extends BridgeAbstract { $this->groupName = $this->extractGroupName($html); - $posts = $html->find('div.userContentWrapper') + $posts = $html->find('div.story_body_container') or returnServerError('Failed finding posts!'); foreach($posts as $post) { $item = array(); - $item['uri'] = $this->extractGroupURI($post); - $item['title'] = $this->extractGroupTitle($post); - $item['author'] = $this->extractGroupAuthor($post); - $item['content'] = $this->extractGroupContent($post); - $item['timestamp'] = $this->extractGroupTimestamp($post); - $item['enclosures'] = $this->extractGroupEnclosures($post); + $item['uri'] = $this->extractGroupPostURI($post); + $item['title'] = $this->extractGroupPostTitle($post); + $item['author'] = $this->extractGroupPostAuthor($post); + $item['content'] = $this->extractGroupPostContent($post); + $item['enclosures'] = $this->extractGroupPostEnclosures($post); $this->items[] = $item; @@ -215,16 +220,7 @@ class FacebookBridge extends BridgeAbstract { $urlparts = parse_url($group); - if($urlparts['host'] !== parse_url(self::URI)['host'] - && 'www.' . $urlparts['host'] !== parse_url(self::URI)['host']) { - - returnClientError('The host you provided is invalid! Received "' - . $urlparts['host'] - . '", expected "' - . parse_url(self::URI)['host'] - . 
'"!'); - - } + $this->validateHost($urlparts['host']); return explode('/', $urlparts['path'])[2]; @@ -236,24 +232,47 @@ class FacebookBridge extends BridgeAbstract { } - private function isPublicGroup($html) { + private function validateHost($provided_host) { + // Handle mobile links + if (strpos($provided_host, 'm.') === 0) { + $provided_host = substr($provided_host, strlen('m.')); + } + if (strpos($provided_host, 'touch.') === 0) { + $provided_host = substr($provided_host, strlen('touch.')); + } - // Facebook redirects to the groups about page for non-public groups - $about = $html->find('#pagelet_group_about', 0); + $facebook_host = parse_url(self::URI)['host']; - return !($about); + if ($provided_host !== $facebook_host + && 'www.' . $provided_host !== $facebook_host) { + returnClientError('The host you provided is invalid! Received "' + . $provided_host + . '", expected "' + . $facebook_host + . '"!'); + } + } + + /** + * @param $html simple_html_dom + * @return bool + */ + private function isPublicGroup($html) { + // Facebook touch just presents a login page for non-public groups + $title = $html->find('title', 0); + return $title->plaintext !== 'Log in to Facebook | Facebook'; } private function extractGroupName($html) { - $ogtitle = $html->find('meta[property="og:title"]', 0) + $ogtitle = $html->find('._de1', 0) or returnServerError('Unable to find group title!'); - return html_entity_decode($ogtitle->content, ENT_QUOTES); + return html_entity_decode($ogtitle->plaintext, ENT_QUOTES); } - private function extractGroupURI($post) { + private function extractGroupPostURI($post) { $elements = $post->find('a') or returnServerError('Unable to find URI!'); @@ -262,7 +281,8 @@ class FacebookBridge extends BridgeAbstract { // Find the one that is a permalink if(strpos($anchor->href, 'permalink') !== false) { - return $anchor->href; + $arr = explode('?', $anchor->href, 2); + return $arr[0]; } } @@ -271,57 +291,61 @@ class FacebookBridge extends BridgeAbstract { } - 
private function extractGroupContent($post) { + private function extractGroupPostContent($post) { - $content = $post->find('div.userContent', 0) + $content = $post->find('div._5rgt', 0) or returnServerError('Unable to find user content!'); - return $content->innertext . $content->next_sibling()->innertext; - - } - - private function extractGroupTimestamp($post) { - - $element = $post->find('abbr[data-utime]', 0) - or returnServerError('Unable to find timestamp!'); - - return $element->getAttribute('data-utime'); + $context_text = $content->innertext; + if ($content->next_sibling() !== null) { + $context_text .= $content->next_sibling()->innertext; + } + return $context_text; } - private function extractGroupAuthor($post) { + private function extractGroupPostAuthor($post) { - $element = $post->find('img', 0) + $element = $post->find('h3 a', 0) or returnServerError('Unable to find author information!'); - return $element->{'aria-label'}; + return $element->plaintext; } - private function extractGroupEnclosures($post) { + private function extractGroupPostEnclosures($post) { - $elements = $post->find('div.userContent', 0)->next_sibling()->find('img'); + $elements = $post->find('span._6qdm'); + if ($post->find('div._5rgt', 0)->next_sibling() !== null) { + array_push($elements, ...$post->find('div._5rgt', 0)->next_sibling()->find('i.img')); + } $enclosures = array(); + $background_img_regex = '/background-image: ?url\\((.+?)\\);/'; + foreach($elements as $enclosure) { - $enclosures[] = $enclosure->src; + if(preg_match($background_img_regex, $enclosure, $matches) > 0) { + $bg_img_value = trim(html_entity_decode($matches[1], ENT_QUOTES), "'\""); + $bg_img_url = urldecode(preg_replace('/\\\([0-9a-z]{2}) /', '%$1', $bg_img_value)); + $enclosures[] = urldecode($bg_img_url); + } } return empty($enclosures) ? 
null : $enclosures; } - private function extractGroupTitle($post) { + private function extractGroupPostTitle($post) { - $element = $post->find('h5', 0) + $element = $post->find('h3', 0) or returnServerError('Unable to find title!'); if(strpos($element->plaintext, 'shared') === false) { - $content = strip_tags($this->extractGroupContent($post)); + $content = strip_tags($this->extractGroupPostContent($post)); - return $this->extractGroupAuthor($post) + return $this->extractGroupPostAuthor($post) . ' posted: ' . substr( $content, @@ -348,13 +372,7 @@ class FacebookBridge extends BridgeAbstract { $urlparts = parse_url($user); - if($urlparts['host'] !== parse_url(self::URI)['host']) { - returnClientError('The host you provided is invalid! Received "' - . $urlparts['host'] - . '", expected "' - . parse_url(self::URI)['host'] - . '"!'); - } + $this->validateHost($urlparts['host']); if(!array_key_exists('path', $urlparts) || $urlparts['path'] === '/') { @@ -555,7 +573,7 @@ EOD; } // No captcha? We can carry on retrieving page contents :) - // First, we check wether the page is public or not + // First, we check whether the page is public or not $loginForm = $html->find('._585r', 0); if($loginForm != null) { diff --git a/bridges/FicbookBridge.php b/bridges/FicbookBridge.php index 8b8a57f..7c89701 100644 --- a/bridges/FicbookBridge.php +++ b/bridges/FicbookBridge.php @@ -35,6 +35,8 @@ class FicbookBridge extends BridgeAbstract { ), ); + protected $titleName; + public function getURI() { switch($this->queriedContext) { case 'Site News': { @@ -56,6 +58,21 @@ class FicbookBridge extends BridgeAbstract { } } + public function getName() { + switch($this->queriedContext) { + case 'Site News': { + return $this->queriedContext . ' | ' . self::NAME; + } + case 'Fiction Updates': { + return $this->titleName . ' | ' . self::NAME; + } + case 'Fiction Comments': { + return $this->titleName . ' | Comments | ' . 
self::NAME; + } + default: return self::NAME; + } + } + public function collectData() { $header = array('Accept-Language: en-US'); @@ -65,6 +82,10 @@ class FicbookBridge extends BridgeAbstract { $html = defaultLinkTo($html, self::URI); + if ($this->queriedContext == 'Fiction Updates' or $this->queriedContext == 'Fiction Comments') { + $this->titleName = $html->find('.fanfic-main-info > h1', 0)->innertext; + } + switch($this->queriedContext) { case 'Site News': return $this->collectSiteNews($html); case 'Fiction Updates': return $this->collectUpdatesData($html); @@ -84,7 +105,7 @@ class FicbookBridge extends BridgeAbstract { } private function collectCommentsData($html) { - foreach($html->find('article.post') as $article) { + foreach($html->find('article.comment-container') as $article) { $this->items[] = array( 'uri' => $article->find('.comment_link_to_fic > a', 0)->href, 'title' => $article->find('.comment_author', 0)->plaintext, @@ -97,7 +118,7 @@ class FicbookBridge extends BridgeAbstract { } private function collectUpdatesData($html) { - foreach($html->find('ul.table-of-contents > li') as $chapter) { + foreach($html->find('ul.list-of-fanfic-parts > li') as $chapter) { $item = array( 'uri' => $chapter->find('a', 0)->href, 'title' => $chapter->find('a', 0)->plaintext, @@ -130,10 +151,10 @@ class FicbookBridge extends BridgeAbstract { 'июня', 'июля', 'августа', - 'Сентября', + 'сентября', 'октября', - 'Ноября', - 'Декабря', + 'ноября', + 'декабря', ); $en_month = array( diff --git a/bridges/FolhaDeSaoPauloBridge.php b/bridges/FolhaDeSaoPauloBridge.php index acd8d25..9a9717c 100644 --- a/bridges/FolhaDeSaoPauloBridge.php +++ b/bridges/FolhaDeSaoPauloBridge.php @@ -26,8 +26,9 @@ class FolhaDeSaoPauloBridge extends FeedExpander { $item_content = $articleHTMLContent->find('div.c-news__body', 0); if ($item_content) { $text = $item_content->innertext; - $text = strip_tags($text, '<p><b><a><blockquote><img><em>'); + $text = strip_tags($text, 
'<p><b><a><blockquote><figure><figcaption><img><strong><em>'); $item['content'] = $text; + $item['uri'] = explode('*', $item['uri'])[1]; } } else { Debug::log('???: ' . $item['uri']); diff --git a/bridges/FuturaSciencesBridge.php b/bridges/FuturaSciencesBridge.php index 772f443..79c0588 100644 --- a/bridges/FuturaSciencesBridge.php +++ b/bridges/FuturaSciencesBridge.php @@ -96,7 +96,7 @@ class FuturaSciencesBridge extends FeedExpander { } private function extractArticleContent($article){ - $contents = $article->find('section.article-text-classic', 0)->innertext; + $contents = $article->find('section.article-text', 1)->innertext; $headline = trim($article->find('p.description', 0)->plaintext); if(!empty($headline)) $headline = '<p><b>' . $headline . '</b></p>'; @@ -129,6 +129,7 @@ class FuturaSciencesBridge extends FeedExpander { $contents = stripWithDelimiters($contents, 'fs:xt:clickname="', '"'); $contents = StripWithDelimiters($contents, '<section class="module-toretain module-propal-nl', '</section>'); $contents = stripWithDelimiters($contents, '<script ', '</script>'); + $contents = stripWithDelimiters($contents, '<script>', '</script>'); return $headline . trim($contents); } diff --git a/bridges/GBAtempBridge.php b/bridges/GBAtempBridge.php index 48a7f85..e084195 100644 --- a/bridges/GBAtempBridge.php +++ b/bridges/GBAtempBridge.php @@ -113,8 +113,8 @@ class GBAtempBridge extends BridgeAbstract { break; case 'T': foreach($html->find('li.portal-tutorial') as $tutorialItem) { - $url = self::URI . $tutorialItem->find('a', 0)->href; - $title = $tutorialItem->find('a', 0)->plaintext; + $url = self::URI . 
$tutorialItem->find('a', 1)->href; + $title = $tutorialItem->find('a', 1)->plaintext; $time = $this->findItemDate($tutorialItem); $author = $tutorialItem->find('a.username', 0)->plaintext; $content = $this->fetchPostContent($url, self::URI); diff --git a/bridges/GithubTrendingBridge.php b/bridges/GithubTrendingBridge.php new file mode 100644 index 0000000..0b4a907 --- /dev/null +++ b/bridges/GithubTrendingBridge.php @@ -0,0 +1,636 @@ +<?php +class GithubTrendingBridge extends BridgeAbstract { + + const MAINTAINER = 'liamka'; + const NAME = 'Github Trending'; + const URI = 'https://github.com/trending'; + const URI_ITEM = 'https://github.com'; + const CACHE_TIMEOUT = 43200; // 12hr + const DESCRIPTION = 'See what the GitHub community is most excited repos.'; + const PARAMETERS = array( + 'By language' => array( + 'language' => array( + 'name' => 'Select language', + 'type' => 'list', + 'values' => array( + 'All languages' => '', + 'C++' => 'c++', + 'HTML' => 'html', + 'Java' => 'java', + 'JavaScript' => 'javascript', + 'PHP' => 'php', + 'Python' => 'python', + 'Ruby' => 'ruby', + 'Unknown languages' => 'unknown languages', + '1C Enterprise' => '1c enterprise', + '4D' => '4d', + 'ABAP' => 'abap', + 'ABNF' => 'abnf', + 'ActionScript' => 'actionscript', + 'Ada' => 'ada', + 'Adobe Font Metrics' => 'adobe font metrics', + 'Agda' => 'agda', + 'AGS Script' => 'ags script', + 'Alloy' => 'alloy', + 'Alpine Abuild' => 'alpine abuild', + 'Altium Designer' => 'altium designer', + 'AMPL' => 'ampl', + 'AngelScript' => 'angelscript', + 'Ant Build System' => 'ant build system', + 'ANTLR' => 'antlr', + 'ApacheConf' => 'apacheconf', + 'Apex' => 'apex', + 'API Blueprint' => 'api blueprint', + 'APL' => 'apl', + 'Apollo Guidance Computer' => 'apollo guidance computer', + 'AppleScript' => 'applescript', + 'Arc' => 'arc', + 'AsciiDoc' => 'asciidoc', + 'ASN.1' => 'asn.1', + 'ASP' => 'asp', + 'AspectJ' => 'aspectj', + 'Assembly' => 'assembly', + 'Asymptote' => 'asymptote', + 'ATS' => 'ats', 
+ 'Augeas' => 'augeas', + 'AutoHotkey' => 'autohotkey', + 'AutoIt' => 'autoit', + 'Awk' => 'awk', + 'Ballerina' => 'ballerina', + 'Batchfile' => 'batchfile', + 'Befunge' => 'befunge', + 'BibTeX' => 'bibtex', + 'Bison' => 'bison', + 'BitBake' => 'bitbake', + 'Blade' => 'blade', + 'BlitzBasic' => 'blitzbasic', + 'BlitzMax' => 'blitzmax', + 'Bluespec' => 'bluespec', + 'Boo' => 'boo', + 'Brainfuck' => 'brainfuck', + 'Brightscript' => 'brightscript', + 'Zeek' => 'zeek', + 'C' => 'c', + 'C#' => 'c#', + 'C++' => 'c++', + 'C-ObjDump' => 'c-objdump', + 'C2hs Haskell' => 'c2hs haskell', + 'Cabal Config' => 'cabal config', + 'CartoCSS' => 'cartocss', + 'Ceylon' => 'ceylon', + 'Chapel' => 'chapel', + 'Charity' => 'charity', + 'ChucK' => 'chuck', + 'Cirru' => 'cirru', + 'Clarion' => 'clarion', + 'Clean' => 'clean', + 'Click' => 'click', + 'CLIPS' => 'clips', + 'Clojure' => 'clojure', + 'Closure Templates' => 'closure templates', + 'Cloud Firestore Security Rules' => 'cloud firestore security rules', + 'CMake' => 'cmake', + 'COBOL' => 'cobol', + 'CodeQL' => 'codeql', + 'CoffeeScript' => 'coffeescript', + 'ColdFusion' => 'coldfusion', + 'ColdFusion CFC' => 'coldfusion cfc', + 'COLLADA' => 'collada', + 'Common Lisp' => 'common lisp', + 'Common Workflow Language' => 'common workflow language', + 'Component Pascal' => 'component pascal', + 'CoNLL-U' => 'conll-u', + 'Cool' => 'cool', + 'Coq' => 'coq', + 'Cpp-ObjDump' => 'cpp-objdump', + 'Creole' => 'creole', + 'Crystal' => 'crystal', + 'CSON' => 'cson', + 'Csound' => 'csound', + 'Csound Document' => 'csound document', + 'Csound Score' => 'csound score', + 'CSS' => 'css', + 'CSV' => 'csv', + 'Cuda' => 'cuda', + 'cURL Config' => 'curl config', + 'CWeb' => 'cweb', + 'Cycript' => 'cycript', + 'Cython' => 'cython', + 'D' => 'd', + 'D-ObjDump' => 'd-objdump', + 'Darcs Patch' => 'darcs patch', + 'Dart' => 'dart', + 'DataWeave' => 'dataweave', + 'desktop' => 'desktop', + 'Dhall' => 'dhall', + 'Diff' => 'diff', + 'DIGITAL Command Language' => 
'digital command language', + 'dircolors' => 'dircolors', + 'DirectX 3D File' => 'directx 3d file', + 'DM' => 'dm', + 'DNS Zone' => 'dns zone', + 'Dockerfile' => 'dockerfile', + 'Dogescript' => 'dogescript', + 'DTrace' => 'dtrace', + 'Dylan' => 'dylan', + 'E' => 'e', + 'Eagle' => 'eagle', + 'Easybuild' => 'easybuild', + 'EBNF' => 'ebnf', + 'eC' => 'ec', + 'Ecere Projects' => 'ecere projects', + 'ECL' => 'ecl', + 'ECLiPSe' => 'eclipse', + 'EditorConfig' => 'editorconfig', + 'Edje Data Collection' => 'edje data collection', + 'edn' => 'edn', + 'Eiffel' => 'eiffel', + 'EJS' => 'ejs', + 'Elixir' => 'elixir', + 'Elm' => 'elm', + 'Emacs Lisp' => 'emacs lisp', + 'EmberScript' => 'emberscript', + 'EML' => 'eml', + 'EQ' => 'eq', + 'Erlang' => 'erlang', + 'F#' => 'f#', + 'F*' => 'f*', + 'Factor' => 'factor', + 'Fancy' => 'fancy', + 'Fantom' => 'fantom', + 'Faust' => 'faust', + 'FIGlet Font' => 'figlet font', + 'Filebench WML' => 'filebench wml', + 'Filterscript' => 'filterscript', + 'fish' => 'fish', + 'FLUX' => 'flux', + 'Formatted' => 'formatted', + 'Forth' => 'forth', + 'Fortran' => 'fortran', + 'FreeMarker' => 'freemarker', + 'Frege' => 'frege', + 'G-code' => 'g-code', + 'Game Maker Language' => 'game maker language', + 'GAML' => 'gaml', + 'GAMS' => 'gams', + 'GAP' => 'gap', + 'GCC Machine Description' => 'gcc machine description', + 'GDB' => 'gdb', + 'GDScript' => 'gdscript', + 'Genie' => 'genie', + 'Genshi' => 'genshi', + 'Gentoo Ebuild' => 'gentoo ebuild', + 'Gentoo Eclass' => 'gentoo eclass', + 'Gerber Image' => 'gerber image', + 'Gettext Catalog' => 'gettext catalog', + 'Gherkin' => 'gherkin', + 'Git Attributes' => 'git attributes', + 'Git Config' => 'git config', + 'GLSL' => 'glsl', + 'Glyph' => 'glyph', + 'Glyph Bitmap Distribution Format' => 'glyph bitmap distribution format', + 'GN' => 'gn', + 'Gnuplot' => 'gnuplot', + 'Go' => 'go', + 'Golo' => 'golo', + 'Gosu' => 'gosu', + 'Grace' => 'grace', + 'Gradle' => 'gradle', + 'Grammatical Framework' => 'grammatical 
framework', + 'Graph Modeling Language' => 'graph modeling language', + 'GraphQL' => 'graphql', + 'Graphviz (DOT)' => 'graphviz (dot)', + 'Groovy' => 'groovy', + 'Groovy Server Pages' => 'groovy server pages', + 'Hack' => 'hack', + 'Haml' => 'haml', + 'Handlebars' => 'handlebars', + 'HAProxy' => 'haproxy', + 'Harbour' => 'harbour', + 'Haskell' => 'haskell', + 'Haxe' => 'haxe', + 'HCL' => 'hcl', + 'HiveQL' => 'hiveql', + 'HLSL' => 'hlsl', + 'HolyC' => 'holyc', + 'HTML' => 'html', + 'HTML+Django' => 'html+django', + 'HTML+ECR' => 'html+ecr', + 'HTML+EEX' => 'html+eex', + 'HTML+ERB' => 'html+erb', + 'HTML+PHP' => 'html+php', + 'HTML+Razor' => 'html+razor', + 'HTTP' => 'http', + 'HXML' => 'hxml', + 'Hy' => 'hy', + 'HyPhy' => 'hyphy', + 'IDL' => 'idl', + 'Idris' => 'idris', + 'Ignore List' => 'ignore list', + 'IGOR Pro' => 'igor pro', + 'Inform 7' => 'inform 7', + 'INI' => 'ini', + 'Inno Setup' => 'inno setup', + 'Io' => 'io', + 'Ioke' => 'ioke', + 'IRC log' => 'irc log', + 'Isabelle' => 'isabelle', + 'Isabelle ROOT' => 'isabelle root', + 'J' => 'j', + 'Jasmin' => 'jasmin', + 'Java' => 'java', + 'Java Properties' => 'java properties', + 'Java Server Pages' => 'java server pages', + 'JavaScript' => 'javascript', + 'JavaScript+ERB' => 'javascript+erb', + 'JFlex' => 'jflex', + 'Jison' => 'jison', + 'Jison Lex' => 'jison lex', + 'Jolie' => 'jolie', + 'JSON' => 'json', + 'JSON with Comments' => 'json with comments', + 'JSON5' => 'json5', + 'JSONiq' => 'jsoniq', + 'JSONLD' => 'jsonld', + 'Jsonnet' => 'jsonnet', + 'JSX' => 'jsx', + 'Julia' => 'julia', + 'Jupyter Notebook' => 'jupyter notebook', + 'KiCad Layout' => 'kicad layout', + 'KiCad Legacy Layout' => 'kicad legacy layout', + 'KiCad Schematic' => 'kicad schematic', + 'Kit' => 'kit', + 'Kotlin' => 'kotlin', + 'KRL' => 'krl', + 'LabVIEW' => 'labview', + 'Lasso' => 'lasso', + 'Latte' => 'latte', + 'Lean' => 'lean', + 'Less' => 'less', + 'Lex' => 'lex', + 'LFE' => 'lfe', + 'LilyPond' => 'lilypond', + 'Limbo' => 'limbo', + 
'Linker Script' => 'linker script', + 'Linux Kernel Module' => 'linux kernel module', + 'Liquid' => 'liquid', + 'Literate Agda' => 'literate agda', + 'Literate CoffeeScript' => 'literate coffeescript', + 'Literate Haskell' => 'literate haskell', + 'LiveScript' => 'livescript', + 'LLVM' => 'llvm', + 'Logos' => 'logos', + 'Logtalk' => 'logtalk', + 'LOLCODE' => 'lolcode', + 'LookML' => 'lookml', + 'LoomScript' => 'loomscript', + 'LSL' => 'lsl', + 'LTspice Symbol' => 'ltspice symbol', + 'Lua' => 'lua', + 'M' => 'm', + 'M4' => 'm4', + 'M4Sugar' => 'm4sugar', + 'Makefile' => 'makefile', + 'Mako' => 'mako', + 'Markdown' => 'markdown', + 'Marko' => 'marko', + 'Mask' => 'mask', + 'Mathematica' => 'mathematica', + 'MATLAB' => 'matlab', + 'Maven POM' => 'maven pom', + 'Max' => 'max', + 'MAXScript' => 'maxscript', + 'mcfunction' => 'mcfunction', + 'MediaWiki' => 'mediawiki', + 'Mercury' => 'mercury', + 'Meson' => 'meson', + 'Metal' => 'metal', + 'Microsoft Developer Studio Project' => 'microsoft developer studio project', + 'MiniD' => 'minid', + 'Mirah' => 'mirah', + 'mIRC Script' => 'mirc script', + 'MLIR' => 'mlir', + 'Modelica' => 'modelica', + 'Modula-2' => 'modula-2', + 'Modula-3' => 'modula-3', + 'Module Management System' => 'module management system', + 'Monkey' => 'monkey', + 'Moocode' => 'moocode', + 'MoonScript' => 'moonscript', + 'Motorola 68K Assembly' => 'motorola 68k assembly', + 'MQL4' => 'mql4', + 'MQL5' => 'mql5', + 'MTML' => 'mtml', + 'MUF' => 'muf', + 'mupad' => 'mupad', + 'Muse' => 'muse', + 'Myghty' => 'myghty', + 'nanorc' => 'nanorc', + 'NASL' => 'nasl', + 'NCL' => 'ncl', + 'Nearley' => 'nearley', + 'Nemerle' => 'nemerle', + 'nesC' => 'nesc', + 'NetLinx' => 'netlinx', + 'NetLinx+ERB' => 'netlinx+erb', + 'NetLogo' => 'netlogo', + 'NewLisp' => 'newlisp', + 'Nextflow' => 'nextflow', + 'Nginx' => 'nginx', + 'Nim' => 'nim', + 'Ninja' => 'ninja', + 'Nit' => 'nit', + 'Nix' => 'nix', + 'NL' => 'nl', + 'NPM Config' => 'npm config', + 'NSIS' => 'nsis', + 'Nu' => 
'nu', + 'NumPy' => 'numpy', + 'ObjDump' => 'objdump', + 'Object Data Instance Notation' => 'object data instance notation', + 'Objective-C' => 'objective-c', + 'Objective-C++' => 'objective-c++', + 'Objective-J' => 'objective-j', + 'ObjectScript' => 'objectscript', + 'OCaml' => 'ocaml', + 'Odin' => 'odin', + 'Omgrofl' => 'omgrofl', + 'ooc' => 'ooc', + 'Opa' => 'opa', + 'Opal' => 'opal', + 'Open Policy Agent' => 'open policy agent', + 'OpenCL' => 'opencl', + 'OpenEdge ABL' => 'openedge abl', + 'OpenQASM' => 'openqasm', + 'OpenRC runscript' => 'openrc runscript', + 'OpenSCAD' => 'openscad', + 'OpenStep Property List' => 'openstep property list', + 'OpenType Feature File' => 'opentype feature file', + 'Org' => 'org', + 'Ox' => 'ox', + 'Oxygene' => 'oxygene', + 'Oz' => 'oz', + 'P4' => 'p4', + 'Pan' => 'pan', + 'Papyrus' => 'papyrus', + 'Parrot' => 'parrot', + 'Parrot Assembly' => 'parrot assembly', + 'Parrot Internal Representation' => 'parrot internal representation', + 'Pascal' => 'pascal', + 'Pawn' => 'pawn', + 'Pep8' => 'pep8', + 'Perl' => 'perl', + 'PHP' => 'php', + 'Pic' => 'pic', + 'Pickle' => 'pickle', + 'PicoLisp' => 'picolisp', + 'PigLatin' => 'piglatin', + 'Pike' => 'pike', + 'PLpgSQL' => 'plpgsql', + 'PLSQL' => 'plsql', + 'Pod' => 'pod', + 'Pod 6' => 'pod 6', + 'PogoScript' => 'pogoscript', + 'Pony' => 'pony', + 'PostCSS' => 'postcss', + 'PostScript' => 'postscript', + 'POV-Ray SDL' => 'pov-ray sdl', + 'PowerBuilder' => 'powerbuilder', + 'PowerShell' => 'powershell', + 'Prisma' => 'prisma', + 'Processing' => 'processing', + 'Proguard' => 'proguard', + 'Prolog' => 'prolog', + 'Propeller Spin' => 'propeller spin', + 'Protocol Buffer' => 'protocol buffer', + 'Public Key' => 'public key', + 'Pug' => 'pug', + 'Puppet' => 'puppet', + 'Pure Data' => 'pure data', + 'PureBasic' => 'purebasic', + 'PureScript' => 'purescript', + 'Python' => 'python', + 'Python console' => 'python console', + 'Python traceback' => 'python traceback', + 'q' => 'q', + 'QMake' => 'qmake', 
+ 'QML' => 'qml', + 'Quake' => 'quake', + 'R' => 'r', + 'Racket' => 'racket', + 'Ragel' => 'ragel', + 'Raku' => 'raku', + 'RAML' => 'raml', + 'Rascal' => 'rascal', + 'Raw token data' => 'raw token data', + 'RDoc' => 'rdoc', + 'Readline Config' => 'readline config', + 'REALbasic' => 'realbasic', + 'Reason' => 'reason', + 'Rebol' => 'rebol', + 'Red' => 'red', + 'Redcode' => 'redcode', + 'Regular Expression' => 'regular expression', + // 'Ren'Py' => 'ren'py', + 'RenderScript' => 'renderscript', + 'reStructuredText' => 'restructuredtext', + 'REXX' => 'rexx', + 'RHTML' => 'rhtml', + 'Rich Text Format' => 'rich text format', + 'Ring' => 'ring', + 'Riot' => 'riot', + 'RMarkdown' => 'rmarkdown', + 'RobotFramework' => 'robotframework', + 'Roff' => 'roff', + 'Roff Manpage' => 'roff manpage', + 'Rouge' => 'rouge', + 'RPC' => 'rpc', + 'RPM Spec' => 'rpm spec', + 'Ruby' => 'ruby', + 'RUNOFF' => 'runoff', + 'Rust' => 'rust', + 'Sage' => 'sage', + 'SaltStack' => 'saltstack', + 'SAS' => 'sas', + 'Sass' => 'sass', + 'Scala' => 'scala', + 'Scaml' => 'scaml', + 'Scheme' => 'scheme', + 'Scilab' => 'scilab', + 'SCSS' => 'scss', + 'sed' => 'sed', + 'Self' => 'self', + 'ShaderLab' => 'shaderlab', + 'Shell' => 'shell', + 'ShellSession' => 'shellsession', + 'Shen' => 'shen', + 'Slash' => 'slash', + 'Slice' => 'slice', + 'Slim' => 'slim', + 'Smali' => 'smali', + 'Smalltalk' => 'smalltalk', + 'Smarty' => 'smarty', + 'SmPL' => 'smpl', + 'SMT' => 'smt', + 'Solidity' => 'solidity', + 'SourcePawn' => 'sourcepawn', + 'SPARQL' => 'sparql', + 'Spline Font Database' => 'spline font database', + 'SQF' => 'sqf', + 'SQL' => 'sql', + 'SQLPL' => 'sqlpl', + 'Squirrel' => 'squirrel', + 'SRecode Template' => 'srecode template', + 'SSH Config' => 'ssh config', + 'Stan' => 'stan', + 'Standard ML' => 'standard ml', + 'Starlark' => 'starlark', + 'Stata' => 'stata', + 'STON' => 'ston', + 'Stylus' => 'stylus', + 'SubRip Text' => 'subrip text', + 'SugarSS' => 'sugarss', + 'SuperCollider' => 'supercollider', + 
'Svelte' => 'svelte', + 'SVG' => 'svg', + 'Swift' => 'swift', + 'SWIG' => 'swig', + 'SystemVerilog' => 'systemverilog', + 'Tcl' => 'tcl', + 'Tcsh' => 'tcsh', + 'Tea' => 'tea', + 'Terra' => 'terra', + 'TeX' => 'tex', + 'Texinfo' => 'texinfo', + 'Text' => 'text', + 'Textile' => 'textile', + 'Thrift' => 'thrift', + 'TI Program' => 'ti program', + 'TLA' => 'tla', + 'TOML' => 'toml', + 'TSQL' => 'tsql', + 'TSX' => 'tsx', + 'Turing' => 'turing', + 'Turtle' => 'turtle', + 'Twig' => 'twig', + 'TXL' => 'txl', + 'Type Language' => 'type language', + 'TypeScript' => 'typescript', + 'Unified Parallel C' => 'unified parallel c', + 'Unity3D Asset' => 'unity3d asset', + 'Unix Assembly' => 'unix assembly', + 'Uno' => 'uno', + 'UnrealScript' => 'unrealscript', + 'UrWeb' => 'urweb', + 'V' => 'v', + 'Vala' => 'vala', + 'VBA' => 'vba', + 'VBScript' => 'vbscript', + 'VCL' => 'vcl', + 'Verilog' => 'verilog', + 'VHDL' => 'vhdl', + 'Vim script' => 'vim script', + 'Vim Snippet' => 'vim snippet', + 'Visual Basic .NET' => 'visual basic .net', + 'Visual Basic .NET' => 'visual basic .net', + 'Volt' => 'volt', + 'Vue' => 'vue', + 'Wavefront Material' => 'wavefront material', + 'Wavefront Object' => 'wavefront object', + 'wdl' => 'wdl', + 'Web Ontology Language' => 'web ontology language', + 'WebAssembly' => 'webassembly', + 'WebIDL' => 'webidl', + 'WebVTT' => 'webvtt', + 'Wget Config' => 'wget config', + 'Windows Registry Entries' => 'windows registry entries', + 'wisp' => 'wisp', + 'Wollok' => 'wollok', + 'World of Warcraft Addon Data' => 'world of warcraft addon data', + 'X BitMap' => 'x bitmap', + 'X Font Directory Index' => 'x font directory index', + 'X PixMap' => 'x pixmap', + 'X10' => 'x10', + 'xBase' => 'xbase', + 'XC' => 'xc', + 'XCompose' => 'xcompose', + 'XML' => 'xml', + 'XML Property List' => 'xml property list', + 'Xojo' => 'xojo', + 'XPages' => 'xpages', + 'XProc' => 'xproc', + 'XQuery' => 'xquery', + 'XS' => 'xs', + 'XSLT' => 'xslt', + 'Xtend' => 'xtend', + 'Yacc' => 'yacc', + 
'YAML' => 'yaml', + 'YANG' => 'yang', + 'YARA' => 'yara', + 'YASnippet' => 'yasnippet', + 'ZAP' => 'zap', + 'Zeek' => 'zeek', + 'ZenScript' => 'zenscript', + 'Zephir' => 'zephir', + 'Zig' => 'zig', + 'ZIL' => 'zil', + 'Zimpl' => 'zimpl', + ), + 'defaultValue' => 'All languages' + ) + ), + + 'global' => array( + 'date_range' => array( + 'name' => 'Date range', + 'type' => 'list', + 'required' => false, + 'values' => array( + 'Today' => 'today', + 'Weekly' => 'weekly', + 'Monthly' => 'monthly', + ), + 'defaultValue' => 'today' + ) + ) + + ); + + public function collectData(){ + $params = array('since' => urlencode($this->getInput('date_range'))); + $url = self::URI . '/' . $this->getInput('language') . '?' . http_build_query($params); + + $html = getSimpleHTMLDOM($url) + or returnServerError('Error while downloading the website content'); + + $this->items = array(); + foreach($html->find('.Box-row') as $element) { + $item = array(); + + // URI + $item['uri'] = self::URI_ITEM . $element->find('h1 a', 0)->href; + + // Title + $item['title'] = str_replace(' ', '', trim(strip_tags($element->find('h1 a', 0)->plaintext))); + + // Description + $item['content'] = trim(strip_tags($element->find('p.text-gray', 0)->innertext)); + + // Time + $item['timestamp'] = time(); + + // TODO: Proxy? + $this->items[] = $item; + } + } + + public function getName(){ + if($this->getInput('language') == '') { + return self::NAME . ': all'; + } elseif (!is_null($this->getInput('language'))) { + return self::NAME . ': ' . 
$this->getInput('language'); + } + + return parent::getName(); + } +} diff --git a/bridges/GizmodoBridge.php b/bridges/GizmodoBridge.php index 35f162b..4b924a2 100644 --- a/bridges/GizmodoBridge.php +++ b/bridges/GizmodoBridge.php @@ -3,34 +3,78 @@ class GizmodoBridge extends FeedExpander { const MAINTAINER = 'polopollo'; const NAME = 'Gizmodo'; - const URI = 'http://gizmodo.com/'; + const URI = 'https://gizmodo.com'; const CACHE_TIMEOUT = 1800; // 30min - const DESCRIPTION = 'Returns the newest posts from Gizmodo (full text).'; + const DESCRIPTION = 'Returns the newest posts from Gizmodo.'; - protected function parseItem($item){ + protected function parseItem($item) { $item = parent::parseItem($item); - $articleHTMLContent = getSimpleHTMLDOMCached($item['uri']); - if(!$articleHTMLContent) { - $text = 'Could not load ' . $item['uri']; - } else { - $text = $articleHTMLContent->find('div.entry-content', 0)->innertext; - foreach($articleHTMLContent->find('pagespeed_iframe') as $element) { - $text .= '<p>link to a iframe (could be a video): <a href="' - . $element->src - . '">' - . $element->src - . '</a></p><br>'; - } + $html = getSimpleHTMLDOMCached($item['uri']) + or returnServerError('Could not request: ' . 
$item['uri']); - $text = strip_tags($text, '<p><b><a><blockquote><img><em>'); - } + $html = defaultLinkTo($html, $this->getURI()); + $this->stripTags($html); + $this->handleFigureTags($html); + $this->handleIframeTags($html); + + // Get header image + $image = $html->find('meta[property="og:image"]', 0)->content; + + $item['content'] = $html->find('div.js_post-content', 0)->innertext; + + // Get categories + $categories = explode(',', $html->find('meta[name="keywords"]', 0)->content); + $item['categories'] = array_map('trim', $categories); + + $item['enclosures'][] = $html->find('meta[property="og:image"]', 0)->content; - $item['content'] = $text; return $item; } - public function collectData(){ - $this->collectExpandableDatas('http://feeds.gawker.com/gizmodo/full'); + public function collectData() { + $this->collectExpandableDatas(self::URI . '/rss', 20); + } + + private function stripTags($html) { + foreach ($html->find('aside') as $aside) { + $aside->outertext = ''; + } + + foreach ($html->find('div.ad-unit') as $div) { + $div->outertext = ''; + } + + foreach ($html->find('script') as $script) { + $script->outertext = ''; + } + } + + private function handleFigureTags($html) { + foreach ($html->find('figure') as $index => $figure) { + + if (isset($figure->attr['data-id'])) { + $id = $figure->attr['data-id']; + $format = $figure->attr['data-format']; + + } else { + $img = $figure->find('img', 0); + $id = $img->attr['data-chomp-id']; + $format = $img->attr['data-format']; + $figure->find('div.img-permalink-sub-wrapper', 0)->style = ''; + } + + $imageUrl = 'https://i.kinja-img.com/gawker-media/image/upload/' . $id . '.' . 
$format; + + $figure->find('span', 0)->outertext = <<<EOD +<img src="{$imageUrl}"> +EOD; + } + } + + private function handleIframeTags($html) { + foreach($html->find('iframe') as $iframe) { + $iframe->src = urljoin($this->getURI(), $iframe->src); + } } } diff --git a/bridges/GoogleSearchBridge.php b/bridges/GoogleSearchBridge.php index e02aaeb..10f0f12 100644 --- a/bridges/GoogleSearchBridge.php +++ b/bridges/GoogleSearchBridge.php @@ -35,16 +35,10 @@ class GoogleSearchBridge extends BridgeAbstract { $item = array(); - // Extract direct URL from google href (eg. /url?q=...) $t = $element->find('a[href]', 0)->href; - $item['uri'] = '' . $t; - parse_str(parse_url($t, PHP_URL_QUERY), $parameters); - if(isset($parameters['q'])) { - $item['uri'] = $parameters['q']; - } - + $item['uri'] = htmlspecialchars_decode($t); $item['title'] = $element->find('h3', 0)->plaintext; - $item['content'] = $element->find('span[class=st]', 0)->plaintext; + $item['content'] = $element->find('span[class=aCOpRe]', 0)->plaintext; $this->items[] = $item; } diff --git a/bridges/HeiseBridge.php b/bridges/HeiseBridge.php index 1d9d802..fd72fbb 100644 --- a/bridges/HeiseBridge.php +++ b/bridges/HeiseBridge.php @@ -40,18 +40,15 @@ class HeiseBridge extends FeedExpander { protected function parseItem($feedItem) { $item = parent::parseItem($feedItem); - $uri = $item['uri']; + $uri = $item['uri'] . '&seite=all'; - do { - $article = getSimpleHTMLDOMCached($uri) - or returnServerError('Could not open article: ' . $uri); + $article = getSimpleHTMLDOMCached($uri) + or returnServerError('Could not open article: ' . 
$uri); + if ($article) { $article = defaultLinkTo($article, $uri); $item = $this->addArticleToItem($item, $article); - - if($next = $article->find('.pagination a[rel="next"]', 0)) - $uri = $next->href; - } while ($next); + } return $item; } @@ -62,6 +59,9 @@ class HeiseBridge extends FeedExpander { $content = $article->find('div[class*="article-content"]', 0); + if ($content == null) + $content = $article->find('#article_content', 0); + foreach($content->find('p, h3, ul, table, pre, img') as $element) { $item['content'] .= $element; } diff --git a/bridges/InstagramBridge.php b/bridges/InstagramBridge.php index 0a6dbaa..43df4e4 100644 --- a/bridges/InstagramBridge.php +++ b/bridges/InstagramBridge.php @@ -47,7 +47,7 @@ class InstagramBridge extends BridgeAbstract { ); const USER_QUERY_HASH = '58b6785bea111c67129decbe6a448951'; - const TAG_QUERY_HASH = '174a5243287c5f3a7de741089750ab3b'; + const TAG_QUERY_HASH = '9b498c08113f1e09617a1703c22b2f32'; const SHORTCODE_QUERY_HASH = '865589822932d1b43dfe312121dd353a'; protected function getInstagramUserId($username) { @@ -65,7 +65,7 @@ class InstagramBridge extends BridgeAbstract { $data = getContents(self::URI . 'web/search/topsearch/?query=' . 
$username); foreach(json_decode($data)->users as $user) { - if($user->user->username === $username) { + if(strtolower($user->user->username) === strtolower($username)) { $key = $user->user->pk; } } diff --git a/bridges/KoreusBridge.php b/bridges/KoreusBridge.php index a5e09cb..4cfb8c2 100644 --- a/bridges/KoreusBridge.php +++ b/bridges/KoreusBridge.php @@ -3,7 +3,7 @@ class KoreusBridge extends FeedExpander { const MAINTAINER = 'pit-fgfjiudghdf'; const NAME = 'Koreus'; - const URI = 'http://www.koreus.com/'; + const URI = 'https://www.koreus.com/'; const DESCRIPTION = 'Returns the newest posts from Koreus (full text)'; protected function parseItem($item){ @@ -17,6 +17,6 @@ class KoreusBridge extends FeedExpander { } public function collectData(){ - $this->collectExpandableDatas('http://feeds.feedburner.com/Koreus-articles'); + $this->collectExpandableDatas('https://feeds.feedburner.com/Koreus-articles'); } } diff --git a/bridges/LeMondeInformatiqueBridge.php b/bridges/LeMondeInformatiqueBridge.php index 45aa607..b85a963 100644 --- a/bridges/LeMondeInformatiqueBridge.php +++ b/bridges/LeMondeInformatiqueBridge.php @@ -26,8 +26,8 @@ class LeMondeInformatiqueBridge extends FeedExpander { //No response header sets the encoding, explicit conversion is needed or subsequent xml_encode() will fail $content_node = $article_html->find('div.col-primary, div.col-sm-9', 0); - $item['content'] = utf8_encode($this->cleanArticle($content_node->innertext)); - $item['author'] = utf8_encode($article_html->find('div.author-infos', 0)->find('b', 0)->plaintext); + $item['content'] = $this->cleanArticle($content_node->innertext); + $item['author'] = $article_html->find('div.author-infos', 0)->find('b', 0)->plaintext; return $item; } diff --git a/bridges/LesJoiesDuCodeBridge.php b/bridges/LesJoiesDuCodeBridge.php index 0957d92..c79b111 100644 --- a/bridges/LesJoiesDuCodeBridge.php +++ b/bridges/LesJoiesDuCodeBridge.php @@ -11,7 +11,7 @@ class LesJoiesDuCodeBridge extends BridgeAbstract { 
$html = getSimpleHTMLDOM(self::URI) or returnServerError('Could not request LesJoiesDuCode.'); - foreach($html->find('div.blog-post') as $element) { + foreach($html->find('article.blog-post') as $element) { $item = array(); $temp = $element->find('h1 a', 0); $titre = html_entity_decode($temp->innertext); diff --git a/bridges/MallTvBridge.php b/bridges/MallTvBridge.php new file mode 100644 index 0000000..34b38e8 --- /dev/null +++ b/bridges/MallTvBridge.php @@ -0,0 +1,73 @@ +<?php + +class MallTvBridge extends BridgeAbstract { + + const NAME = 'MALL.TV Bridge'; + const URI = 'https://www.mall.tv'; + const CACHE_TIMEOUT = 3600; + const DESCRIPTION = 'Return newest videos'; + const MAINTAINER = 'kolarcz'; + + const PARAMETERS = array( + array( + 'url' => array( + 'name' => 'url to the show', + 'required' => true, + 'exampleValue' => 'https://www.mall.tv/zivot-je-hra' + ) + ) + ); + + private function fixChars($text) { + return html_entity_decode($text, ENT_QUOTES, 'UTF-8'); + } + + private function getUploadTimeFromUrl($url) { + $html = getSimpleHTMLDOM($url) + or returnServerError('Could not request MALL.TV detail page'); + + $scriptLdJson = $html->find('script[type="application/ld+json"]', 0)->innertext; + if (!preg_match('/[\'"]uploadDate[\'"]\s*:\s*[\'"](\d{4}-\d{2}-\d{2})[\'"]/', $scriptLdJson, $match)) { + returnServerError('Could not get date from MALL.TV detail page'); + } + + return strtotime($match[1]); + } + + public function collectData() { + $url = $this->getInput('url'); + + if (!preg_match('/^https:\/\/www\.mall\.tv\/[a-z0-9-]+(\/[a-z0-9-]+)?\/?$/', $url)) { + returnServerError('Invalid url'); + } + + $html = getSimpleHTMLDOM($url) + or returnServerError('Could not request MALL.TV'); + + $this->feedUri = $url; + $this->feedName = $this->fixChars($html->find('title', 0)->plaintext); + + foreach ($html->find('section.isVideo .video-card') as $element) { + $itemTitle = $element->find('.video-card__details-link', 0); + $itemThumbnail = 
$element->find('.video-card__thumbnail', 0); + $itemUri = self::URI . $itemTitle->getAttribute('href'); + + $item = array( + 'title' => $this->fixChars($itemTitle->plaintext), + 'uri' => $itemUri, + 'content' => '<img src="' . $itemThumbnail->getAttribute('data-src') . '" />', + 'timestamp' => $this->getUploadTimeFromUrl($itemUri) + ); + + $this->items[] = $item; + } + } + + public function getURI() { + return isset($this->feedUri) ? $this->feedUri : parent::getURI(); + } + + public function getName() { + return isset($this->feedName) ? $this->feedName : parent::getName(); + } +} diff --git a/bridges/MarktplaatsBridge.php b/bridges/MarktplaatsBridge.php new file mode 100644 index 0000000..ada6592 --- /dev/null +++ b/bridges/MarktplaatsBridge.php @@ -0,0 +1,127 @@ +<?php + +class MarktplaatsBridge extends BridgeAbstract { + const NAME = 'Marktplaats'; + const URI = 'https://marktplaats.nl'; + const DESCRIPTION = 'Read search queries from marktplaats.nl'; + const PARAMETERS = array( + 'Search' => array( + 'q' => array( + 'name' => 'query', + 'type' => 'text', + 'required' => true, + 'title' => 'The search string for marktplaats', + ), + 'z' => array( + 'name' => 'zipcode', + 'type' => 'text', + 'required' => false, + 'title' => 'Zip code for location limited searches', + ), + 'd' => array( + 'name' => 'distance', + 'type' => 'number', + 'required' => false, + 'title' => 'The distance in meters from the zipcode', + ), + 'f' => array( + 'name' => 'priceFrom', + 'type' => 'number', + 'required' => false, + 'title' => 'The minimal price in cents', + ), + 't' => array( + 'name' => 'priceTo', + 'type' => 'number', + 'required' => false, + 'title' => 'The maximal price in cents', + ), + 's' => array( + 'name' => 'showGlobal', + 'type' => 'checkbox', + 'required' => false, + 'title' => 'Include result with negative distance', + ), + 'i' => array( + 'name' => 'includeImage', + 'type' => 'checkbox', + 'required' => false, + 'title' => 'Include the image at the end of the 
content', + ), + 'r' => array( + 'name' => 'includeRaw', + 'type' => 'checkbox', + 'required' => false, + 'title' => 'Include the raw data behind the content', + ) + ) + ); + const CACHE_TIMEOUT = 900; + + public function collectData() { + $query = ''; + $excludeGlobal = false; + if(!is_null($this->getInput('z')) && !is_null($this->getInput('d'))) { + $query = '&postcode=' . $this->getInput('z') . '&distanceMeters=' . $this->getInput('d'); + } + if(!is_null($this->getInput('f'))) { + $query .= '&PriceCentsFrom=' . $this->getInput('f'); + } + if(!is_null($this->getInput('t'))) { + $query .= '&PriceCentsTo=' . $this->getInput('t'); + } + if(!is_null($this->getInput('s'))) { + if(!$this->getInput('s')) { + $excludeGlobal = true; + } + } + $url = 'https://www.marktplaats.nl/lrp/api/search?query=' . urlencode($this->getInput('q')) . $query; + $jsonString = getSimpleHTMLDOM($url, 900) or returnServerError('No contents received!'); + $jsonObj = json_decode($jsonString); + foreach($jsonObj->listings as $listing) { + if(!$excludeGlobal || $listing->location->distanceMeters >= 0) { + $item = array(); + $item['uri'] = 'https://marktplaats.nl' . $listing->vipUrl; + $item['title'] = $listing->title; + $item['timestamp'] = $listing->date; + $item['author'] = $listing->sellerInformation->sellerName; + $item['content'] = $listing->description; + $item['categories'] = $listing->verticals; + $item['uid'] = $listing->itemId; + if(!is_null($this->getInput('i')) && !empty($listing->imageUrls)) { + $item['enclosures'] = $listing->imageUrls; + if(is_array($listing->imageUrls)) { + foreach($listing->imageUrls as $imgurl) { + $item['content'] .= "<br />\n<img src='https:" . $imgurl . "' />"; + } + } else { + $item['content'] .= "<br>\n<img src='https:" . $listing->imageUrls . "' />"; + } + } + if(!is_null($this->getInput('r'))) { + if($this->getInput('r')) { + $item['content'] .= "<br />\n<br />\n<br />\n" . json_encode($listing); + } + } + $item['content'] .= "<br>\n<br>\nPrice: " . 
$listing->priceInfo->priceCents / 100; + $item['content'] .= ' (' . $listing->priceInfo->priceType . ')'; + if(!empty($listing->location->cityName)) { + $item['content'] .= "<br><br>\n" . $listing->location->cityName; + } + if(!is_null($this->getInput('r'))) { + if($this->getInput('r')) { + $item['content'] .= "<br />\n<br />\n<br />\n" . json_encode($listing); + } + } + $this->items[] = $item; + } + } + } + + public function getName(){ + if(!is_null($this->getInput('q'))) { + return $this->getInput('q') . ' - Marktplaats'; + } + return parent::getName(); + } +} diff --git a/bridges/MastodonBridge.php b/bridges/MastodonBridge.php index 9e131b7..de5e41f 100644 --- a/bridges/MastodonBridge.php +++ b/bridges/MastodonBridge.php @@ -78,7 +78,7 @@ class MastodonBridge extends FeedExpander { public function getURI(){ if($this->getInput('canusername')) - return 'https://' . $this->getInstance() . '/users/' . $this->getUsername() . '.atom'; + return 'https://' . $this->getInstance() . '/@' . $this->getUsername() . '.rss'; return parent::getURI(); } diff --git a/bridges/MediapartBlogsBridge.php b/bridges/MediapartBlogsBridge.php new file mode 100644 index 0000000..40ae1f9 --- /dev/null +++ b/bridges/MediapartBlogsBridge.php @@ -0,0 +1,48 @@ +<?php +class MediapartBlogsBridge extends BridgeAbstract { + const NAME = 'Mediapart Blogs'; + const BASE_URI = 'https://blogs.mediapart.fr'; + const URI = self::BASE_URI . '/blogs'; + const MAINTAINER = 'somini'; + const PARAMETERS = array( + array( + 'slug' => array( + 'name' => 'Blog Slug', + 'type' => 'text', + 'title' => 'Blog user name', + 'exampleValue' => 'jean-vincot', + ) + ) + ); + + public function getIcon() { + return 'https://static.mediapart.fr/favicon/favicon-club.ico?v=2'; + } + + public function collectData() { + $html = getSimpleHTMLDOM(self::BASE_URI . '/' . $this->getInput('slug') . 
'/blog') + or returnServerError('Could not load content'); + + foreach($html->find('ul.post-list li') as $element) { + $item = array(); + + $item_title = $element->find('h3.title a', 0); + $item_divs = $element->find('div'); + + $item['title'] = $item_title->innertext; + $item['uri'] = self::BASE_URI . trim($item_title->href); + $item['author'] = $element->find('.author .subscriber', 0)->innertext; + $item['content'] = $item_divs[count($item_divs) - 2] . $item_divs[count($item_divs) - 1]; + $item['timestamp'] = strtotime($element->find('.author time', 0)->datetime); + + $this->items[] = $item; + } + } + + public function getName() { + if ($this->getInput('slug')) { + return self::NAME . ' | ' . $this->getInput('slug'); + } + return parent::getName(); + } +} diff --git a/bridges/MondeDiploBridge.php b/bridges/MondeDiploBridge.php index 85f771e..cff8496 100644 --- a/bridges/MondeDiploBridge.php +++ b/bridges/MondeDiploBridge.php @@ -3,22 +3,26 @@ class MondeDiploBridge extends BridgeAbstract { const MAINTAINER = 'Pitchoule'; const NAME = 'Monde Diplomatique'; - const URI = 'http://www.monde-diplomatique.fr/'; + const URI = 'https://www.monde-diplomatique.fr'; const CACHE_TIMEOUT = 21600; //6h const DESCRIPTION = 'Returns most recent results from MondeDiplo.'; + private function cleanText($text) { + return trim(str_replace(array(' ', ' '), ' ', $text)); + } + public function collectData(){ $html = getSimpleHTMLDOM(self::URI) or returnServerError('Could not request MondeDiplo. for : ' . self::URI); foreach($html->find('div.unarticle') as $article) { $element = $article->parent(); + $title = $element->find('h3', 0)->plaintext; + $datesAuteurs = $element->find('div.dates_auteurs', 0)->plaintext; $item = array(); $item['uri'] = self::URI . $element->href; - $item['title'] = $element->find('h3', 0)->plaintext; - $item['content'] = $element->find('div.dates_auteurs', 0)->plaintext - . '<br>' - . 
strstr($element->find('div', 0)->plaintext, $element->find('div.dates_auteurs', 0)->plaintext, true); + $item['title'] = $this->cleanText($title) . ' - ' . $this->cleanText($datesAuteurs); + $item['content'] = $this->cleanText(str_replace(array($title, $datesAuteurs), '', $element->plaintext)); $this->items[] = $item; } diff --git a/bridges/MozillaBugTrackerBridge.php b/bridges/MozillaBugTrackerBridge.php index 356bedc..439e148 100644 --- a/bridges/MozillaBugTrackerBridge.php +++ b/bridges/MozillaBugTrackerBridge.php @@ -61,43 +61,44 @@ class MozillaBugTrackerBridge extends BridgeAbstract { if($html === false) returnServerError('Failed to load page!'); + // Fix relative URLs + defaultLinkTo($html, self::URI); + // Store header information into private members - $this->bugid = $html->find('#bugzilla-body', 0)->find('a', 0)->innertext; - $this->bugdesc = $html->find('table.bugfields', 0)->find('tr', 0)->find('td', 0)->innertext; + $this->bugid = $html->find('#field-value-bug_id', 0)->plaintext; + $this->bugdesc = $html->find('h1#field-value-short_desc', 0)->plaintext; // Get and limit comments - $comments = $html->find('.bz_comment_table div.bz_comment'); + $comments = $html->find('div.change-set'); if($limit > 0 && count($comments) > $limit) { $comments = array_slice($comments, count($comments) - $limit, $limit); } - // Order comments - switch($sorting) { - case 'lf': $comments = array_reverse($comments, true); - case 'of': - default: // Nothing to do, keep original order + if ($sorting === 'lf') { + $comments = array_reverse($comments, true); } foreach($comments as $comment) { $comment = $this->inlineStyles($comment); $item = array(); - $item['uri'] = $this->getURI() . '#' . 
$comment->id; - $item['author'] = $comment->find('span.bz_comment_user', 0)->innertext; - $item['title'] = $comment->find('span.bz_comment_number', 0)->find('a', 0)->innertext; - $item['timestamp'] = strtotime($comment->find('span.bz_comment_time', 0)->innertext); - $item['content'] = $comment->find('pre.bz_comment_text', 0)->innertext; + $item['uri'] = $comment->find('h3.change-name', 0)->find('a', 0)->href; + $item['author'] = $comment->find('td.change-author', 0)->plaintext; + $item['title'] = $comment->find('h3.change-name', 0)->plaintext; + $item['timestamp'] = strtotime($comment->find('span.rel-time', 0)->title); + $item['content'] = ''; - // Fix line breaks (they use LF) - $item['content'] = str_replace("\n", '<br>', $item['content']); + if ($comment->find('.comment-text', 0)) { + $item['content'] = $comment->find('.comment-text', 0)->outertext; + } - // Fix relative URIs - $item['content'] = $this->replaceRelativeURI($item['content']); + if ($comment->find('div.activity', 0)) { + $item['content'] .= $comment->find('div.activity', 0)->innertext; + } $this->items[] = $item; } - } public function getURI(){ @@ -114,9 +115,8 @@ class MozillaBugTrackerBridge extends BridgeAbstract { public function getName(){ switch($this->queriedContext) { case 'Bug comments': - return 'Bug ' - . $this->bugid - . ' tracker for ' + return $this->bugid + . ' - ' . $this->bugdesc . ' - ' . parent::getName(); @@ -126,17 +126,6 @@ class MozillaBugTrackerBridge extends BridgeAbstract { } /** - * Replaces all relative URIs with absolute ones - * - * @param string $content The source string - * @return string Returns the source string with all relative URIs replaced - * by absolute ones. - */ - private function replaceRelativeURI($content){ - return preg_replace('/href="(?!http)/', 'href="' . self::URI . 
'/', $content); - } - - /** * Adds styles as attributes to tags with known classes * * @param object $html A simplehtmldom object @@ -144,10 +133,14 @@ class MozillaBugTrackerBridge extends BridgeAbstract { * attributes. */ private function inlineStyles($html){ - foreach($html->find('.bz_obsolete') as $element) { + foreach($html->find('.bz_closed') as $element) { $element->style = 'text-decoration:line-through;'; } + foreach($html->find('pre') as $element) { + $element->style = 'white-space: pre-wrap;'; + } + return $html; } } diff --git a/bridges/NasaApodBridge.php b/bridges/NasaApodBridge.php index 8e293e0..6e2674f 100644 --- a/bridges/NasaApodBridge.php +++ b/bridges/NasaApodBridge.php @@ -12,10 +12,8 @@ class NasaApodBridge extends BridgeAbstract { $html = getSimpleHTMLDOM(self::URI . 'archivepix.html') or returnServerError('Error while downloading the website content'); - $list = explode('<br>', $html->find('b', 0)->innertext); - - for($i = 0; $i < 3; $i++) { - $line = $list[$i]; + // Start at 1 to skip the "APOD Full Archive" on top of the page + for($i = 1; $i < 4; $i++) { $item = array(); $uri_page = $html->find('a', $i + 3)->href; @@ -26,9 +24,14 @@ class NasaApodBridge extends BridgeAbstract { $picture_html_string = $picture_html->innertext; //Extract image and explanation - $media = $picture_html->find('p', 1)->innertext; - $media = strstr($media, '<br>'); - $media = preg_replace('/<br>/', '', $media, 1); + $image_wrapper = $picture_html->find('a', 1); + $image_path = $image_wrapper->href; + $img_placeholder = $image_wrapper->find('img', 0); + $img_alt = $img_placeholder->alt; + $img_style = $img_placeholder->style; + $image_uri = self::URI . 
$image_path; + $new_img_placeholder = "<img src=\"$image_uri\" alt=\"$img_alt\" style=\"$img_style\">"; + $media = "<a href=\"$image_uri\">$new_img_placeholder</a>"; $explanation = $picture_html->find('p', 2)->innertext; //Extract date from the picture page diff --git a/bridges/NextInpactBridge.php b/bridges/NextInpactBridge.php index c6bf2f5..c3cca30 100644 --- a/bridges/NextInpactBridge.php +++ b/bridges/NextInpactBridge.php @@ -1,9 +1,10 @@ <?php class NextInpactBridge extends FeedExpander { - const MAINTAINER = 'qwertygc'; + const MAINTAINER = 'qwertygc and ORelio'; const NAME = 'NextInpact Bridge'; const URI = 'https://www.nextinpact.com/'; + const URI_HARDWARE = 'https://www.inpact-hardware.com/'; const DESCRIPTION = 'Returns the newest articles.'; const PARAMETERS = array( array( @@ -11,10 +12,30 @@ class NextInpactBridge extends FeedExpander { 'name' => 'Feed', 'type' => 'list', 'values' => array( - 'Tous nos articles' => 'news', - 'Nos contenus en accès libre' => 'acces-libre', - 'Blog' => 'blog', - 'Bons plans' => 'bonsplans' + 'Nos actualités' => array( + 'Toutes nos publications' => 'news', + 'Toutes nos publications sauf #LeBrief' => 'nobrief', + 'Toutes nos publications sauf INpact Hardware' => 'noih', + 'Seulement les publications INpact Hardware' => 'hardware:news', + 'Seulement les publications Next INpact' => 'nobrief-noih', + 'Seulement les publications #LeBrief' => 'lebrief', + ), + 'Flux spécifiques' => array( + 'Le blog' => 'blog', + 'Les bons plans' => 'bonsplans', + 'Publications INpact Hardware en accès libre' => 'hardware:acces-libre', + 'Publications Next INpact en accès libre' => 'acces-libre', + ), + 'Flux thématiques' => array( + 'Tech' => 'category:1', + 'Logiciel' => 'category:2', + 'Internet' => 'category:3', + 'Mobilité' => 'category:4', + 'Droit' => 'category:5', + 'Économie' => 'category:6', + 'Culture numérique' => 'category:7', + 'Next INpact' => 'category:8', + ) ) ), 'filter_premium' => array( @@ -39,9 +60,27 @@ class 
NextInpactBridge extends FeedExpander { public function collectData(){ $feed = $this->getInput('feed'); - if (empty($feed)) + $base_uri = self::URI; + $args = ''; + + if (empty($feed)) { + // Default to All articles $feed = 'news'; - $this->collectExpandableDatas(self::URI . 'rss/' . $feed . '.xml'); + } + + if (strpos($feed, 'hardware:') === 0) { + // Feed hosted on Hardware domain + $base_uri = self::URI_HARDWARE; + $feed = str_replace('hardware:', '', $feed); + } + + if (strpos($feed, 'category:') === 0) { + // Feed with specific category parameter + $args = '?CategoryIds=' . str_replace('category:', '', $feed); + $feed = 'params'; + } + + $this->collectExpandableDatas($base_uri . 'rss/' . $feed . '.xml' . $args); } protected function parseItem($newsItem){ @@ -57,9 +96,11 @@ class NextInpactBridge extends FeedExpander { if (!is_object($html)) return 'Failed to request NextInpact: ' . $url; + // Filter premium and brief articles? + $brief_selector = 'div.brief-container'; foreach(array( - 'filter_premium' => 'h2.title_reserve_article', - 'filter_brief' => 'div.brief-inner-content' + 'filter_premium' => 'p.red-msg', + 'filter_brief' => $brief_selector ) as $param_name => $selector) { $param_val = intval($this->getInput($param_name)); if ($param_val != 0) { @@ -71,38 +112,71 @@ class NextInpactBridge extends FeedExpander { } } - if (is_object($html->find('div[itemprop=articleBody], div.brief-inner-content', 0))) { + $article_content = $html->find('div.article-content', 0); + if (!is_object($article_content)) { + $article_content = $html->find('div.content', 0); + } + if (is_object($article_content)) { - $subtitle = trim($html->find('span.sub_title, div.brief-head', 0)); - if(is_object($subtitle) && $subtitle->plaintext !== $item['title']) { - $subtitle = '<p><em>' . $subtitle->plaintext . 
'</em></p>'; + // Subtitle + $subtitle = $html->find('small.subtitle', 0); + if(!is_object($subtitle) && !is_object($html->find($brief_selector, 0))) { + $subtitle = $html->find('small', 0); + } + if(!is_object($subtitle)) { + $content_wrapper = $html->find('div.content-wrapper', 0); + if (is_object($content_wrapper)) { + $subtitle = $content_wrapper->find('h2.title', 0); + } + } + if(is_object($subtitle) && (!isset($item['title']) || $subtitle->plaintext != $item['title'])) { + $subtitle = '<p><em>' . trim($subtitle->plaintext) . '</em></p>'; } else { $subtitle = ''; } - $postimg = $html->find( - 'div.container_main_image_article, div.image-brief-container, div.image-brief-side-container', 0 - ); + // Image + $postimg = $html->find('div.article-image, div.image-container', 0); if(is_object($postimg)) { - $postimg = '<p><img src="' - . $postimg->find('img.dedicated', 0)->src - . '" alt="-" /></p>'; + $postimg = $postimg->find('img', 0); + if (!empty($postimg->src)) { + $postimg = $postimg->src; + } else { + $postimg = $postimg->srcset; //"url 355w, url 1003w, url 748w" + $postimg = explode(', ', $postimg); //split by ', ' to get each url separately + $postimg = end($postimg); //Get last item: "url 748w" which is of largest size + $postimg = explode(' ', $postimg); //split by ' ' to separate url from res + $postimg = array_reverse($postimg); //reverse array content to have url last + $postimg = end($postimg); //Get last item of array: "url" + } + $postimg = '<p><img src="' . $postimg . '" alt="-" /></p>'; } else { $postimg = ''; } + // Paywall + $paywall = $html->find('div.paywall-restriction', 0); + if (is_object($paywall) && is_object($paywall->find('p.red-msg', 0))) { + $paywall = '<p><em>' . $paywall->find('span.head-mention', 0)->innertext . 
'</em></p>'; + } else { + $paywall = ''; + } + + // Content + $article_content = $article_content->outertext; + $article_content = str_replace('>Signaler une erreur</span>', '></span>', $article_content); + + // Result $text = $subtitle . $postimg - . $html->find('div[itemprop=articleBody], div.brief-inner-content', 0)->outertext; + . $article_content + . $paywall; } else { - $text = $item['content'] - . '<p><em>Failed retrieve full article content</em></p>'; - } - - $premium_article = $html->find('h2.title_reserve_article', 0); - if (is_object($premium_article)) { - $text .= '<p><em>' . $premium_article->innertext . '</em></p>'; + $text = '<p><em>Failed to retrieve full article content</em></p>'; + if (isset($item['content'])) { + $text = $item['content'] . $text; + } } return $text; diff --git a/bridges/NineGagBridge.php b/bridges/NineGagBridge.php index 939ff38..19c7e29 100644 --- a/bridges/NineGagBridge.php +++ b/bridges/NineGagBridge.php @@ -148,7 +148,7 @@ class NineGagBridge extends BridgeAbstract { } if (!$AvoidElement) { - $item['uri'] = $post['url']; + $item['uri'] = preg_replace('/^http:/i', 'https:', $post['url']); $item['title'] = $post['title']; $item['content'] = self::getContent($post); $item['categories'] = self::getCategories($post); diff --git a/bridges/NordbayernBridge.php b/bridges/NordbayernBridge.php new file mode 100644 index 0000000..37fa3d5 --- /dev/null +++ b/bridges/NordbayernBridge.php @@ -0,0 +1,131 @@ +<?php +ini_set('max_execution_time', '300'); +class NordbayernBridge extends BridgeAbstract { + + const MAINTAINER = 'schabi.org'; + const NAME = 'Nordbayern Bridge'; + const CACHE_TIMEOUT = 3600; + const URI = 'https://www.nordbayern.de'; + const DESCRIPTION = 'Bridge for Bavarian reginoal news site nordbayern.de'; + const PARAMETERS = array( array( + 'region' => array( + 'name' => 'region', + 'type' => 'list', + 'exampleValue' => 'Nürnberg', + 'title' => 'Select a region', + 'values' => array( + 'Nürnberg' => 'nuernberg', + 'Fürth' => 
'fuerth', + 'Altdorf' => 'altdorf', + 'Ansbach' => 'ansbach', + 'Bad Windsheim' => 'bad-windsheim', + 'Bamberg' => 'bamberg', + 'Dinkelsbühl/Feuchtwangen' => 'dinkelsbuehl-feuchtwangen', + 'Feucht' => 'feucht', + 'Forchheim' => 'forchheim', + 'Gunzenhausen' => 'gunzenhausen', + 'Hersbruck' => 'hersbruck', + 'Herzogenaurach' => 'herzogenaurach', + 'Hilpolstein' => 'holpolstein', + 'Höchstadt' => 'hoechstadt', + 'Lauf' => 'lauf', + 'Neumarkt' => 'neumarkt', + 'Neustadt/Aisch' => 'neustadt-aisch', + 'Pegnitz' => 'pegnitz', + 'Roth' => 'roth', + 'Rothenburg o.d.T.' => 'rothenburg-o-d-t', + 'Schwabach' => 'schwabach', + 'Treuchtlingen' => 'treuchtlingen', + 'Weißenburg' => 'weissenburg' + ) + ), + 'policeReports' => array( + 'name' => 'Police Reports', + 'type' => 'checkbox', + 'exampleValue' => 'checked', + 'title' => 'Read Police Reports', + ) + )); + + private function getImageUrlFromScript($script) { + preg_match( + "#src=\\\\'(https:[-:\\.\\\\/a-zA-Z0-9%_]*\\.(jpg|JPG))#", + $script->innertext, + $matches, + PREG_OFFSET_CAPTURE + ); + if(isset($matches[1][0])) { + return stripcslashes($matches[1][0]) . '?w=800'; + } + return null; + } + + private function handleArticle($link) { + $item = array(); + $article = getSimpleHTMLDOM($link); + $content = $article->find('div[class*=article-content]', 0); + $item['uri'] = $link; + $item['title'] = $article->find('h1', 0)->innertext; + $item['content'] = ''; + + //first get image from block/modul + $figure = $article->find('figure[class*=panorama]', 0); + if($figure !== null) { + $imgUrl = self::getImageUrlFromScript($figure->find('script', 0)); + if($imgUrl === null) { + $imgUrl = self::getImageUrlFromScript($figure->find('script', 1)); + } + $item['content'] .= '<img src="' . $imgUrl . 
'">'; + } + + // get regular paragraphs + foreach($content->children() as $child) { + if($child->tag === 'p') { + $item['content'] .= $child; + } + } + + //get image divs + foreach($content->find('div[class*=article-slideshow]') as $slides) { + foreach($slides->children() as $child) { + switch($child->tag) { + case 'p': + $item['content'] .= $child; + break; + case 'h5': + $item['content'] .= '<h5><a href="' + . self::URI . $child->find('a', 0)->href . '">' . $child->plaintext . '</a></h5>'; + break; + case 'a': + $url = self::getImageUrlFromScript($child->find('script', 0)); + $item['content'] .= '<img src="' . $url . '">'; + break; + } + } + } + $this->items[] = $item; + $article->clear(); + } + + private function handleNewsblock($listSite, $readPoliceReports) { + $newsBlocks = $listSite->find('section[class*=newsblock]'); + $regionalNewsBlock = $newsBlocks[0]; + $policeBlock = $newsBlocks[1]; + foreach($regionalNewsBlock->find('h2') as $headline) { + self::handleArticle(self::URI . $headline->find('a', 0)->href); + } + if($readPoliceReports === true) { + foreach($policeBlock->find('h2') as $headline) { + self::handleArticle(self::URI . $headline->find('a', 0)->href); + } + } + } + + public function collectData() { + $item = array(); + $region = $this->getInput('region'); + $listSite = getSimpleHTMLDOM(self::URI . '/region/' . 
$region); + + self::handleNewsblock($listSite, $this->getInput('policeReports')); + } +} diff --git a/bridges/NyaaTorrentsBridge.php b/bridges/NyaaTorrentsBridge.php index b40b0f9..ab00ece 100644 --- a/bridges/NyaaTorrentsBridge.php +++ b/bridges/NyaaTorrentsBridge.php @@ -100,7 +100,9 @@ class NyaaTorrentsBridge extends BridgeAbstract { //Retrieve data from page contents $item_title = str_replace(' :: Nyaa', '', $item_html->find('title', 0)->plaintext); - $item_desc = str_get_html(markdownToHtml($item_html->find('#torrent-description', 0)->innertext)); + $item_desc = str_get_html( + markdownToHtml(html_entity_decode($item_html->find('#torrent-description', 0)->innertext)) + ); $item_author = extractFromDelimiters($item_html->outertext, 'href="/user/', '"'); $item_date = intval(extractFromDelimiters($item_html->outertext, 'data-timestamp="', '"')); diff --git a/bridges/OpenwrtSecurityBridge.php b/bridges/OpenwrtSecurityBridge.php new file mode 100644 index 0000000..7a7470f --- /dev/null +++ b/bridges/OpenwrtSecurityBridge.php @@ -0,0 +1,37 @@ +<?php +class OpenwrtSecurityBridge extends BridgeAbstract { + const NAME = 'OpenWrt Security Advisories'; + const URI = 'https://openwrt.org/advisory/start'; + const DESCRIPTION = 'Security Advisories published by openwrt.org'; + const MAINTAINER = 'mschwld'; + const CACHE_TIMEOUT = 3600; + const WEBROOT = 'https://openwrt.org'; + + public function collectData() { + $item = array(); + $html = getSimpleHTMLDOM(self::URI) + or returnServerError('Could not request entries'); + + $advisories = $html->find('div[class=plugin_nspages]', 0); + + foreach($advisories->find('a[class=wikilink1]') as $element) { + $item = array(); + + $row = $element->innertext; + + $item['title'] = substr($row, 0, strpos($row, ' - ')); + $item['timestamp'] = $this->getDate($element->href); + $item['uri'] = self::WEBROOT . $element->href; + $item['uid'] = self::WEBROOT . 
$element->href; + $item['content'] = substr($row, strpos($row, ' - ') + 3); + $item['author'] = 'OpenWrt Project'; + + $this->items[] = $item; + } + } + + private function getDate($href) { + $date = substr($href, -12); + return $date; + } +} diff --git a/bridges/OtrkeyFinderBridge.php b/bridges/OtrkeyFinderBridge.php new file mode 100644 index 0000000..32ce5c4 --- /dev/null +++ b/bridges/OtrkeyFinderBridge.php @@ -0,0 +1,175 @@ +<?php +class OtrkeyFinderBridge extends BridgeAbstract { + const MAINTAINER = 'mibe'; + const NAME = 'OtrkeyFinder'; + const URI = 'https://otrkeyfinder.com'; + const URI_TEMPLATE = 'https://otrkeyfinder.com/en/?search=%s&order=&page=%d'; + const CACHE_TIMEOUT = 3600; // 1h + const DESCRIPTION = 'Returns the newest .otrkey files matching the search criteria.'; + const PARAMETERS = array( + array( + 'searchterm' => array( + 'name' => 'Search term', + 'exampleValue' => 'Terminator', + 'title' => 'The search term is case-insensitive', + ), + 'station' => array( + 'name' => 'Station name', + 'exampleValue' => 'ARD', + ), + 'type' => array( + 'name' => 'Media type', + 'type' => 'list', + 'values' => array( + 'any' => '', + 'Detail' => array( + 'HD' => 'HD.avi', + 'AC3' => 'HD.ac3', + 'HD & AC3' => 'HD.', + 'HQ' => 'HQ.avi', + 'AVI' => 'g.avi', // 'g.' to exclude HD.avi and HQ.avi (filename always contains 'mpg.') + 'MP4' => '.mp4', + ), + ), + ), + 'minTime' => array( + 'name' => 'Min. running time', + 'type' => 'number', + 'title' => 'The minimum running time in minutes. The resolution is 5 minutes.', + 'exampleValue' => '90', + 'defaultValue' => '0', + ), + 'maxTime' => array( + 'name' => 'Max. running time', + 'type' => 'number', + 'title' => 'The maximum running time in minutes. The resolution is 5 minutes.', + 'exampleValue' => '120', + 'defaultValue' => '0', + ), + 'pages' => array( + 'name' => 'Number of pages', + 'type' => 'number', + 'title' => 'Specifies the number of pages to fetch. 
Increase this value if you get an empty feed.', + 'exampleValue' => '5', + 'defaultValue' => '5', + ), + ) + ); + // Example: Terminator_20.04.13_02-25_sf2_100_TVOON_DE.mpg.avi.otrkey + // The first group is the running time in minutes + const FILENAME_REGEX = '/_(\d+)_TVOON_DE\.mpg\..+\.otrkey/'; + // year.month.day_hour-minute with leading zeros + const TIME_REGEX = '/\d{2}\.\d{2}\.\d{2}_\d{2}-\d{2}/'; + const CONTENT_TEMPLATE = '<ul>%s</ul>'; + const MIRROR_TEMPLATE = '<li><a href="https://otrkeyfinder.com%s">%s</a></li>'; + + public function collectData() { + $pages = $this->getInput('pages'); + + for($page = 1; $page <= $pages; $page++) { + $uri = $this->buildUri($page); + + $html = getSimpleHTMLDOMCached($uri, self::CACHE_TIMEOUT) + or returnServerError('Could not request ' . $uri); + + $keys = $html->find('div.otrkey'); + + foreach($keys as $key) { + $temp = $this->buildItem($key); + + if ($temp != null) + $this->items[] = $temp; + } + + // Sleep for 0.5 seconds to don't hammer the server. + usleep(500000); + } + } + + private function buildUri($page) { + $searchterm = $this->getInput('searchterm'); + $station = $this->getInput('station'); + $type = $this->getInput('type'); + + // Combine all three parts to a search query by separating them with white space + $search = implode(' ', array($searchterm, $station, $type)); + $search = trim($search); + $search = urlencode($search); + + return sprintf(self::URI_TEMPLATE, $search, $page); + } + + private function buildItem(simple_html_dom_node $node) { + $file = $this->getFilename($node); + + if ($file == null) + return null; + + $minTime = $this->getInput('minTime'); + $maxTime = $this->getInput('maxTime'); + + // Do we need to check the running time? 
+ if ($minTime != 0 || $maxTime != 0) { + if ($maxTime > 0 && $maxTime < $minTime) + returnClientError('The minimum running time must be less than the maximum running time.'); + + preg_match(self::FILENAME_REGEX, $file, $matches); + + if (!isset($matches[1])) + return null; + + $time = (integer)$matches[1]; + + // Check for minimum running time + if ($minTime > 0 && $minTime > $time) + return null; + + // Check for maximum running time + if ($maxTime > 0 && $maxTime < $time) + return null; + } + + $item = array(); + $item['title'] = $file; + + // The URI_TEMPLATE for querying the site can be reused here + $item['uri'] = sprintf(self::URI_TEMPLATE, $file, 1); + + $content = $this->buildContent($node); + + if ($content != null) + $item['content'] = $content; + + if (preg_match(self::TIME_REGEX, $file, $matches) === 1) { + $item['timestamp'] = DateTime::createFromFormat( + 'y.m.d_H-i', + $matches[0], + new DateTimeZone('Europe/Berlin') + )->getTimestamp(); + } + + return $item; + } + + private function getFilename(simple_html_dom_node $node) { + $file = $node->find('.file', 0); + + if ($file == null) + return null; + else + return trim($file->innertext); + } + + private function buildContent(simple_html_dom_node $node) { + $mirrors = $node->find('div.mirror'); + $list = ''; + + // Build list of available mirrors + foreach($mirrors as $mirror) { + $anchor = $mirror->find('a', 0); + $list .= sprintf(self::MIRROR_TEMPLATE, $anchor->href, $anchor->innertext); + } + + return sprintf(self::CONTENT_TEMPLATE, $list); + } +} diff --git a/bridges/RainbowSixSiegeBridge.php b/bridges/RainbowSixSiegeBridge.php index 62ea482..067d3e7 100644 --- a/bridges/RainbowSixSiegeBridge.php +++ b/bridges/RainbowSixSiegeBridge.php @@ -12,8 +12,8 @@ class RainbowSixSiegeBridge extends BridgeAbstract { } public function collectData(){ - $dlUrl = 'https://www.ubisoft.com/api/updates/items?categoriesFilter=all'; - $dlUrl = $dlUrl . 
'&limit=6&mediaFilter=all&skip=0&startIndex=undefined&locale=en-us'; + $dlUrl = 'https://www.ubisoft.com/api/updates/items?locale=en-us&categoriesFilter=all'; + $dlUrl = $dlUrl . '&limit=6&mediaFilter=news&skip=0&startIndex=undefined&tags=BR-rainbow-six%20GA-siege'; $jsonString = getContents($dlUrl) or returnServerError('Error while downloading the website content'); $json = json_decode($jsonString, true); @@ -27,34 +27,7 @@ class RainbowSixSiegeBridge extends BridgeAbstract { $uri = $uri . $jsonItem['button']['buttonUrl']; $thumbnail = '<img src="' . $jsonItem['thumbnail']['url'] . '" alt="Thumbnail">'; - $content = $thumbnail . '<br />' . $jsonItem['content']; - - // Markdown parsing from https://gist.github.com/jbroadway/2836900 - - // Line breaks - $content = preg_replace("/\r\n|\r|\n/", '<br/>', $content); - - // Links - $regex = '/\[([^\[]+)\]\(([^\)]+)\)/'; - $replacement = '<a href=\'\2\'>\1</a>'; - $content = preg_replace($regex, $replacement, $content); - - // Bold text - $regex = '/(\*\*|__)(.*?)\1/'; - $replacement = '<strong>\2</strong>'; - $content = preg_replace($regex, $replacement, $content); - - // Lists - $regex = '/\n\s*[\*|\-](.*)/'; - $content = preg_replace_callback($regex, function($regs) { - $item = $regs[1]; - return sprintf ('<ul><li>%s</li></ul>', trim ($item)); - }, $content); - - // Italic text - $regex = '/(\*\*|\*)(.*?)\1/'; - $replacement = '<i>\2</i>'; - $content = preg_replace($regex, $replacement, $content); + $content = $thumbnail . '<br />' . 
markdownToHtml($jsonItem['content']); $item = array(); $item['uri'] = $uri; diff --git a/bridges/Releases3DSBridge.php b/bridges/Releases3DSBridge.php index fe2df8e..686e7c5 100644 --- a/bridges/Releases3DSBridge.php +++ b/bridges/Releases3DSBridge.php @@ -5,13 +5,16 @@ class Releases3DSBridge extends BridgeAbstract { const NAME = '3DS Scene Releases'; const URI = 'http://www.3dsdb.com/'; const CACHE_TIMEOUT = 10800; // 3h - const DESCRIPTION = 'Returns the newest scene releases.'; + const DESCRIPTION = 'Returns the newest scene releases for Nintendo 3DS.'; public function collectData(){ + $this->collectDataUrl(self::URI . 'xml.php'); + } + + protected function collectDataUrl($dataUrl){ - $dataUrl = self::URI . 'xml.php'; $xml = getContents($dataUrl) - or returnServerError('Could not request 3dsdb: ' . $dataUrl); + or returnServerError('Could not request URL: ' . $dataUrl); $limit = 0; foreach(array_reverse(explode('<release>', $xml)) as $element) { @@ -52,17 +55,25 @@ class Releases3DSBridge extends BridgeAbstract { $ignSearchUrl = 'https://www.ign.com/search?q=' . urlencode($name); if($ignResult = getSimpleHTMLDOMCached($ignSearchUrl)) { - $ignCoverArt = $ignResult->find('div.search-item-media', 0)->find('img', 0)->src; - $ignDesc = $ignResult->find('div.search-item-description', 0)->plaintext; - $ignLink = $ignResult->find('div.search-item-sub-title', 0)->find('a', 1)->href; - $ignDate = strtotime(trim($ignResult->find('span.publish-date', 0)->plaintext)); - $ignDescription = '<div><img src="' - . $ignCoverArt - . '" /></div><div>' - . $ignDesc - . ' <a href="' - . $ignLink - . 
'">More at IGN</a></div>'; + $ignCoverArt = $ignResult->find('div.search-item-media', 0); + $ignDesc = $ignResult->find('div.search-item-description', 0); + $ignLink = $ignResult->find('div.search-item-sub-title', 0); + $ignDate = $ignResult->find('span.publish-date', 0); + if (is_object($ignCoverArt)) + $ignCoverArt = $ignCoverArt->find('img', 0); + if (is_object($ignLink)) + $ignLink = $ignLink->find('a', 1); + if (is_object($ignDate)) + $ignDate = strtotime(trim($ignDate->plaintext)); + if (is_object($ignCoverArt) && is_object($ignDesc) && is_object($ignLink)) { + $ignDescription = '<div><img src="' + . $ignCoverArt->src + . '" /></div><div>' + . $ignDesc->plaintext + . ' <a href="' + . $ignLink->href + . '">More at IGN</a></div>'; + } } //Main section : Release description from 3DS database @@ -111,7 +122,7 @@ class Releases3DSBridge extends BridgeAbstract { private function typeToString($type){ switch($type) { - case 1: return '3DS Game'; + case 1: return 'Card Game'; case 4: return 'eShop'; default: return '??? (' . $type . ')'; } diff --git a/bridges/ReleasesSwitchBridge.php b/bridges/ReleasesSwitchBridge.php new file mode 100644 index 0000000..89ca76d --- /dev/null +++ b/bridges/ReleasesSwitchBridge.php @@ -0,0 +1,17 @@ +<?php + +// This bridge depends on Releases3DSBridge +if (!class_exists('Releases3DSBridge')) { + include('Releases3DSBridge.php'); +} + +class ReleasesSwitchBridge extends Releases3DSBridge { + + const NAME = 'Switch Scene Releases'; + const URI = 'http://www.nswdb.com/'; + const DESCRIPTION = 'Returns the newest scene releases for Nintendo Switch.'; + + public function collectData(){ + $this->collectDataUrl(self::URI . 
'xml.php'); + } +} diff --git a/bridges/RobinhoodSnacksBridge.php b/bridges/RobinhoodSnacksBridge.php new file mode 100644 index 0000000..e123146 --- /dev/null +++ b/bridges/RobinhoodSnacksBridge.php @@ -0,0 +1,27 @@ +<?php + +class RobinhoodSnacksBridge extends BridgeAbstract { + const MAINTAINER = 'johnpc'; + const NAME = 'Robinhood Snacks Newsletter'; + const URI = 'https://snacks.robinhood.com/newsletters/'; + const CACHE_TIMEOUT = 86400; // 24h + const DESCRIPTION = 'Returns newsletters from Robinhood Snacks'; + + public function collectData() + { + $html = getSimpleHTMLDOM(self::URI) + or returnServerError('Could not request snacks.robinhood.com.'); + + foreach ($html->find('#root > div > div > div > div > div > a') as $element) { + if ($element->href === 'https://snacks.robinhood.com/newsletters/page/2/') { + continue; + } + + $this->items[] = array( + 'uri' => $element->href, + 'title' => $element->find('div > div', 3)->plaintext, + 'content' => $element->find('div > div', 4)->plaintext, + ); + } + } +} diff --git a/bridges/SensCritiqueBridge.php b/bridges/SensCritiqueBridge.php index 7ac35f2..9126c31 100644 --- a/bridges/SensCritiqueBridge.php +++ b/bridges/SensCritiqueBridge.php @@ -3,15 +3,11 @@ class SensCritiqueBridge extends BridgeAbstract { const MAINTAINER = 'kranack'; const NAME = 'Sens Critique'; - const URI = 'http://www.senscritique.com/'; + const URI = 'https://www.senscritique.com/'; const CACHE_TIMEOUT = 21600; // 6h const DESCRIPTION = 'Sens Critique news'; const PARAMETERS = array( array( - 'm' => array( - 'name' => 'Movies', - 'type' => 'checkbox' - ), 's' => array( 'name' => 'Series', 'type' => 'checkbox' @@ -40,8 +36,6 @@ class SensCritiqueBridge extends BridgeAbstract { if($this->getInput($category)) { $uri = self::URI; switch($category) { - case 'm': $uri .= 'films/cette-semaine'; - break; case 's': $uri .= 'series/actualite'; break; case 'g': $uri .= 'jeuxvideo/actualite'; @@ -77,20 +71,25 @@ class SensCritiqueBridge extends 
BridgeAbstract { . ' ' . $movie->find('.elco-date', 0)->plaintext; - $item['content'] = '<em>' - . $movie->find('.elco-original-title', 0)->plaintext - . '</em><br><br>' - . $movie->find('.elco-baseline', 0)->plaintext + $item['content'] = ''; + $originalTitle = $movie->find('.elco-original-title', 0); + $description = $movie->find('.elco-description', 0); + + if ($originalTitle) { + $item['content'] = '<em>' . $originalTitle->plaintext . '</em><br><br>'; + } + + $item['content'] .= $movie->find('.elco-baseline', 0)->plaintext . '<br>' . $movie->find('.elco-baseline', 1)->plaintext . '<br><br>' - . $movie->find('.elco-description', 0)->plaintext + . ($description ? $description->plaintext : '') . '<br><br>' . trim($movie->find('.erra-ratings .erra-global', 0)->plaintext) . ' / 10'; - $item['id'] = $this->getURI() . $movie->find('.elco-title a', 0)->href; - $item['uri'] = $this->getURI() . $movie->find('.elco-title a', 0)->href; + $item['id'] = $this->getURI() . ltrim($movie->find('.elco-title a', 0)->href, '/'); + $item['uri'] = $this->getURI() . ltrim($movie->find('.elco-title a', 0)->href, '/'); $this->items[] = $item; } } diff --git a/bridges/SoundcloudBridge.php b/bridges/SoundcloudBridge.php index 99a2117..45e6fed 100644 --- a/bridges/SoundcloudBridge.php +++ b/bridges/SoundcloudBridge.php @@ -11,44 +11,57 @@ class SoundCloudBridge extends BridgeAbstract { 'u' => array( 'name' => 'username', 'required' => true + ), + 't' => array( + 'name' => 'type', + 'type' => 'list', + 'defaultValue' => 'tracks', + 'values' => array( + 'Tracks' => 'tracks', + 'Playlists' => 'playlists' + ) ) )); + private $feedTitle = null; private $feedIcon = null; private $clientIDCache = null; public function collectData(){ $res = $this->apiGet('resolve', array( - 'url' => 'http://www.soundcloud.com/' . $this->getInput('u') + 'url' => 'https://soundcloud.com/' . 
$this->getInput('u') )) or returnServerError('No results for this query'); + $this->feedTitle = $res->username; $this->feedIcon = $res->avatar_url; - $tracks = $this->apiGet('users/' . urlencode($res->id) . '/tracks') - or returnServerError('No results for this user'); + $tracks = $this->apiGet( + 'users/' . urlencode($res->id) . '/' . $this->getInput('t'), + array('limit' => 31) + ) or returnServerError('No results for this user/playlist'); - $numTracks = min(count($tracks), 10); - for($i = 0; $i < $numTracks; $i++) { + foreach ($tracks->collection as $index => $track) { $item = array(); - $item['author'] = $tracks[$i]->user->username; - $item['title'] = $tracks[$i]->user->username . ' - ' . $tracks[$i]->title; - $item['timestamp'] = strtotime($tracks[$i]->created_at); - $item['content'] = $tracks[$i]->description; - $item['enclosures'] = array($tracks[$i]->uri - . '/stream?client_id=' - . $this->getClientID()); + $item['author'] = $track->user->username; + $item['title'] = $track->user->username . ' - ' . $track->title; + $item['timestamp'] = strtotime($track->created_at); + $item['content'] = nl2br($track->description); + $item['enclosures'][] = $track->artwork_url; $item['id'] = self::URI . urlencode($this->getInput('u')) . '/' - . urlencode($tracks[$i]->permalink); + . urlencode($track->permalink); $item['uri'] = self::URI . urlencode($this->getInput('u')) . '/' - . urlencode($tracks[$i]->permalink); + . urlencode($track->permalink); $this->items[] = $item; - } + if (count($this->items) >= 10) { + break; + } + } } public function getIcon(){ @@ -64,8 +77,8 @@ class SoundCloudBridge extends BridgeAbstract { } public function getName(){ - if(!is_null($this->getInput('u'))) { - return $this->getInput('u') . ' - ' . self::NAME; + if($this->feedTitle) { + return $this->feedTitle . ' - ' . 
self::NAME; } return parent::getName(); @@ -99,14 +112,14 @@ class SoundCloudBridge extends BridgeAbstract { // Without url=http, this returns a 404 $playerHTML = getContents('https://w.soundcloud.com/player/?url=http') - or returnServerError('Unable to get player page.'); + or returnServerError('Unable to get player page.'); $regex = '/widget-.+?\.js/'; if(preg_match($regex, $playerHTML, $matches) == false) returnServerError('Unable to find widget JS URL.'); $widgetURL = 'https://widget.sndcdn.com/' . $matches[0]; $widgetJS = getContents($widgetURL) - or returnServerError('Unable to get widget JS page.'); + or returnServerError('Unable to get widget JS page.'); $regex = '/client_id.*?"(.+?)"/'; if(preg_match($regex, $widgetJS, $matches) == false) returnServerError('Unable to find client ID.'); @@ -117,13 +130,13 @@ class SoundCloudBridge extends BridgeAbstract { } private function buildAPIURL($endpoint, $parameters){ - return 'https://api.soundcloud.com/' + return 'https://api-v2.soundcloud.com/' . $endpoint . '?' . 
http_build_query($parameters); } - private function apiGet($endpoint, $parameters = array()){ + private function apiGet($endpoint, $parameters = array()) { $parameters['client_id'] = $this->getClientID(); try { diff --git a/bridges/TheCodingLoveBridge.php b/bridges/TheCodingLoveBridge.php index 8060c94..54fd0d2 100644 --- a/bridges/TheCodingLoveBridge.php +++ b/bridges/TheCodingLoveBridge.php @@ -11,14 +11,14 @@ class TheCodingLoveBridge extends BridgeAbstract { $html = getSimpleHTMLDOM(self::URI) or returnServerError('Could not request The Coding Love.'); - foreach($html->find('div.post') as $element) { + foreach($html->find('article.blog-post') as $element) { $item = array(); - $temp = $element->find('h3 a', 0); + $temp = $element->find('h1 a', 0); - $titre = $temp->innertext; + $title = $temp->innertext; $url = $temp->href; - $temp = $element->find('div.bodytype', 0); + $temp = $element->find('div.blog-post-content', 0); // retrieve .gif instead of static .jpg $images = $temp->find('p.e img'); @@ -28,17 +28,13 @@ class TheCodingLoveBridge extends BridgeAbstract { } $content = $temp->innertext; - $auteur = $temp->find('i', 0); - $pos = strpos($auteur->innertext, 'by'); - - if($pos > 0) { - $auteur = trim(str_replace('*/', '', substr($auteur->innertext, ($pos + 2)))); - $item['author'] = $auteur; - } + $temp = $element->find('div.post-meta-info', 0); + $author = $temp->find('span', 0); + $item['author'] = $author->innertext; $item['content'] .= trim($content); $item['uri'] = $url; - $item['title'] = trim($titre); + $item['title'] = trim($title); $this->items[] = $item; } diff --git a/bridges/TheHackerNewsBridge.php b/bridges/TheHackerNewsBridge.php index 687b620..1e710b3 100644 --- a/bridges/TheHackerNewsBridge.php +++ b/bridges/TheHackerNewsBridge.php @@ -17,6 +17,7 @@ class TheHackerNewsBridge extends BridgeAbstract { $article_url = $element->find('a.story-link', 0)->href; $article_author = trim($element->find('i.icon-user', 0)->parent()->plaintext); + 
$article_author = str_replace('', '', $article_author); $article_title = $element->find('h2.home-title', 0)->plaintext; //Date without time diff --git a/bridges/TwitchBridge.php b/bridges/TwitchBridge.php index 39b4601..8b43a31 100644 --- a/bridges/TwitchBridge.php +++ b/bridges/TwitchBridge.php @@ -20,7 +20,9 @@ class TwitchBridge extends BridgeAbstract { 'All' => 'all', 'Archive' => 'archive', 'Highlights' => 'highlight', - 'Uploads' => 'upload' + 'Uploads' => 'upload', + 'Past Premieres' => 'past_premiere', + 'Premiere Uploads' => 'premiere_upload' ), 'defaultValue' => 'archive' ) @@ -32,43 +34,90 @@ class TwitchBridge extends BridgeAbstract { */ const CLIENT_ID = 'kimne78kx3ncx6brgo4mv6wki5h1ko'; + const API_ENDPOINT = 'https://gql.twitch.tv/gql'; + const BROADCAST_TYPES = array( + 'all' => array( + 'ARCHIVE', + 'HIGHLIGHT', + 'UPLOAD', + 'PAST_PREMIERE', + 'PREMIERE_UPLOAD' + ), + 'archive' => 'ARCHIVE', + 'highlight' => 'HIGHLIGHT', + 'upload' => 'UPLOAD', + 'past_premiere' => 'PAST_PREMIERE', + 'premiere_upload' => 'PREMIERE_UPLOAD' + ); + public function collectData(){ - // get channel user - $query_data = array( - 'login' => $this->getInput('channel') - ); - $users = $this->apiGet('users', $query_data)->users; - if(count($users) === 0) - returnClientError('User "' - . $this->getInput('channel') - . '" could not be found'); - $user = $users[0]; - - // get video list - $query_endpoint = 'channels/' . $user->_id . 
'/videos'; - $query_data = array( - 'broadcast_type' => $this->getInput('type'), - 'limit' => 10 + $query = <<<'EOD' +query VODList($channel: String!, $types: [BroadcastType!]) { + user(login: $channel) { + displayName + videos(types: $types, sort: TIME) { + edges { + node { + id + title + publishedAt + lengthSeconds + viewCount + thumbnailURLs(width: 640, height: 360) + previewThumbnailURL(width: 640, height: 360) + description + tags + contentTags { + isLanguageTag + localizedName + } + game { + displayName + } + moments(momentRequestType: VIDEO_CHAPTER_MARKERS) { + edges { + node { + description + positionMilliseconds + } + } + } + } + } + } + } +} +EOD; + $variables = array( + 'channel' => $this->getInput('channel'), + 'types' => self::BROADCAST_TYPES[$this->getInput('type')] ); - $videos = $this->apiGet($query_endpoint, $query_data)->videos; + $data = $this->apiRequest($query, $variables); + + $user = $data->user; + foreach($user->videos->edges as $edge) { + $video = $edge->node; + + $url = 'https://www.twitch.tv/videos/' . 
$video->id; - foreach($videos as $video) { $item = array( - 'uri' => $video->url, + 'uri' => $url, 'title' => $video->title, - 'timestamp' => $video->published_at, - 'author' => $video->channel->display_name, + 'timestamp' => $video->publishedAt, + 'author' => $user->displayName, ); // Add categories for tags and played game - $item['categories'] = array_filter(explode(' ', $video->tag_list)); - if(!empty($video->game)) - $item['categories'][] = $video->game; + $item['categories'] = $video->tags; + if(!is_null($video->game)) + $item['categories'][] = $video->game->displayName; + foreach($video->contentTags as $tag) + if(!$tag->isLanguageTag) + $item['categories'][] = $tag->localizedName; // Add enclosures for thumbnails from a few points in the video - $item['enclosures'] = array(); - foreach($video->thumbnails->large as $thumbnail) - $item['enclosures'][] = $thumbnail->url; + // Thumbnail list has duplicate entries sometimes so remove those + $item['enclosures'] = array_unique($video->thumbnailURLs); /* * Content format example: @@ -86,44 +135,45 @@ class TwitchBridge extends BridgeAbstract { * */ $item['content'] = '<p><a href="' - . $video->url + . $url . '"><img src="' - . $video->preview->large + . $video->previewThumbnailURL . '" /></a></p><p>' - . $video->description_html + . $video->description // in markdown format . '</p><p><b>Duration:</b> ' - . $this->formatTimestampTime($video->length) + . $this->formatTimestampTime($video->lengthSeconds) . '<br/><b>Views:</b> ' - . $video->views + . $video->viewCount . '</p>'; // Add played games list to content - $video_id = trim($video->_id, 'v'); // _id gives 'v1234' but API wants '1234' - $markers = $this->apiGet('videos/' . $video_id . '/markers')->markers; - $item['content'] .= '<p><b>Played games:</b></b><ul><li><a href="' - . $video->url - . '">00:00:00</a> - ' - . $video->game - . 
'</li>'; - if(isset($markers->game_changes)) { - usort($markers->game_changes, function($a, $b) { - return $a->time - $b->time; - }); - foreach($markers->game_changes as $game_change) { - $item['categories'][] = $game_change->label; + $item['content'] .= '<p><b>Played games:</b><ul>'; + if(count($video->moments->edges) > 0) { + foreach($video->moments->edges as $edge) { + $moment = $edge->node; + + $item['categories'][] = $moment->description; $item['content'] .= '<li><a href="' - . $video->url + . $url . '?t=' - . $this->formatQueryTime($game_change->time) + . $this->formatQueryTime($moment->positionMilliseconds / 1000) . '">' - . $this->formatTimestampTime($game_change->time) + . $this->formatTimestampTime($moment->positionMilliseconds / 1000) . '</a> - ' - . $game_change->label + . $moment->description . '</li>'; } + } else { + $item['content'] .= '<li><a href="' + . $url + . '">00:00:00</a> - ' + . ($video->game ? $video->game->displayName : 'No Game') + . '</li>'; } $item['content'] .= '</ul></p>'; + $item['categories'] = array_unique($item['categories']); + $this->items[] = $item; } } @@ -144,25 +194,37 @@ class TwitchBridge extends BridgeAbstract { $seconds % 60); } - /* - * Ideally the new 'helix' API should be used as v5/'kraken' is deprecated. - * The new API however still misses many features (markers, played game..) of - * the old one, so let's use the old one for as long as it's available. - */ - private function apiGet($endpoint, $query_data = array()) { - $query_data['api_version'] = 5; - $url = 'https://api.twitch.tv/kraken/' - . $endpoint - . '?' - . http_build_query($query_data); + // GraphQL: https://graphql.org/ + // Tool for developing/testing queries: https://github.com/skevy/graphiql-app + private function apiRequest($query, $variables) { + $request = array( + 'query' => $query, + 'variables' => $variables + ); $header = array( 'Client-ID: ' . 
self::CLIENT_ID ); + $opts = array( + CURLOPT_CUSTOMREQUEST => 'POST', + CURLOPT_POSTFIELDS => json_encode($request) + ); - $data = json_decode(getContents($url, $header)) - or returnServerError('API request to "' . $url . '" failed.'); + Debug::log("Sending GraphQL query:\n" . $query); + Debug::log("Sending GraphQL variables:\n" + . json_encode($variables, JSON_PRETTY_PRINT)); + + $response = json_decode(getContents(self::API_ENDPOINT, $header, $opts)) + or returnServerError('API request to "' . self::API_ENDPOINT . '" failed.'); + + Debug::log("Got GraphQL response:\n" + . json_encode($response, JSON_PRETTY_PRINT)); + + if(isset($response->errors)) { + $messages = array_column($response->errors, 'message'); + returnServerError('API error(s): ' . implode("\n", $messages)); + } - return $data; + return $response->data; } public function getName(){ diff --git a/bridges/TwitterBridge.php b/bridges/TwitterBridge.php index 0d8b024..0bc2f67 100644 --- a/bridges/TwitterBridge.php +++ b/bridges/TwitterBridge.php @@ -2,6 +2,9 @@ class TwitterBridge extends BridgeAbstract { const NAME = 'Twitter Bridge'; const URI = 'https://twitter.com/'; + const API_URI = 'https://api.twitter.com'; + const GUEST_TOKEN_USES = 100; + const GUEST_TOKEN_EXPIRY = 300; // 5min const CACHE_TIMEOUT = 300; // 5min const DESCRIPTION = 'returns tweets'; const MAINTAINER = 'pmaziere'; @@ -92,6 +95,20 @@ EOD 'required' => false, 'title' => 'Specify term to search for' ) + ), + 'By list ID' => array( + 'listid' => array( + 'name' => 'List ID', + 'exampleValue' => '31748', + 'required' => true, + 'title' => 'Insert the list id' + ), + 'filter' => array( + 'name' => 'Filter', + 'exampleValue' => '#rss-bridge', + 'required' => false, + 'title' => 'Specify term to search for' + ) ) ); @@ -142,6 +159,8 @@ EOD break; case 'By list': return $this->getInput('list') . ' - Twitter list by ' . $this->getInput('user'); + case 'By list ID': + return 'Twitter List #' . 
$this->getInput('listid'); default: return parent::getName(); } return 'Twitter ' . $specific . $this->getInput($param); @@ -164,26 +183,46 @@ EOD . urlencode($this->getInput('user')) . '/lists/' . str_replace(' ', '-', strtolower($this->getInput('list'))); + case 'By list ID': + return self::URI + . 'i/lists/' + . urlencode($this->getInput('listid')); default: return parent::getURI(); } } + private function getApiURI() { + switch($this->queriedContext) { + case 'By keyword or hashtag': + return self::API_URI + . '/2/search/adaptive.json?q=' + . urlencode($this->getInput('q')) + . '&tweet_mode=extended&tweet_search_mode=live'; + case 'By username': + return self::API_URI + . '/2/timeline/profile/' + . $this->getRestId($this->getInput('u')) + . '.json?tweet_mode=extended'; + case 'By list': + return self::API_URI + . '/2/timeline/list.json?list_id=' + . $this->getListId($this->getInput('user'), $this->getInput('list')) + . '&tweet_mode=extended'; + case 'By list ID': + return self::API_URI + . '/2/timeline/list.json?list_id=' + . $this->getInput('listid') + . '&tweet_mode=extended'; + default: returnServerError('Invalid query context !'); + } + } + public function collectData(){ $html = ''; $page = $this->getURI(); + $data = json_decode($this->getApiContents($this->getApiURI())); - $header = array( - 'User-Agent: Mozilla/5.0 (Windows NT 9.0; WOW64; Trident/7.0; rv:11.0) like Gecko' - ); - - if(php_sapi_name() === 'cli' && empty(ini_get('curl.cainfo'))) { - $cookies = $this->getCookies($page); - $html = getSimpleHTMLDOM($page, array_merge($header, array("Cookie: $cookies"))); - } else { - $html = getSimpleHTMLDOM($page, $header, array(CURLOPT_COOKIEFILE => '')); - } - - if(!$html) { + if(!$data) { switch($this->queriedContext) { case 'By keyword or hashtag': returnServerError('No results for this query.'); @@ -196,75 +235,80 @@ EOD $hidePictures = $this->getInput('nopic'); - foreach($html->find('div.js-stream-tweet') as $tweet) { - - // Skip retweets? 
- if($this->getInput('noretweet') - && $tweet->find('div.context span.js-retweet-text a', 0)) { - continue; + $promotedTweetIds = array_reduce($data->timeline->instructions[0]->addEntries->entries, function($carry, $entry) { + if (!isset($entry->content->item)) { + return $carry; } + $tweet = $entry->content->item->content->tweet; + if (isset($tweet->promotedMetadata)) { + $carry[] = $tweet->id; + } + return $carry; + }, array()); + + foreach($data->globalObjects->tweets as $tweet) { - // remove 'invisible' content - foreach($tweet->find('.invisible') as $invisible) { - $invisible->outertext = ''; + /* Debug::log('>>> ' . json_encode($tweet)); */ + // Skip spurious retweets + if (isset($tweet->retweeted_status_id_str) && substr($tweet->full_text, 0, 4) === 'RT @') { + continue; } - // Skip protmoted tweets - $heading = $tweet->previousSibling(); - if(!is_null($heading) && - $heading->getAttribute('class') === 'promoted-tweet-heading' - ) { + // Skip promoted tweets + if (in_array($tweet->id_str, $promotedTweetIds)) { continue; } $item = array(); // extract username and sanitize - $item['username'] = htmlspecialchars_decode($tweet->getAttribute('data-screen-name'), ENT_QUOTES); - // extract fullname (pseudonym) - $item['fullname'] = htmlspecialchars_decode($tweet->getAttribute('data-name'), ENT_QUOTES); - // get author + $user_info = $this->getUserInformation($tweet->user_id_str, $data->globalObjects); + + $item['username'] = $user_info->screen_name; + $item['fullname'] = $user_info->name; $item['author'] = $item['fullname'] . ' (@' . $item['username'] . ')'; - if($rt = $tweet->find('div.context span.js-retweet-text a', 0)) { - $item['author'] .= ' RT: @' . $rt->plaintext; + if (null !== $this->getInput('u') && $item['username'] != $this->getInput('u')) { + $item['author'] .= ' RT: @' . 
$this->getInput('u'); } - // get avatar link - $item['avatar'] = $tweet->find('img', 0)->src; - // get TweetID - $item['id'] = $tweet->getAttribute('data-tweet-id'); - // get tweet link - $item['uri'] = self::URI . substr($tweet->find('a.js-permalink', 0)->getAttribute('href'), 1); - // extract tweet timestamp - $item['timestamp'] = $tweet->find('span.js-short-timestamp', 0)->getAttribute('data-time'); - // generate the title - $item['title'] = strip_tags($this->fixAnchorSpacing(htmlspecialchars_decode( - $tweet->find('p.js-tweet-text', 0), ENT_QUOTES), '<a>')); + $item['avatar'] = $user_info->profile_image_url_https; - switch($this->queriedContext) { - case 'By list': - // Check if filter applies to list (using raw content) - if($this->getInput('filter')) { - if(stripos($tweet->find('p.js-tweet-text', 0)->plaintext, $this->getInput('filter')) === false) { - continue 2; // switch + for-loop! - } - } - break; - default: + $item['id'] = $tweet->id_str; + $item['uri'] = self::URI . $item['username'] . '/status/' . $item['id']; + // extract tweet timestamp + $item['timestamp'] = $tweet->created_at; + + // Convert plain text URLs into HTML hyperlinks + $cleanedTweet = $tweet->full_text; + $foundUrls = false; + + if (isset($tweet->entities->media)) { + foreach($tweet->entities->media as $media) { + $cleanedTweet = str_replace($media->url, + '<a href="' . $media->expanded_url . '">' . $media->display_url . '</a>', + $cleanedTweet); + $foundUrls = true; + } } + if (isset($tweet->entities->urls)) { + foreach($tweet->entities->urls as $url) { + $cleanedTweet = str_replace($url->url, + '<a href="' . $url->expanded_url . '">' . $url->display_url . 
'</a>', + $cleanedTweet); + $foundUrls = true; + } + } + if ($foundUrls === false) { + // fallback to regex'es + $reg_ex = '/(http|https|ftp|ftps)\:\/\/[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(\/\S*)?/'; + if(preg_match($reg_ex, $tweet->full_text, $url)) { + $cleanedTweet = preg_replace($reg_ex, + "<a href='{$url[0]}' target='_blank'>{$url[0]}</a> ", + $cleanedTweet); + } + } + // generate the title + $item['title'] = strip_tags($cleanedTweet); - $this->processContentLinks($tweet); - $this->processEmojis($tweet); - - // get tweet text - $cleanedTweet = str_replace( - 'href="/', - 'href="' . self::URI, - $tweet->find('p.js-tweet-text', 0)->innertext - ); - - // fix anchors missing spaces in-between - $cleanedTweet = $this->fixAnchorSpacing($cleanedTweet); - - // Add picture to content + // Add avatar $picture_html = ''; if(!$hidePictures) { $picture_html = <<<EOD @@ -278,31 +322,79 @@ EOD EOD; } - // Add embeded image to content - $image_html = ''; - $images = $this->getImageURI($tweet); - if(!$this->getInput('noimg') && !is_null($images)) { - - foreach ($images as $image) { - - // Set image scaling - $image_orig = $this->getInput('noimgscaling') ? $image : $image . ':orig'; - $image_thumb = $this->getInput('noimgscaling') ? $image : $image . ':thumb'; - - // add enclosures - $item['enclosures'][] = $image_orig; + // Get images + $media_html = ''; + if(isset($tweet->extended_entities->media) && !$this->getInput('noimg')) { + foreach($tweet->extended_entities->media as $media) { + switch($media->type) { + case 'photo': + $image = $media->media_url_https . 
'?name=orig'; + $display_image = $media->media_url_https; + // add enclosures + $item['enclosures'][] = $image; - $image_html .= <<<EOD -<a href="{$image_orig}"> + $media_html .= <<<EOD +<a href="{$image}"> <img style="align:top; max-width:558px; border:1px solid black;" - src="{$image_thumb}" /> + referrerpolicy="no-referrer" + src="{$display_image}" /> </a> EOD; + break; + case 'video': + case 'animated_gif': + if(isset($media->video_info)) { + $link = $media->expanded_url; + $poster = $media->media_url_https; + $video = null; + $maxBitrate = -1; + foreach($media->video_info->variants as $variant) { + $bitRate = isset($variant->bitrate) ? $variant->bitrate : -100; + if ($bitRate > $maxBitrate) { + $maxBitrate = $bitRate; + $video = $variant->url; + } + } + if(!is_null($video)) { + // add enclosures + $item['enclosures'][] = $video; + $item['enclosures'][] = $poster; + + $media_html .= <<<EOD +<a href="{$link}">Video</a> +<video + style="align:top; max-width:558px; border:1px solid black;" + referrerpolicy="no-referrer" + src="{$video}" poster="{$poster}" /> +EOD; + } + } + break; + default: + Debug::log('Missing support for media type: ' . $media->type); + } } } - // add content + switch($this->queriedContext) { + case 'By list': + case 'By list ID': + // Check if filter applies to list (using raw content) + if($this->getInput('filter')) { + if(stripos($cleanedTweet, $this->getInput('filter')) === false) { + continue 2; // switch + for-loop! + } + } + break; + case 'By username': + if ($this->getInput('noretweet') && $item['username'] != $this->getInput('u')) { + continue 2; // switch + for-loop! 
+ } + break; + default: + } + $item['content'] = <<<EOD <div style="display: inline-block; vertical-align: top;"> {$picture_html} @@ -311,155 +403,149 @@ EOD; <blockquote>{$cleanedTweet}</blockquote> </div> <div style="display: block; vertical-align: top;"> - <blockquote>{$image_html}</blockquote> + <blockquote>{$media_html}</blockquote> </div> EOD; - // add quoted tweet - $quotedTweet = $tweet->find('div.QuoteTweet', 0); - if($quotedTweet) { - // get tweet text - $cleanedQuotedTweet = str_replace( - 'href="/', - 'href="' . self::URI, - $quotedTweet->find('div.tweet-text', 0)->innertext - ); - - $this->processContentLinks($quotedTweet); - $this->processEmojis($quotedTweet); - - // Add embeded image to content - $quotedImage_html = ''; - $quotedImages = $this->getQuotedImageURI($tweet); - - if(!$this->getInput('noimg') && !is_null($quotedImages)) { - - foreach ($quotedImages as $image) { - - // Set image scaling - $image_orig = $this->getInput('noimgscaling') ? $image : $image . ':orig'; - $image_thumb = $this->getInput('noimgscaling') ? $image : $image . 
':thumb'; - - // add enclosures - $item['enclosures'][] = $image_orig; - - $quotedImage_html .= <<<EOD -<a href="{$image_orig}"> -<img - style="align:top; max-width:558px; border:1px solid black;" - src="{$image_thumb}" /> -</a> -EOD; - } - } - - $item['content'] = <<<EOD -{$item['content']} -<hr> -<div style="display: inline-block; vertical-align: top;"> - <blockquote>{$cleanedQuotedTweet}</blockquote> -</div> -<div style="display: block; vertical-align: top;"> - <blockquote>{$quotedImage_html}</blockquote> -</div> -EOD; - } $item['content'] = htmlspecialchars_decode($item['content'], ENT_QUOTES); // put out $this->items[] = $item; } - } - - private function processEmojis($tweet){ - // process emojis (reduce size) - foreach($tweet->find('img.Emoji') as $img) { - $img->style .= ' height: 1em;'; - } - } - private function processContentLinks($tweet){ - // processing content links - foreach($tweet->find('a') as $link) { - if($link->hasAttribute('data-expanded-url')) { - $link->href = $link->getAttribute('data-expanded-url'); - } - $link->removeAttribute('data-expanded-url'); - $link->removeAttribute('data-query-source'); - $link->removeAttribute('rel'); - $link->removeAttribute('class'); - $link->removeAttribute('target'); - $link->removeAttribute('title'); - } + usort($this->items, array('TwitterBridge', 'compareTweetId')); } - private function fixAnchorSpacing($content){ - // fix anchors missing spaces in-between - return str_replace( - '<a', - ' <a', - $content - ); + private static function compareTweetId($tweet1, $tweet2) { + return (intval($tweet1['id']) < intval($tweet2['id']) ? 
1 : -1); } - private function getImageURI($tweet){ - // Find media in tweet - $images = array(); - - $container = $tweet->find('div.AdaptiveMedia-container', 0); + //The aim of this function is to get an API key and a guest token + //This function takes 2 requests, and therefore is cached + private function getApiKey() { + + $cacheFac = new CacheFactory(); + $cacheFac->setWorkingDir(PATH_LIB_CACHES); + $r_cache = $cacheFac->create(Configuration::getConfig('cache', 'type')); + $r_cache->setScope(get_called_class()); + $r_cache->setKey(array('refresh')); + $data = $r_cache->loadData(); + + $refresh = null; + if($data === null) { + $refresh = time(); + $r_cache->saveData($refresh); + } else { + $refresh = $data; + } - if($container && $container->find('img', 0)) { - foreach ($container->find('img') as $img) { - $images[] = $img->src; + $cacheFac = new CacheFactory(); + $cacheFac->setWorkingDir(PATH_LIB_CACHES); + $cache = $cacheFac->create(Configuration::getConfig('cache', 'type')); + $cache->setScope(get_called_class()); + $cache->setKey(array('api_key')); + $data = $cache->loadData(); + + $apiKey = null; + if($data === null || (time() - $refresh) > self::GUEST_TOKEN_EXPIRY) { + $twitterPage = getContents('https://twitter.com'); + + $jsLink = false; + $jsMainRegexArray = array( + '/(https:\/\/abs\.twimg\.com\/responsive-web\/web\/main\.[^\.]+\.js)/m', + '/(https:\/\/abs\.twimg\.com\/responsive-web\/web_legacy\/main\.[^\.]+\.js)/m', + '/(https:\/\/abs\.twimg\.com\/responsive-web\/client-web\/main\.[^\.]+\.js)/m', + '/(https:\/\/abs\.twimg\.com\/responsive-web\/client-web-legacy\/main\.[^\.]+\.js)/m', + ); + foreach ($jsMainRegexArray as $jsMainRegex) { + if (preg_match_all($jsMainRegex, $twitterPage, $jsMainMatches, PREG_SET_ORDER, 0)) { + $jsLink = $jsMainMatches[0][0]; + break; + } + } + if (!$jsLink) { + returnServerError('Could not locate main.js link'); } - } - if (!empty($images)) { - return $images; + $jsContent = getContents($jsLink); + $apiKeyRegex = 
'/([a-zA-Z0-9]{59}%[a-zA-Z0-9]{44})/m'; + preg_match_all($apiKeyRegex, $jsContent, $apiKeyMatches, PREG_SET_ORDER, 0); + $apiKey = $apiKeyMatches[0][0]; + $cache->saveData($apiKey); + } else { + $apiKey = $data; } - return null; - } + $cacheFac2 = new CacheFactory(); + $cacheFac2->setWorkingDir(PATH_LIB_CACHES); + $gt_cache = $cacheFac->create(Configuration::getConfig('cache', 'type')); + $gt_cache->setScope(get_called_class()); + $gt_cache->setKey(array('guest_token')); + $guestTokenUses = $gt_cache->loadData(); + + $guestToken = null; + if($guestTokenUses === null || !is_array($guestTokenUses) || count($guestTokenUses) != 2 + || $guestTokenUses[0] <= 0 || (time() - $refresh) > self::GUEST_TOKEN_EXPIRY) { + $guestToken = $this->getGuestToken(); + $gt_cache->saveData(array(self::GUEST_TOKEN_USES, $guestToken)); + $r_cache->saveData(time()); + } else { + $guestTokenUses[0] -= 1; + $gt_cache->saveData($guestTokenUses); + $guestToken = $guestTokenUses[1]; + } - private function getQuotedImageURI($tweet){ - // Find media in tweet - $images = array(); + return array($apiKey, $guestToken); - $container = $tweet->find('div.QuoteMedia-container', 0); + } - if($container && $container->find('img', 0)) { - foreach ($container->find('img') as $img) { - $images[] = $img->src; - } - } + // Get a guest token. This is different to an API key, + // and it seems to change more regularly than the API key. 
+ private function getGuestToken() { + $pageContent = getContents('https://twitter.com', array(), array(), true); + + $guestTokenRegex = '/gt=([0-9]*)/m'; + preg_match_all($guestTokenRegex, $pageContent['header'], $guestTokenMatches, PREG_SET_ORDER, 0); + if (!$guestTokenMatches) + preg_match_all($guestTokenRegex, $pageContent['content'], $guestTokenMatches, PREG_SET_ORDER, 0); + if (!$guestTokenMatches) returnServerError('Could not parse guest token'); + $guestToken = $guestTokenMatches[0][1]; + return $guestToken; + } - if (!empty($images)) { - return $images; - } + private function getApiContents($uri) { + $apiKeys = $this->getApiKey(); + $headers = array('authorization: Bearer ' . $apiKeys[0], + 'x-guest-token: ' . $apiKeys[1], + ); + return getContents($uri, $headers); + } - return null; + private function getRestId($username) { + $searchparams = urlencode('{"screen_name":"' . strtolower($username) . '", "withHighlightedLabel":true}'); + $searchURL = self::API_URI . '/graphql/-xfUfZsnR_zqjFd-IfrN5A/UserByScreenName?variables=' . $searchparams; + $searchResult = $this->getApiContents($searchURL); + $searchResult = json_decode($searchResult); + return $searchResult->data->user->rest_id; } - private function getCookies($pageURL){ + private function getListId($username, $listName) { + $searchparams = urlencode('{"screenName":"' + . strtolower($username) + . '", "listSlug": "' + . $listName + . '", "withHighlightedLabel":false}'); + $searchURL = self::API_URI . '/graphql/ErWsz9cObLel1BF-HjuBlA/ListBySlug?variables=' . 
$searchparams; + $searchResult = $this->getApiContents($searchURL); + $searchResult = json_decode($searchResult); + return $searchResult->data->user_by_screen_name->list->id_str; + } - $ctx = stream_context_create(array( - 'http' => array( - 'follow_location' => false - ) - ) - ); - $a = file_get_contents($pageURL, 0, $ctx); - - //First request to get the cookie - $cookies = ''; - foreach($http_response_header as $hdr) { - if(stripos($hdr, 'Set-Cookie') !== false) { - $cLine = explode(':', $hdr)[1]; - $cLine = explode(';', $cLine)[0]; - $cookies .= ';' . $cLine; + private function getUserInformation($userId, $apiData) { + foreach($apiData->users as $user) { + if($user->id_str == $userId) { + return $user; } } - - return substr($cookies, 2); } } diff --git a/bridges/UnraidCommunityApplicationsBridge.php b/bridges/UnraidCommunityApplicationsBridge.php new file mode 100644 index 0000000..1ab06e3 --- /dev/null +++ b/bridges/UnraidCommunityApplicationsBridge.php @@ -0,0 +1,71 @@ +<?php +class UnraidCommunityApplicationsBridge extends BridgeAbstract { + const NAME = 'Unraid Community Applications'; + const URI = 'https://forums.unraid.net/topic/38582-plug-in-community-applications/'; + const DESCRIPTION = 'Fetches the latest fifteen new apps/plugins from Unraid Community Applications'; + const MAINTAINER = 'Paroleen'; + const CACHE_TIMEOUT = 3600; + + const APPSURI = 'https://raw.githubusercontent.com/Squidly271/AppFeed/master/applicationFeed.json'; + + private $apps = array(); + + private function fetchApps() { + Debug::log('Fetching all applications/plugins'); + $this->apps = getContents(self::APPSURI) + or returnServerError('Could not fetch JSON for apps.'); + $this->apps = json_decode($this->apps, true)['applist']; + } + + private function sortApps() { + Debug::log('Sorting applications/plugins'); + usort($this->apps, function($app1, $app2) { + return $app1['FirstSeen'] < $app2['FirstSeen'] ? 
1 : -1; + }); + } + + public function collectData() { + $this->fetchApps(); + $this->sortApps(); + + Debug::log('Building RSS feed'); + foreach($this->apps as $app) { + if(!array_key_exists('Language', $app)) { + $item = array(); + $item['title'] = $app['Name']; + $item['timestamp'] = $app['FirstSeen']; + $item['author'] = explode('\'', $app['Repo'])[0]; + $item['categories'] = explode(' ', $app['Category']); + $item['content'] = ''; + + if(array_key_exists('Icon', $app)) + $item['content'] .= '<img style="width: 64px" src="' + . $app['Icon'] + . '">'; + + if(array_key_exists('Overview', $app)) + $item['content'] .= '<p>' + . $app['Overview'] + . '</p>'; + + if(array_key_exists('Project', $app)) + $item['uri'] = $app['Project']; + + if(array_key_exists('Registry', $app)) + $item['content'] .= '<br><a href="' + . $app['Registry'] + . '">Docker Hub</a>'; + + if(array_key_exists('Support', $app)) + $item['content'] .= '<br><a href="' + . $app['Support'] + . '">Support</a>'; + + $this->items[] = $item; + + if(count($this->items) >= 15) + break; + } + } + } +} diff --git a/bridges/VarietyBridge.php b/bridges/VarietyBridge.php index a2e6170..8bc48f4 100644 --- a/bridges/VarietyBridge.php +++ b/bridges/VarietyBridge.php @@ -8,7 +8,7 @@ class VarietyBridge extends FeedExpander { const DESCRIPTION = 'RSS feed for Variety'; public function collectData(){ - $this->collectExpandableDatas('http://feeds.feedburner.com/variety/headlines', 15); + $this->collectExpandableDatas('https://feeds.feedburner.com/variety/headlines', 15); } protected function parseItem($newsItem){ diff --git a/bridges/VkBridge.php b/bridges/VkBridge.php index ea81a2b..87eaef2 100644 --- a/bridges/VkBridge.php +++ b/bridges/VkBridge.php @@ -355,7 +355,7 @@ class VkBridge extends BridgeAbstract private function getTitle($content) { - preg_match('/^["\w\ \p{Cyrillic}\(\)\?#«»-]+/mu', htmlspecialchars_decode($content), $result); + preg_match('/^["\w\ \p{L}\(\)\?#«»-]+/mu', htmlspecialchars_decode($content), 
$result); if (count($result) == 0) return 'untitled'; return $result[0]; } @@ -423,11 +423,11 @@ class VkBridge extends BridgeAbstract 'count' => 200 )); - if (isset($result['error'])) return; - - foreach($result['response']['items'] as $item) { - $video_id = strval($item['owner_id']) . '_' . strval($item['id']); - $this->videos[$video_id]['url'] = $item['player']; + if (!isset($result['error'])) { + foreach($result['response']['items'] as $item) { + $video_id = strval($item['owner_id']) . '_' . strval($item['id']); + $this->videos[$video_id]['url'] = $item['player']; + } } foreach($this->items as &$item) { diff --git a/bridges/WordPressBridge.php b/bridges/WordPressBridge.php index 1589c72..548e46e 100644 --- a/bridges/WordPressBridge.php +++ b/bridges/WordPressBridge.php @@ -92,9 +92,9 @@ class WordPressBridge extends FeedExpander { returnClientError('The url parameter must either refer to http or https protocol.'); } try{ - $this->collectExpandableDatas($this->getURI() . '/feed/atom/'); + $this->collectExpandableDatas($this->getURI() . '/feed/atom/', 20); } catch (Exception $e) { - $this->collectExpandableDatas($this->getURI() . '/?feed=atom'); + $this->collectExpandableDatas($this->getURI() . 
'/?feed=atom', 20); } } diff --git a/bridges/WorldCosplayBridge.php b/bridges/WorldCosplayBridge.php new file mode 100644 index 0000000..21776ff --- /dev/null +++ b/bridges/WorldCosplayBridge.php @@ -0,0 +1,141 @@ +<?php +class WorldCosplayBridge extends BridgeAbstract { + const NAME = 'WorldCosplay Bridge'; + const URI = 'https://worldcosplay.net/'; + const DESCRIPTION = 'Returns WorldCosplay photos'; + const MAINTAINER = 'AxorPL'; + + const API_CHARACTER = 'api/photo/list.json?character_id=%u&limit=%u'; + const API_COSPLAYER = 'api/member/photos.json?member_id=%u&limit=%u'; + const API_SERIES = 'api/photo/list.json?title_id=%u&limit=%u'; + const API_TAG = 'api/tag/photo_list.json?id=%u&limit=%u'; + + const CONTENT_HTML + = '<a href="%s" target="_blank"><img src="%s" alt="%s" title="%s"></a>'; + + const ERR_CONTEXT = 'No context provided'; + const ERR_QUERY = 'Unable to query: %s'; + + const LIMIT_MIN = 1; + const LIMIT_MAX = 24; + + const PARAMETERS = array( + 'Character' => array( + 'cid' => array( + 'name' => 'Character ID', + 'type' => 'number', + 'required' => true, + 'title' => 'WorldCosplay character ID', + 'exampleValue' => 18204 + ) + ), + 'Cosplayer' => array( + 'uid' => array( + 'name' => 'Cosplayer ID', + 'type' => 'number', + 'required' => true, + 'title' => 'Cosplayer\'s WorldCosplay profile ID', + 'exampleValue' => 406782 + ) + ), + 'Series' => array( + 'sid' => array( + 'name' => 'Series ID', + 'type' => 'number', + 'required' => true, + 'title' => 'WorldCosplay series ID', + 'exampleValue' => 3139 + ) + ), + 'Tag' => array( + 'tid' => array( + 'name' => 'Tag ID', + 'type' => 'number', + 'required' => true, + 'title' => 'WorldCosplay tag ID', + 'exampleValue' => 33643 + ) + ), + 'global' => array( + 'limit' => array( + 'name' => 'Limit', + 'type' => 'number', + 'required' => false, + 'title' => 'Maximum number of photos to return', + 'exampleValue' => 5, + 'defaultValue' => 5 + ) + ) + ); + + public function collectData() { + $limit = 
$this->getInput('limit'); + $limit = min(self::LIMIT_MAX, max(self::LIMIT_MIN, $limit)); + switch($this->queriedContext) { + case 'Character': + $id = $this->getInput('cid'); + $url = self::API_CHARACTER; + break; + case 'Cosplayer': + $id = $this->getInput('uid'); + $url = self::API_COSPLAYER; + break; + case 'Series': + $id = $this->getInput('sid'); + $url = self::API_SERIES; + break; + case 'Tag': + $id = $this->getInput('tid'); + $url = self::API_TAG; + break; + default: + returnClientError(self::ERR_CONTEXT); + } + $url = self::URI . sprintf($url, $id, $limit); + + $json = json_decode(getContents($url)) + or returnServerError(sprintf(self::ERR_QUERY, $url)); + if($json->has_error) { + returnServerError($json->message); + } + $list = $json->list; + + foreach($list as $img) { + $item = array(); + $item['uri'] = self::URI . substr($img->photo->url, 1); + $item['title'] = $img->photo->subject; + $item['timestamp'] = $img->photo->created_at; + $item['author'] = $img->member->global_name; + $item['enclosures'] = array($img->photo->large_url); + $item['uid'] = $img->photo->id; + $item['content'] = sprintf( + self::CONTENT_HTML, + $item['uri'], + $item['enclosures'][0], + $item['title'], + $item['title'] + ); + $this->items[] = $item; + } + } + + public function getName() { + switch($this->queriedContext) { + case 'Character': + $id = $this->getInput('cid'); + break; + case 'Cosplayer': + $id = $this->getInput('uid'); + break; + case 'Series': + $id = $this->getInput('sid'); + break; + case 'Tag': + $id = $this->getInput('tid'); + break; + default: + return parent::getName(); + } + return sprintf('%s %u - ', $this->queriedContext, $id) . 
self::NAME; + } +} diff --git a/bridges/WosckerBridge.php b/bridges/WosckerBridge.php new file mode 100644 index 0000000..7f34853 --- /dev/null +++ b/bridges/WosckerBridge.php @@ -0,0 +1,51 @@ +<?php +class WosckerBridge extends BridgeAbstract { + const NAME = 'Woscker Bridge'; + const URI = 'https://woscker.com/'; + const DESCRIPTION = 'Returns news of the day'; + const MAINTAINER = 'VerifiedJoseph'; + const PARAMETERS = array(); + + const CACHE_TIMEOUT = 1800; // 30 mins + + public function collectData() { + $html = getSimpleHTMLDOM($this->getURI()) + or returnServerError('Could not request: ' . $this->getURI()); + + $date = $html->find('h1', 0)->plaintext; + $timestamp = $html->find('span.dateFont', 0)->plaintext . ' ' . $html->find('span.dateFont', 1)->plaintext; + + $item = array(); + $item['title'] = $date; + $item['content'] = $this->formatContent($html); + $item['timestamp'] = $timestamp; + + $this->items[] = $item; + } + + private function formatContent($html) { + $html->find('h1', 0)->outertext = ''; + + foreach ($html->find('hr') as $hr) { + $hr->outertext = ''; + } + + foreach ($html->find('div.betweenHeadline') as $div) { + $div->outertext = ''; + } + + foreach ($html->find('div.dividingBarrier') as $div) { + $div->outertext = ''; + } + + foreach ($html->find('h2') as $h2) { + $h2->outertext = '<br><strong>' . $h2->innertext . '</strong><br>'; + } + + foreach ($html->find('h3') as $h3) { + $h3->outertext = $h3->innertext . 
'<br>'; + } + + return $html->find('div.fullContentPiece', 0)->innertext; + } +} diff --git a/bridges/XPathBridge.php b/bridges/XPathBridge.php new file mode 100644 index 0000000..5aa280e --- /dev/null +++ b/bridges/XPathBridge.php @@ -0,0 +1,251 @@ +<?php + +class XPathBridge extends XPathAbstract { + const NAME = 'XPathBridge'; + const URI = 'https://github.com/rss-bridge/rss-bridge'; + const DESCRIPTION + = 'Parse any webpage using <a href="https://devhints.io/xpath" target="_blank">XPath expressions</a>'; + const MAINTAINER = 'Niehztog'; + const PARAMETERS = array( + '' => array( + + 'url' => array( + 'name' => 'Enter web page URL', + 'title' => <<<"EOL" +You can specify any website URL which serves data suited for display in RSS feeds +(for example a news blog). +EOL + , 'type' => 'text', + 'exampleValue' => 'https://news.blizzard.com/en-en', + 'defaultValue' => 'https://news.blizzard.com/en-en', + 'required' => true + ), + + 'item' => array( + 'name' => 'Item selector', + 'title' => <<<"EOL" +Enter an XPath expression matching a list of dom nodes, each node containing one +feed article item in total (usually a surrounding <div> or <span> tag). This will +be the context nodes for all of the following expressions. This expression usually +starts with a single forward slash. +EOL + , 'type' => 'text', + 'exampleValue' => '/html/body/div/div[4]/div[2]/div[2]/div/div/section/ol/li/article', + 'defaultValue' => '/html/body/div/div[4]/div[2]/div[2]/div/div/section/ol/li/article', + 'required' => true + ), + + 'title' => array( + 'name' => 'Item title selector', + 'title' => <<<"EOL" +This expression should match a node contained within each article item node +containing the article headline. It should start with a dot followed by two +forward slashes, referring to any descendant nodes of the article item node. 
+EOL + , 'type' => 'text', + 'exampleValue' => './/div/div[2]/h2', + 'defaultValue' => './/div/div[2]/h2', + 'required' => true + ), + + 'content' => array( + 'name' => 'Item description selector', + 'title' => <<<"EOL" +This expression should match a node contained within each article item node +containing the article content or description. It should start with a dot +followed by two forward slashes, referring to any descendant nodes of the +article item node. +EOL + , 'type' => 'text', + 'exampleValue' => './/div[@class="ArticleListItem-description"]/div[@class="h6"]', + 'defaultValue' => './/div[@class="ArticleListItem-description"]/div[@class="h6"]', + 'required' => false + ), + + 'uri' => array( + 'name' => 'Item URL selector', + 'title' => <<<"EOL" +This expression should match a node's attribute containing the article URL +(usually the href attribute of an <a> tag). It should start with a dot +followed by two forward slashes, referring to any descendant nodes of +the article item node. Attributes can be selected by prepending an @ char +before the attributes name. +EOL + , 'type' => 'text', + 'exampleValue' => './/a[@class="ArticleLink ArticleLink"]/@href', + 'defaultValue' => './/a[@class="ArticleLink ArticleLink"]/@href', + 'required' => false + ), + + 'author' => array( + 'name' => 'Item author selector', + 'title' => <<<"EOL" +This expression should match a node contained within each article item +node containing the article author's name. It should start with a dot +followed by two forward slashes, referring to any descendant nodes of +the article item node. +EOL + , 'type' => 'text', + 'required' => false + ), + + 'timestamp' => array( + 'name' => 'Item date selector', + 'title' => <<<"EOL" +This expression should match a node or node's attribute containing the +article timestamp or date (parsable by PHP's strtotime function). It +should start with a dot followed by two forward slashes, referring to +any descendant nodes of the article item node. 
Attributes can be +selected by prepending an @ char before the attributes name. +EOL + , 'type' => 'text', + 'exampleValue' => './/time[@class="ArticleListItem-footerTimestamp"]/@timestamp', + 'defaultValue' => './/time[@class="ArticleListItem-footerTimestamp"]/@timestamp', + 'required' => false + ), + + 'enclosures' => array( + 'name' => 'Item image selector', + 'title' => <<<"EOL" +This expression should match a node's attribute containing an article +image URL (usually the src attribute of an <img> tag or a style +attribute). It should start with a dot followed by two forward slashes, +referring to any descendant nodes of the article item node. Attributes +can be selected by prepending an @ char before the attributes name. +EOL + , 'type' => 'text', + 'exampleValue' => './/div[@class="ArticleListItem-image"]/@style', + 'defaultValue' => './/div[@class="ArticleListItem-image"]/@style', + 'required' => false + ), + + 'categories' => array( + 'name' => 'Item category selector', + 'title' => <<<"EOL" +This expression should match a node or node's attribute contained +within each article item node containing the article category. This +could be inside <div> or <span> tags or sometimes be hidden +in a data attribute. It should start with a dot followed by two +forward slashes, referring to any descendant nodes of the article +item node. Attributes can be selected by prepending an @ char +before the attributes name. +EOL + , 'type' => 'text', + 'exampleValue' => './/div[@class="ArticleListItem-label"]', + 'defaultValue' => './/div[@class="ArticleListItem-label"]', + 'required' => false + ), + + 'fix_encoding' => array( + 'name' => 'Fix encoding', + 'title' => <<<"EOL" +Check this to fix feed encoding by invoking PHP's utf8_decode +function on all extracted texts. Try this in case you see "broken" or +"weird" characters in your feed where you'd normally expect umlauts +or any other non-ascii characters. 
+EOL + , 'type' => 'checkbox', + 'required' => false + ), + + ) + ); + + /** + * Source Web page URL (should provide either HTML or XML content) + * @return string + */ + protected function getSourceUrl(){ + return $this->encodeUri($this->getInput('url')); + } + + /** + * XPath expression for extracting the feed items from the source page + * @return string + */ + protected function getExpressionItem(){ + return urldecode($this->getInput('item')); + } + + /** + * XPath expression for extracting an item title from the item context + * @return string + */ + protected function getExpressionItemTitle(){ + return urldecode($this->getInput('title')); + } + + /** + * XPath expression for extracting an item's content from the item context + * @return string + */ + protected function getExpressionItemContent(){ + return urldecode($this->getInput('content')); + } + + /** + * XPath expression for extracting an item link from the item context + * @return string + */ + protected function getExpressionItemUri(){ + return urldecode($this->getInput('uri')); + } + + /** + * XPath expression for extracting an item author from the item context + * @return string + */ + protected function getExpressionItemAuthor(){ + return urldecode($this->getInput('author')); + } + + /** + * XPath expression for extracting an item timestamp from the item context + * @return string + */ + protected function getExpressionItemTimestamp(){ + return urldecode($this->getInput('timestamp')); + } + + /** + * XPath expression for extracting item enclosures (media content like + * images or movies) from the item context + * @return string + */ + protected function getExpressionItemEnclosures(){ + return urldecode($this->getInput('enclosures')); + } + + /** + * XPath expression for extracting an item category from the item context + * @return string + */ + protected function getExpressionItemCategories(){ + return urldecode($this->getInput('categories')); + } + + /** + * Fix encoding + * @return string + */ + 
protected function getSettingFixEncoding(){ + return $this->getInput('fix_encoding'); + } + + /** + * Fixes URL encoding issues in input URL's + * @param $uri + * @return string|string[] + */ + private function encodeUri($uri) + { + if (strpos($uri, 'https%3A%2F%2F') === 0 + || strpos($uri, 'http%3A%2F%2F') === 0) { + $uri = urldecode($uri); + } + + $uri = str_replace('|', '%7C', $uri); + + return $uri; + } +} diff --git a/bridges/ZDNetBridge.php b/bridges/ZDNetBridge.php index 75df3b1..1347802 100644 --- a/bridges/ZDNetBridge.php +++ b/bridges/ZDNetBridge.php @@ -185,7 +185,8 @@ class ZDNetBridge extends FeedExpander { '<div class="downloadNow', '<div data-shortcode', '<div id="sharethrough', - '<div id="inpage-video' + '<div id="inpage-video', + '<div class="share-bar-wrapper"', ) as $div_start) { $contents = stripRecursiveHtmlSection($contents, 'div', $div_start); } diff --git a/bridges/ZoneTelechargementBridge.php b/bridges/ZoneTelechargementBridge.php index 79723fc..f11f3b7 100644 --- a/bridges/ZoneTelechargementBridge.php +++ b/bridges/ZoneTelechargementBridge.php @@ -8,7 +8,7 @@ class ZoneTelechargementBridge extends BridgeAbstract { */ const NAME = 'Zone Telechargement'; - const URI = 'https://www.zone-annuaire.com/'; + const URI = 'https://www.zt-za.com/'; const DESCRIPTION = 'Suivi de série sur Zone Telechargement'; const MAINTAINER = 'sysadminstory'; const PARAMETERS = array( @@ -17,18 +17,21 @@ class ZoneTelechargementBridge extends BridgeAbstract { 'name' => 'URL de la série', 'type' => 'text', 'required' => true, - 'title' => 'URL d\'une série sans le https://www.zone-annuaire.com/', + 'title' => 'URL d\'une série sans le https://www.zt-za.com/', 'exampleValue' => 'telecharger-series/31079-halt-and-catch-fire-saison-4-french-hd720p.html' ) ) ); + // This is an URL that is not protected by robot protection + const UNPROTECED_URI = 'https://www.zone-annuaire.com/'; + public function getIcon() { return self::URI . 
'/templates/Default/images/favicon.ico'; } public function collectData(){ - $html = getSimpleHTMLDOM(self::URI . $this->getInput('url')) + $html = getSimpleHTMLDOM(self::UNPROTECED_URI . $this->getInput('url')) or returnServerError('Could not request Zone Telechargement.'); // Get the TV show title diff --git a/cache/pages/.gitkeep b/cache/pages/.gitkeep deleted file mode 100644 index e69de29..0000000 --- a/cache/pages/.gitkeep +++ /dev/null diff --git a/cache/server/.gitkeep b/cache/server/.gitkeep deleted file mode 100644 index e69de29..0000000 --- a/cache/server/.gitkeep +++ /dev/null diff --git a/caches/MemcachedCache.php b/caches/MemcachedCache.php index f69f10b..b431279 100644 --- a/caches/MemcachedCache.php +++ b/caches/MemcachedCache.php @@ -40,7 +40,7 @@ class MemcachedCache implements CacheInterface { if ($this->data) return $this->data; $result = $this->conn->get($this->getCacheKey()); if ($result === false) { - return false; + return null; } $this->time = $result['time']; diff --git a/composer.json b/composer.json index 3c03eeb..7a38697 100644 --- a/composer.json +++ b/composer.json @@ -34,6 +34,7 @@ }, "suggest": { "ext-memcached": "Allows to use memcached as cache type", - "ext-sqlite3": "Allows to use an SQLite database for caching" + "ext-sqlite3": "Allows to use an SQLite database for caching", + "ext-dom": "Allows to use some bridges based on XPath expressions" } } diff --git a/formats/AtomFormat.php b/formats/AtomFormat.php index c1bde25..80a388d 100644 --- a/formats/AtomFormat.php +++ b/formats/AtomFormat.php @@ -130,7 +130,7 @@ EOD; /* Data are prepared, now let's begin the "MAGIE !!!" 
*/ $toReturn = <<<EOD <?xml version="1.0" encoding="{$charset}"?> -<feed xmlns="http://www.w3.org/2005/Atom"> +<feed xmlns="http://www.w3.org/2005/Atom" xmlns:media="http://search.yahoo.com/mrss/"> <title type="text">{$title}</title> <id>{$feedUrl}</id> diff --git a/lib/BridgeCard.php b/lib/BridgeCard.php index 4353f64..0ed605b 100644 --- a/lib/BridgeCard.php +++ b/lib/BridgeCard.php @@ -126,7 +126,7 @@ This bridge is not fetching its content through a secure connection</div>'; if(isset($inputEntry['title'])) $form .= '<i class="info" title="' . filter_var($inputEntry['title'], FILTER_SANITIZE_STRING) . '">i</i>'; else - $form .= '<i></i>'; + $form .= '<i class="no-info"></i>'; } $form .= '</div>'; diff --git a/lib/BridgeList.php b/lib/BridgeList.php index dc545de..7b2d526 100644 --- a/lib/BridgeList.php +++ b/lib/BridgeList.php @@ -129,7 +129,7 @@ EOD; * @return string The searchbar */ private static function getSearchbar() { - $query = filter_input(INPUT_GET, 'q'); + $query = filter_input(INPUT_GET, 'q', FILTER_SANITIZE_SPECIAL_CHARS); return <<<EOD <section class="searchbar"> diff --git a/lib/Configuration.php b/lib/Configuration.php index 76a34af..c7be6ae 100644 --- a/lib/Configuration.php +++ b/lib/Configuration.php @@ -28,7 +28,7 @@ final class Configuration { * * @todo Replace this property by a constant. */ - public static $VERSION = 'dev.2020-02-26'; + public static $VERSION = 'dev.2020-11-10'; /** * Holds the configuration data. @@ -244,9 +244,13 @@ final class Configuration { if(@is_readable($headFile)) { $revisionHashFile = '.git/' . substr(file_get_contents($headFile), 5, -1); - $branchName = explode('/', $revisionHashFile)[3]; - if(file_exists($revisionHashFile)) { - return 'git.' . $branchName . '.' . substr(file_get_contents($revisionHashFile), 0, 7); + $parts = explode('/', $revisionHashFile); + + if(isset($parts[3])) { + $branchName = $parts[3]; + if(file_exists($revisionHashFile)) { + return 'git.' . $branchName . '.' . 
substr(file_get_contents($revisionHashFile), 0, 7); + } } } diff --git a/lib/FeedExpander.php b/lib/FeedExpander.php index 665620a..bc6dc24 100644 --- a/lib/FeedExpander.php +++ b/lib/FeedExpander.php @@ -346,7 +346,7 @@ abstract class FeedExpander extends BridgeAbstract { if($attribute === 'isPermaLink' && ($value === 'true' || ( filter_var($feedItem->guid, FILTER_VALIDATE_URL) - && !filter_var($item['uri'], FILTER_VALIDATE_URL) + && (empty($item['uri']) || !filter_var($item['uri'], FILTER_VALIDATE_URL)) ) ) ) { diff --git a/lib/ParameterValidator.php b/lib/ParameterValidator.php index 149e8a4..12e0794 100644 --- a/lib/ParameterValidator.php +++ b/lib/ParameterValidator.php @@ -135,6 +135,9 @@ class ParameterValidator { return false; foreach($data as $name => $value) { + // Some RSS readers add a cache-busting parameter (_=<timestamp>) to feed URLs, detect and ignore them. + if ($name === '_') continue; + $registered = false; foreach($parameters as $context => $set) { if(array_key_exists($name, $set)) { diff --git a/lib/XPathAbstract.php b/lib/XPathAbstract.php new file mode 100644 index 0000000..e08f48d --- /dev/null +++ b/lib/XPathAbstract.php @@ -0,0 +1,583 @@ +<?php + +/** + * An alternative abstract class for bridges utilizing XPath expressions + * + * This class is meant as an alternative base class for bridge implementations. + * It offers preliminary functionality for generating feeds based on XPath + * expressions. + * As a minimum, extending classes should define XPath expressions pointing + * to the feed items contents in the class constants below. In case there is + * more manual fine tuning required, it offers a bunch of methods which can + * be overridden, for example in order to specify formatting of field values + * or more flexible definition of dynamic XPath expressions. + * + * This class extends {@see BridgeAbstract}, which means it incorporates and + * extends all of its functionality. 
+ **/ +abstract class XPathAbstract extends BridgeAbstract { + + /** + * Source Web page URL (should provide either HTML or XML content) + * You can specify any website URL which serves data suited for display in RSS feeds + * (for example a news blog). + * + * Use {@see XPathAbstract::getSourceUrl()} to read this parameter + */ + const FEED_SOURCE_URL = ''; + + /** + * XPath expression for extracting the feed title from the source page. + * If this is left blank or does not provide any data {@see BridgeAbstract::getName()} + * is used instead as the feed's title. + * + * Use {@see XPathAbstract::getExpressionTitle()} to read this parameter + */ + const XPATH_EXPRESSION_FEED_TITLE = './/title'; + + /** + * XPath expression for extracting the feed favicon URL from the source page. + * If this is left blank or does not provide any data {@see BridgeAbstract::getIcon()} + * is used instead as the feed's favicon URL. + * + * Use {@see XPathAbstract::getExpressionIcon()} to read this parameter + */ + const XPATH_EXPRESSION_FEED_ICON = './/link[@rel="icon"]/@href'; + + /** + * XPath expression for extracting the feed items from the source page + * Enter an XPath expression matching a list of dom nodes, each node containing one + * feed article item in total (usually a surrounding <div> or <span> tag). This will + * be the context nodes for all of the following expressions. This expression usually + * starts with a single forward slash. + * + * Use {@see XPathAbstract::getExpressionItem()} to read this parameter + */ + const XPATH_EXPRESSION_ITEM = ''; + + /** + * XPath expression for extracting an item title from the item context + * This expression should match a node contained within each article item node + * containing the article headline. It should start with a dot followed by two + * forward slashes, referring to any descendant nodes of the article item node. 
+ * + * Use {@see XPathAbstract::getExpressionItemTitle()} to read this parameter + */ + const XPATH_EXPRESSION_ITEM_TITLE = ''; + + /** + * XPath expression for extracting an item's content from the item context + * This expression should match a node contained within each article item node + * containing the article content or description. It should start with a dot + * followed by two forward slashes, referring to any descendant nodes of the + * article item node. + * + * Use {@see XPathAbstract::getExpressionItemContent()} to read this parameter + */ + const XPATH_EXPRESSION_ITEM_CONTENT = ''; + + /** + * XPath expression for extracting an item link from the item context + * This expression should match a node's attribute containing the article URL + * (usually the href attribute of an <a> tag). It should start with a dot + * followed by two forward slashes, referring to any descendant nodes of + * the article item node. Attributes can be selected by prepending an @ char + * before the attributes name. + * + * Use {@see XPathAbstract::getExpressionItemUri()} to read this parameter + */ + const XPATH_EXPRESSION_ITEM_URI = ''; + + /** + * XPath expression for extracting an item author from the item context + * This expression should match a node contained within each article item + * node containing the article author's name. It should start with a dot + * followed by two forward slashes, referring to any descendant nodes of + * the article item node. + * + * Use {@see XPathAbstract::getExpressionItemAuthor()} to read this parameter + */ + const XPATH_EXPRESSION_ITEM_AUTHOR = ''; + + /** + * XPath expression for extracting an item timestamp from the item context + * This expression should match a node or node's attribute containing the + * article timestamp or date (parsable by PHP's strtotime function). It + * should start with a dot followed by two forward slashes, referring to + * any descendant nodes of the article item node. 
Attributes can be + * selected by prepending an @ char before the attributes name. + * + * Use {@see XPathAbstract::getExpressionItemTimestamp()} to read this parameter + */ + const XPATH_EXPRESSION_ITEM_TIMESTAMP = ''; + + /** + * XPath expression for extracting item enclosures (media content like + * images or movies) from the item context + * This expression should match a node's attribute containing an article + * image URL (usually the src attribute of an <img> tag or a style + * attribute). It should start with a dot followed by two forward slashes, + * referring to any descendant nodes of the article item node. Attributes + * can be selected by prepending an @ char before the attributes name. + * + * Use {@see XPathAbstract::getExpressionItemEnclosures()} to read this parameter + */ + const XPATH_EXPRESSION_ITEM_ENCLOSURES = ''; + + /** + * XPath expression for extracting an item category from the item context + * This expression should match a node or node's attribute contained + * within each article item node containing the article category. This + * could be inside <div> or <span> tags or sometimes be hidden + * in a data attribute. It should start with a dot followed by two + * forward slashes, referring to any descendant nodes of the article + * item node. Attributes can be selected by prepending an @ char + * before the attributes name. + * + * Use {@see XPathAbstract::getExpressionItemCategories()} to read this parameter + */ + const XPATH_EXPRESSION_ITEM_CATEGORIES = ''; + + /** + * Fix encoding + * Set this to true for fixing feed encoding by invoking PHP's utf8_decode + * function on all extracted texts. Try this in case you see "broken" or + * "weird" characters in your feed where you'd normally expect umlauts + * or any other non-ascii characters. 
+ * + * Use {@see XPathAbstract::getSettingFixEncoding()} to read this parameter + */ + const SETTING_FIX_ENCODING = false; + + /** + * Internal storage for resulting feed name, automatically detected + * @var string + */ + private $feedName; + + /** + * Internal storage for resulting feed URI, taken from the source URL + * @var string + */ + private $feedUri; + + /** + * Internal storage for resulting feed favicon, automatically detected + * @var string + */ + private $feedIcon; + + public function getName(){ + return $this->feedName ?: parent::getName(); + } + + public function getURI() { + return $this->feedUri ?: parent::getURI(); + } + + public function getIcon() { + return $this->feedIcon ?: parent::getIcon(); + } + + /** + * Source Web page URL (should provide either HTML or XML content) + * @return string + */ + protected function getSourceUrl(){ + return static::FEED_SOURCE_URL; + } + + /** + * XPath expression for extracting the feed title from the source page + * @return string + */ + protected function getExpressionTitle(){ + return static::XPATH_EXPRESSION_FEED_TITLE; + } + + /** + * XPath expression for extracting the feed favicon from the source page + * @return string + */ + protected function getExpressionIcon(){ + return static::XPATH_EXPRESSION_FEED_ICON; + } + + /** + * XPath expression for extracting the feed items from the source page + * @return string + */ + protected function getExpressionItem(){ + return static::XPATH_EXPRESSION_ITEM; + } + + /** + * XPath expression for extracting an item title from the item context + * @return string + */ + protected function getExpressionItemTitle(){ + return static::XPATH_EXPRESSION_ITEM_TITLE; + } + + /** + * XPath expression for extracting an item's content from the item context + * @return string + */ + protected function getExpressionItemContent(){ + return static::XPATH_EXPRESSION_ITEM_CONTENT; + } + + /** + * XPath expression for extracting an item link from the item context + * @return string + */
+ protected function getExpressionItemUri(){ + return static::XPATH_EXPRESSION_ITEM_URI; + } + + /** + * XPath expression for extracting an item author from the item context + * @return string + */ + protected function getExpressionItemAuthor(){ + return static::XPATH_EXPRESSION_ITEM_AUTHOR; + } + + /** + * XPath expression for extracting an item timestamp from the item context + * @return string + */ + protected function getExpressionItemTimestamp(){ + return static::XPATH_EXPRESSION_ITEM_TIMESTAMP; + } + + /** + * XPath expression for extracting item enclosures (media content like + * images or movies) from the item context + * @return string + */ + protected function getExpressionItemEnclosures(){ + return static::XPATH_EXPRESSION_ITEM_ENCLOSURES; + } + + /** + * XPath expression for extracting an item category from the item context + * @return string + */ + protected function getExpressionItemCategories(){ + return static::XPATH_EXPRESSION_ITEM_CATEGORIES; + } + + /** + * Fix encoding + * @return string + */ + protected function getSettingFixEncoding(){ + return static::SETTING_FIX_ENCODING; + } + + /** + * Internal helper method for quickly accessing all the user defined constants + * in derived classes + * + * @param $name + * @return bool|string + */ + private function getParam($name){ + switch($name) { + + case 'url': + return $this->getSourceUrl(); + case 'feed_title': + return $this->getExpressionTitle(); + case 'feed_icon': + return $this->getExpressionIcon(); + case 'item': + return $this->getExpressionItem(); + case 'title': + return $this->getExpressionItemTitle(); + case 'content': + return $this->getExpressionItemContent(); + case 'uri': + return $this->getExpressionItemUri(); + case 'author': + return $this->getExpressionItemAuthor(); + case 'timestamp': + return $this->getExpressionItemTimestamp(); + case 'enclosures': + return $this->getExpressionItemEnclosures(); + case 'categories': + return $this->getExpressionItemCategories(); + case 
'fix_encoding': + return $this->getSettingFixEncoding(); + } + } + + /** + * Should provide the source website HTML content + * can be easily overwritten for example if special headers or auth infos are required + * @return string + */ + protected function provideWebsiteContent() { + return getContents($this->feedUri); + } + + /** + * Should provide the feeds title + * + * @param DOMXPath $xpath + * @return string + */ + protected function provideFeedTitle(DOMXPath $xpath) { + $title = $xpath->query($this->getParam('feed_title')); + if(count($title) === 1) { + return $this->getItemValueOrNodeValue($title); + } + } + + /** + * Should provide the URL of the feed's favicon + * + * @param DOMXPath $xpath + * @return string + */ + protected function provideFeedIcon(DOMXPath $xpath) { + $icon = $xpath->query($this->getParam('feed_icon')); + if(count($icon) === 1) { + return $this->cleanImageUrl($this->getItemValueOrNodeValue($icon)); + } + } + + /** + * Should provide the feed's items. + * + * @param DOMXPath $xpath + * @return DOMNodeList + */ + protected function provideFeedItems(DOMXPath $xpath) { + return @$xpath->query($this->getParam('item')); + } + + public function collectData() { + + $this->feedUri = $this->getParam('url'); + + $webPageHtml = new DOMDocument(); + libxml_use_internal_errors(true); + $webPageHtml->loadHTML($this->provideWebsiteContent()); + libxml_clear_errors(); + libxml_use_internal_errors(false); + + $xpath = new DOMXPath($webPageHtml); + + $this->feedName = $this->provideFeedTitle($xpath); + $this->feedIcon = $this->provideFeedIcon($xpath); + + $entries = $this->provideFeedItems($xpath); + if($entries === false) { + return; + } + + foreach ($entries as $entry) { + $item = new \FeedItem(); + foreach(array('title', 'content', 'uri', 'author', 'timestamp', 'enclosures', 'categories') as $param) { + + $expression = $this->getParam($param); + if('' === $expression) { + continue; + } + + //can be a string or DOMNodeList, depending on the expression 
result + $typedResult = @$xpath->evaluate($expression, $entry); + if ($typedResult === false || ($typedResult instanceof DOMNodeList && count($typedResult) === 0) + || (is_string($typedResult) && strlen(trim($typedResult)) === 0)) { + continue; + } + + $item->__set($param, $this->formatParamValue($param, $this->getItemValueOrNodeValue($typedResult))); + + } + + $itemId = $this->generateItemId($item); + if(null !== $itemId) { + $item->setUid($itemId); + } + + $this->items[] = $item; + } + + } + + /** + * @param $param + * @param $value + * @return string|array + */ + protected function formatParamValue($param, $value) + { + $value = $this->fixEncoding($value); + switch ($param) { + case 'title': + return $this->formatItemTitle($value); + case 'content': + return $this->formatItemContent($value); + case 'uri': + return $this->formatItemUri($value); + case 'author': + return $this->formatItemAuthor($value); + case 'timestamp': + return $this->formatItemTimestamp($value); + case 'enclosures': + return array($this->cleanImageUrl($value)); + case 'categories': + return array($this->fixEncoding($value)); + } + return $value; + } + + /** + * Formats the title of a feed item. Takes extracted raw title and returns it formatted + * as string. + * Can be easily overwritten in case the value needs to be transformed into something + * else. + * @param string $value + * @return string + */ + protected function formatItemTitle($value) { + return $value; + } + + /** + * Formats the content of a feed item. Takes extracted raw content and returns it formatted + * as string. + * Can be easily overwritten in case the value needs to be transformed into something + * else. + * @param string $value + * @return string + */ + protected function formatItemContent($value) { + return $value; + } + + /** + * Formats the URI of a feed item. Takes extracted raw URI and returns it formatted + * as string.
+ * Can be easily overwritten for in case the value needs to be transformed into something + * else. + * @param string $value + * @return string + */ + protected function formatItemUri($value) { + if(strlen($value) === 0) { + return ''; + } + if(strpos($value, 'http://') === 0 || strpos($value, 'https://') === 0) { + return $value; + } + + return urljoin($this->feedUri, $value); + } + + /** + * Formats the author of a feed item. Takes extracted raw author and returns it formatted + * as string. + * Can be easily overwritten for in case the value needs to be transformed into something + * else. + * @param string $value + * @return string + */ + protected function formatItemAuthor($value) { + return $value; + } + + /** + * Formats the timestamp of a feed item. Takes extracted raw timestamp and returns unix + * timestamp as integer. + * Can be easily overwritten for example if a special format has to be expected on the + * source website. + * @param string $value + * @return false|int + */ + protected function formatItemTimestamp($value) { + return strtotime($value); + } + + /** + * Formats the enclosures of a feed item. Takes extracted raw enclosures and returns them + * formatted as array. + * Can be easily overwritten for in case the values need to be transformed into something + * else. + * @param string $value + * @return array + */ + protected function formatItemEnclosures($value) { + return array($this->cleanImageUrl($value)); + } + + /** + * Formats the categories of a feed item. Takes extracted raw categories and returns them + * formatted as array. + * Can be easily overwritten for in case the values need to be transformed into something + * else. 
+ * @param string $value + * @return array + */ + protected function formatItemCategories($value) { + return array($value); + } + + /** + * @param $imageUrl + * @return string|void + */ + protected function cleanImageUrl($imageUrl) + { + $result = preg_match('~(?:http(?:s)?:)?[\/a-zA-Z0-9\-_\.]+\.(?:jpg|gif|png|jpeg|ico){1}~', $imageUrl, $matches); + if(1 !== $result) { + return; + } + return urljoin($this->feedUri, $matches[0]); + } + + /** + * @param $typedResult + * @return string + */ + protected function getItemValueOrNodeValue($typedResult) + { + if($typedResult instanceof DOMNodeList) { + $item = $typedResult->item(0); + if ($item instanceof DOMElement) { + return trim($item->nodeValue); + } elseif ($item instanceof DOMAttr) { + return trim($item->value); + } + } elseif(is_string($typedResult) && strlen($typedResult) > 0) { + return trim($typedResult); + } + returnServerError('Unknown type of XPath expression result.'); + } + + /** + * Fixes feed encoding by invoking PHP's utf8_decode function on extracted texts. + * Useful in case of "broken" or "weird" characters in the feed where you'd normally + * expect umlauts. + * + * @param $input + * @return string + */ + protected function fixEncoding($input) + { + return $this->getParam('fix_encoding') ? utf8_decode($input) : $input; + } + + /** + * Allows overriding default mechanism determining items Uid's + * + * @param FeedItem $item + * @return string|null + */ + protected function generateItemId(\FeedItem $item) { + return null; //auto generation + } +} diff --git a/lib/contents.php b/lib/contents.php index b1e3128..6e6f0b7 100644 --- a/lib/contents.php +++ b/lib/contents.php @@ -41,7 +41,7 @@ * 'content' if enabled. * * For more information see http://php.net/manual/en/function.curl-setopt.php - * @return string The contents. + * @return string|array The contents. */ function getContents($url, $header = array(), $opts = array(), $returnHeader = false){ Debug::log('Reading contents from "' . $url . 
'"'); @@ -82,6 +82,7 @@ function getContents($url, $header = array(), $opts = array(), $returnHeader = f $errorCode = 500; } else { $errorCode = 200; + $retVal['header'] = implode("\r\n", $http_response_header); } $curlError = ''; @@ -232,7 +233,7 @@ EOD * when returning plaintext. * @param string $defaultSpanText Specifies the replacement text for `<span />` * tags when returning plaintext. - * @return string Contents as simplehtmldom object. + * @return false|simple_html_dom Contents as simplehtmldom object. */ function getSimpleHTMLDOM($url, $header = array(), @@ -282,7 +283,7 @@ function getSimpleHTMLDOM($url, * when returning plaintext. * @param string $defaultSpanText Specifies the replacement text for `<span />` * tags when returning plaintext. - * @return string Contents as simplehtmldom object. + * @return false|simple_html_dom Contents as simplehtmldom object. */ function getSimpleHTMLDOMCached($url, $duration = 86400, diff --git a/lib/html.php b/lib/html.php index 13db97a..892ecb1 100644 --- a/lib/html.php +++ b/lib/html.php @@ -195,7 +195,7 @@ function stripRecursiveHTMLSection($string, $tag_name, $tag_start){ } /** - * Convert Markdown into HTML. Only a subset of the Markdown syntax is implemented. + * Convert Markdown into HTML with Parsedown. 
* * @link https://daringfireball.net/projects/markdown/ Markdown * @link https://github.github.com/gfm/ GitHub Flavored Markdown Spec @@ -205,40 +205,6 @@ function stripRecursiveHTMLSection($string, $tag_name, $tag_start){ */ function markdownToHtml($string) { - //For more details about how these regex work: - // https://github.com/RSS-Bridge/rss-bridge/pull/802#discussion_r216138702 - // Images: https://regex101.com/r/JW9Evr/1 - // Links: https://regex101.com/r/eRGVe7/1 - // Bold: https://regex101.com/r/2p40Y0/1 - // Italic: https://regex101.com/r/xJkET9/1 - // Separator: https://regex101.com/r/ZBEqFP/1 - // Plain URL: https://regex101.com/r/2JHYwb/1 - // Site name: https://regex101.com/r/qIuKYE/1 - - $string = preg_replace('/\!\[([^\]]+)\]\(([^\) ]+)(?: [^\)]+)?\)/', '<img src="$2" alt="$1" />', $string); - $string = preg_replace('/\[([^\]]+)\]\(([^\)]+)\)/', '<a href="$2">$1</a>', $string); - $string = preg_replace('/\*\*(.*)\*\*/U', '<b>$1</b>', $string); - $string = preg_replace('/\*(.*)\*/U', '<i>$1</i>', $string); - $string = preg_replace('/__(.*)__/U', '<b>$1</b>', $string); - $string = preg_replace('/_(.*)_/U', '<i>$1</i>', $string); - $string = preg_replace('/[-]{6,99}/', '<hr />', $string); - $string = str_replace(' ', '<br />', $string); - $string = preg_replace('/([^"])(https?:\/\/[^ "<]+)([^"])/', '$1<a href="$2">$2</a>$3', $string . ' '); - $string = preg_replace('/([^"\/])(www\.[^ "<]+)([^"])/', '$1<a href="http://$2">$2</a>$3', $string . ' '); - - //As the regex are not perfect, we need to fix <i> and </i> that are introduced in URLs - // Fixup regex <i>: https://regex101.com/r/NTRPf6/1 - // Fixup regex </i>: https://regex101.com/r/aNklRp/1 - - $count = 1; - while($count > 0) { - $string = preg_replace('/ (src|href)="([^"]+)<i>([^"]+)"/U', ' $1="$2_$3"', $string, -1, $count); - } - - $count = 1; - while($count > 0) { - $string = preg_replace('/ (src|href)="([^"]+)<\/i>([^"]+)"/U', ' $1="$2_$3"', $string, -1, $count); - } - - return '<div>' . 
trim($string) . '</div>'; + $Parsedown = new Parsedown(); + return $Parsedown->text($string); } diff --git a/lib/rssbridge.php b/lib/rssbridge.php index a025f22..2e7fbf2 100644 --- a/lib/rssbridge.php +++ b/lib/rssbridge.php @@ -74,6 +74,7 @@ require_once PATH_LIB . 'BridgeList.php'; require_once PATH_LIB . 'ParameterValidator.php'; require_once PATH_LIB . 'ActionFactory.php'; require_once PATH_LIB . 'ActionAbstract.php'; +require_once PATH_LIB . 'XPathAbstract.php'; // Functions require_once PATH_LIB . 'html.php'; @@ -82,5 +83,6 @@ require_once PATH_LIB . 'contents.php'; // Vendor define('MAX_FILE_SIZE', 10000000); /* Allow larger files for simple_html_dom */ -require_once PATH_LIB_VENDOR . 'simplehtmldom/simple_html_dom.php'; +require_once PATH_LIB_VENDOR . 'parsedown/Parsedown.php'; require_once PATH_LIB_VENDOR . 'php-urljoin/src/urljoin.php'; +require_once PATH_LIB_VENDOR . 'simplehtmldom/simple_html_dom.php'; diff --git a/static/style.css b/static/style.css index 5df2c51..80591e4 100644 --- a/static/style.css +++ b/static/style.css @@ -360,7 +360,7 @@ h5 { margin: 3px auto 0; } - .info { + .info, .no-info { display: none; } |