diff options
author | Johannes Schauer <josch@debian.org> | 2017-08-04 22:06:01 +0200 |
---|---|---|
committer | Johannes Schauer <josch@debian.org> | 2017-08-04 22:06:01 +0200 |
commit | b005331cd910c0cc7dee2ddf82491b8248f431cf (patch) | |
tree | ff8b5cbfe81d570b878cb8d60ee51d07c3b1d059 /bridges |
Import rss-bridge_2017-08-03.orig.tar.gz
[dgit import orig rss-bridge_2017-08-03.orig.tar.gz]
Diffstat (limited to 'bridges')
138 files changed, 11772 insertions, 0 deletions
diff --git a/bridges/ABCTabsBridge.php b/bridges/ABCTabsBridge.php new file mode 100644 index 0000000..2e451e2 --- /dev/null +++ b/bridges/ABCTabsBridge.php @@ -0,0 +1,42 @@ +<?php +class ABCTabsBridge extends BridgeAbstract { + + const MAINTAINER = 'kranack'; + const NAME = 'ABC Tabs Bridge'; + const URI = 'https://www.abc-tabs.com/'; + const DESCRIPTION = 'Returns 22 newest tabs'; + + public function collectData(){ + $html = ''; + $html = getSimpleHTMLDOM(static::URI.'tablatures/nouveautes.html') + or returnClientError('No results for this query.'); + + $table = $html->find('table#myTable', 0)->children(1); + + foreach ($table->find('tr') as $tab) { + $item = array(); + $item['author'] = $tab->find('td', 1)->plaintext + . ' - ' + . $tab->find('td', 2)->plaintext; + + $item['title'] = $tab->find('td', 1)->plaintext + . ' - ' + . $tab->find('td', 2)->plaintext; + + $item['content'] = 'Le ' + . $tab->find('td', 0)->plaintext + . '<br> Par: ' + . $tab->find('td', 5)->plaintext + . '<br> Type: ' + . $tab->find('td', 3)->plaintext; + + $item['id'] = static::URI + . $tab->find('td', 2)->find('a', 0)->getAttribute('href'); + + $item['uri'] = static::URI + . $tab->find('td', 2)->find('a', 0)->getAttribute('href'); + + $this->items[] = $item; + } + } +} diff --git a/bridges/AcrimedBridge.php b/bridges/AcrimedBridge.php new file mode 100644 index 0000000..8b40d1d --- /dev/null +++ b/bridges/AcrimedBridge.php @@ -0,0 +1,25 @@ +<?php +class AcrimedBridge extends FeedExpander { + + const MAINTAINER = 'qwertygc'; + const NAME = 'Acrimed Bridge'; + const URI = 'http://www.acrimed.org/'; + const CACHE_TIMEOUT = 4800; //2hours + const DESCRIPTION = 'Returns the newest articles'; + + public function collectData(){ + $this->collectExpandableDatas(static::URI . 
'spip.php?page=backend'); + } + + protected function parseItem($newsItem){ + $item = parent::parseItem($newsItem); + + $articlePage = getSimpleHTMLDOM($newsItem->link); + $article = sanitize($articlePage->find('article.article1', 0)->innertext); + $article = defaultLinkTo($article, static::URI); + $item['content'] = $article; + + return $item; + } + +} diff --git a/bridges/AllocineFRBridge.php b/bridges/AllocineFRBridge.php new file mode 100644 index 0000000..604199b --- /dev/null +++ b/bridges/AllocineFRBridge.php @@ -0,0 +1,87 @@ +<?php +class AllocineFRBridge extends BridgeAbstract { + + const MAINTAINER = 'superbaillot.net'; + const NAME = 'Allo Cine Bridge'; + const CACHE_TIMEOUT = 25200; // 7h + const URI = 'http://www.allocine.fr/'; + const DESCRIPTION = 'Bridge for allocine.fr'; + const PARAMETERS = array( array( + 'category' => array( + 'name' => 'category', + 'type' => 'list', + 'required' => true, + 'exampleValue' => 'Faux Raccord', + 'title' => 'Select your category', + 'values' => array( + 'Faux Raccord' => 'faux-raccord', + 'Top 5' => 'top-5', + 'Tueurs en Séries' => 'tueurs-en-serie' + ) + ) + )); + + public function getURI(){ + if(!is_null($this->getInput('category'))) { + + switch($this->getInput('category')) { + case 'faux-raccord': + $uri = static::URI . 'video/programme-12284/saison-29841/'; + break; + case 'top-5': + $uri = static::URI . 'video/programme-12299/saison-29561/'; + break; + case 'tueurs-en-serie': + $uri = static::URI . 'video/programme-12286/saison-22938/'; + break; + } + + return $uri; + } + + return parent::getURI(); + } + + public function getName(){ + if(!is_null($this->getInput('category'))) { + return self::NAME . ' : ' + .array_search( + $this->getInput('category'), + self::PARAMETERS[$this->queriedContext]['category']['values'] + ); + } + + return parent::getName(); + } + + public function collectData(){ + + $html = getSimpleHTMLDOM($this->getURI()) + or returnServerError('Could not request ' . $this->getURI() . 
' !'); + + $category = array_search( + $this->getInput('category'), + self::PARAMETERS[$this->queriedContext]['category']['values'] + ); + + foreach($html->find('figure.media-meta-fig') as $element) { + $item = array(); + + $title = $element->find('div.titlebar h3.title a', 0); + $content = trim($element->innertext); + $figCaption = strpos($content, $category); + + if($figCaption !== false) { + $content = str_replace('src="/', 'src="' . static::URI, $content); + $content = str_replace('href="/', 'href="' . static::URI, $content); + $content = str_replace('src=\'/', 'src=\'' . static::URI, $content); + $content = str_replace('href=\'/', 'href=\'' . static::URI, $content); + $item['content'] = $content; + $item['title'] = trim($title->innertext); + $item['uri'] = static::URI . $title->href; + $this->items[] = $item; + } + } + } + +} diff --git a/bridges/AmazonBridge.php b/bridges/AmazonBridge.php new file mode 100644 index 0000000..cbc6119 --- /dev/null +++ b/bridges/AmazonBridge.php @@ -0,0 +1,94 @@ +<?php + +class AmazonBridge extends BridgeAbstract { + + const MAINTAINER = 'Alexis CHEMEL'; + const NAME = 'Amazon'; + const URI = 'https://www.amazon.com/'; + const CACHE_TIMEOUT = 3600; // 1h + const DESCRIPTION = 'Returns products from Amazon search'; + + const PARAMETERS = array(array( + 'q' => array( + 'name' => 'Keyword', + 'required' => true, + ), + 'sort' => array( + 'name' => 'Sort by', + 'type' => 'list', + 'required' => false, + 'values' => array( + 'Relevance' => 'relevanceblender', + 'Price: Low to High' => 'price-asc-rank', + 'Price: High to Low' => 'price-desc-rank', + 'Average Customer Review' => 'review-rank', + 'Newest Arrivals' => 'date-desc-rank', + ), + 'defaultValue' => 'relevanceblender', + ), + 'tld' => array( + 'name' => 'Country', + 'type' => 'list', + 'required' => true, + 'values' => array( + 'Australia' => 'com.au', + 'Brazil' => 'com.br', + 'Canada' => 'ca', + 'China' => 'cn', + 'France' => 'fr', + 'Germany' => 'de', + 'India' => 'in', + 
'Italy' => 'it', + 'Japan' => 'co.jp', + 'Mexico' => 'com.mx', + 'Netherlands' => 'nl', + 'Spain' => 'es', + 'United Kingdom' => 'co.uk', + 'United States' => 'com', + ), + 'defaultValue' => 'com', + ), + )); + + public function getName(){ + if(!is_null($this->getInput('tld')) && !is_null($this->getInput('q'))) { + return 'Amazon.'.$this->getInput('tld').': '.$this->getInput('q'); + } + + return parent::getName(); + } + + public function collectData() { + + $uri = 'https://www.amazon.'.$this->getInput('tld').'/'; + $uri .= 's/?field-keywords='.urlencode($this->getInput('q')).'&sort='.$this->getInput('sort'); + + $html = getSimpleHTMLDOM($uri) + or returnServerError('Could not request Amazon.'); + + foreach($html->find('li.s-result-item') as $element) { + + $item = array(); + + // Title + $title = $element->find('h2', 0); + + $item['title'] = html_entity_decode($title->innertext, ENT_QUOTES); + + // Url + $uri = $title->parent()->getAttribute('href'); + $uri = substr($uri, 0, strrpos($uri, '/')); + + $item['uri'] = substr($uri, 0, strrpos($uri, '/')); + + // Content + $image = $element->find('img', 0); + $price = $element->find('span.s-price', 0); + $price = ($price) ? 
$price->innertext : ''; + + $item['content'] = '<img src="'.$image->getAttribute('src').'" /><br />'.$price; + + $this->items[] = $item; + } + } +} diff --git a/bridges/AnimeUltimeBridge.php b/bridges/AnimeUltimeBridge.php new file mode 100644 index 0000000..6c5427e --- /dev/null +++ b/bridges/AnimeUltimeBridge.php @@ -0,0 +1,135 @@ +<?php +class AnimeUltimeBridge extends BridgeAbstract { + + const MAINTAINER = 'ORelio'; + const NAME = 'Anime-Ultime'; + const URI = 'http://www.anime-ultime.net/'; + const CACHE_TIMEOUT = 10800; // 3h + const DESCRIPTION = 'Returns the 10 newest releases posted on Anime-Ultime'; + const PARAMETERS = array( array( + 'type' => array( + 'name' => 'Type', + 'type' => 'list', + 'values' => array( + 'Everything' => '', + 'Anime' => 'A', + 'Drama' => 'D', + 'Tokusatsu' => 'T' + ) + ) + )); + + private $filter = 'Releases'; + + public function collectData(){ + + //Add type filter if provided + $typeFilter = array_search( + $this->getInput('type'), + self::PARAMETERS[$this->queriedContext]['type']['values'] + ); + + //Build date and filters for making requests + $thismonth = date('mY') . $typeFilter; + $lastmonth = date('mY', mktime(0, 0, 0, date('n') - 1, 1, date('Y'))) . $typeFilter; + + //Process each HTML page until having 10 releases + $processedOK = 0; + foreach (array($thismonth, $lastmonth) as $requestFilter) { + + //Retrive page contents + $url = self::URI . 'history-0-1/' . $requestFilter; + $html = getSimpleHTMLDOM($url) + or returnServerError('Could not request Anime-Ultime: ' . $url); + + //Relases are sorted by day : process each day individually + foreach($html->find('div.history', 0)->find('h3') as $daySection) { + + //Retrieve day and build date information + $dateString = $daySection->plaintext; + $day = intval(substr($dateString, strpos($dateString, ' ') + 1, 2)); + $item_date = strtotime(str_pad($day, 2, '0', STR_PAD_LEFT) + . '-' + . substr($requestFilter, 0, 2) + . '-' + . 
substr($requestFilter, 2, 4)); + + //<h3>day</h3><br /><table><tr> <-- useful data in table rows + $release = $daySection->next_sibling()->next_sibling()->first_child(); + + //Process each release of that day, ignoring first table row: contains table headers + while(!is_null($release = $release->next_sibling())) { + if(count($release->find('td')) > 0) { + + //Retrieve metadata from table columns + $item_link_element = $release->find('td', 0)->find('a', 0); + $item_uri = self::URI . $item_link_element->href; + $item_name = html_entity_decode($item_link_element->plaintext); + $item_episode = html_entity_decode( + str_pad( + $release->find('td', 1)->plaintext, + 2, + '0', + STR_PAD_LEFT + ) + ); + + $item_fansub = $release->find('td', 2)->plaintext; + $item_type = $release->find('td', 4)->plaintext; + + if(!empty($item_uri)) { + + // Retrieve description from description page and + // convert relative image src info absolute image src + $html_item = getContents($item_uri) + or returnServerError('Could not request Anime-Ultime: ' . $item_uri); + $item_description = substr( + $html_item, + strpos($html_item, 'class="principal_contain" align="center">') + 41 + ); + $item_description = substr($item_description, + 0, + strpos($item_description, '<div id="table">') + ); + $item_description = str_replace( + 'src="images', 'src="' . self::URI . 'images', + $item_description + ); + $item_description = str_replace("\r", '', $item_description); + $item_description = str_replace("\n", '', $item_description); + $item_description = utf8_encode($item_description); + + //Build and add final item + $item = array(); + $item['uri'] = $item_uri; + $item['title'] = $item_name . ' ' . $item_type . ' ' . 
$item_episode; + $item['author'] = $item_fansub; + $item['timestamp'] = $item_date; + $item['content'] = $item_description; + $this->items[] = $item; + $processedOK++; + + //Stop processing once limit is reached + if ($processedOK >= 10) + return; + } + } + } + } + } + } + + public function getName() { + if(!is_null($this->getInput('type'))) { + $typeFilter = array_search( + $this->getInput('type'), + self::PARAMETERS[$this->queriedContext]['type']['values'] + ); + + return 'Latest ' . $typeFilter . ' - Anime-Ultime Bridge'; + } + + return parent::getName(); + } + +} diff --git a/bridges/Arte7Bridge.php b/bridges/Arte7Bridge.php new file mode 100644 index 0000000..3d7ae9d --- /dev/null +++ b/bridges/Arte7Bridge.php @@ -0,0 +1,102 @@ +<?php +class Arte7Bridge extends BridgeAbstract { + + const MAINTAINER = 'mitsukarenai'; + const NAME = 'Arte +7'; + const URI = 'http://www.arte.tv/'; + const CACHE_TIMEOUT = 1800; // 30min + const DESCRIPTION = 'Returns newest videos from ARTE +7'; + const PARAMETERS = array( + 'Catégorie (Français)' => array( + 'catfr' => array( + 'type' => 'list', + 'name' => 'Catégorie', + 'values' => array( + 'Toutes les vidéos (français)' => 'toutes-les-videos', + 'Actu & société' => 'actu-société', + 'Séries & fiction' => 'séries-fiction', + 'Cinéma' => 'cinéma', + 'Arts & spectacles classiques' => 'arts-spectacles-classiques', + 'Culture pop' => 'culture-pop', + 'Découverte' => 'découverte', + 'Histoire' => 'histoire', + 'Junior' => 'junior' + ) + ) + ), + 'Catégorie (Allemand)' => array( + 'catde' => array( + 'type' => 'list', + 'name' => 'Catégorie', + 'values' => array( + 'Alle Videos (deutsch)' => 'alle-videos', + 'Aktuelles & Gesellschaft' => 'aktuelles-gesellschaft', + 'Fernsehfilme & Serien' => 'fernsehfilme-serien', + 'Kino' => 'kino', + 'Kunst & Kultur' => 'kunst-kultur', + 'Popkultur & Alternativ' => 'popkultur-alternativ', + 'Entdeckung' => 'entdeckung', + 'Geschichte' => 'geschichte', + 'Junior' => 'junior' + ) + ) + ) + ); + + 
public function collectData(){ + switch($this->queriedContext) { + case 'Catégorie (Français)': + $category = $this->getInput('catfr'); + $lang = 'fr'; + break; + case 'Catégorie (Allemand)': + $category = $this->getInput('catde'); + $lang = 'de'; + break; + } + + $url = self::URI . 'guide/' . $lang . '/plus7/' . $category; + $input = getContents($url) or die('Could not request ARTE.'); + + if(strpos($input, 'categoryVideoSet') !== false) { + $input = explode('categoryVideoSet="', $input); + $input = explode('}}', $input[1]); + $input = $input[0] . '}}'; + } else { + $input = explode('videoSet="', $input); + $input = explode('}]}', $input[1]); + $input = $input[0] . '}]}'; + } + + $input_json = json_decode(html_entity_decode($input, ENT_QUOTES), true); + + foreach($input_json['videos'] as $element) { + $item = array(); + $item['uri'] = str_replace("autoplay=1", "", $element['url']); + $item['id'] = $element['id']; + + $hack_broadcast_time = $element['rights_end']; + $hack_broadcast_time = strtok($hack_broadcast_time, 'T'); + $hack_broadcast_time = strtok('T'); + + $item['timestamp'] = strtotime($element['scheduled_on'] . 'T' . $hack_broadcast_time); + $item['title'] = $element['title']; + + if(!empty($element['subtitle'])) + $item['title'] = $element['title'] . ' | ' . $element['subtitle']; + + $item['duration'] = round((int)$element['duration'] / 60); + $item['content'] = $element['teaser'] + . '<br><br>' + . $item['duration'] + . 'min<br><a href="' + . $item['uri'] + . '"><img src="' + . $element['thumbnail_url'] + . 
'" /></a>'; + + $this->items[] = $item; + } + } + +} diff --git a/bridges/AskfmBridge.php b/bridges/AskfmBridge.php new file mode 100644 index 0000000..e227461 --- /dev/null +++ b/bridges/AskfmBridge.php @@ -0,0 +1,74 @@ +<?php +class AskfmBridge extends BridgeAbstract { + + const MAINTAINER = 'az5he6ch'; + const NAME = 'Ask.fm Answers'; + const URI = 'https://ask.fm/'; + const CACHE_TIMEOUT = 300; //5 min + const DESCRIPTION = 'Returns answers from an Ask.fm user'; + const PARAMETERS = array( + 'Ask.fm username' => array( + 'u' => array( + 'name' => 'Username', + 'required' => true + ) + ) + ); + + public function collectData(){ + $html = getSimpleHTMLDOM($this->getURI()) + or returnServerError('Requested username can\'t be found.'); + + foreach($html->find('div.streamItem-answer') as $element) { + $item = array(); + $item['uri'] = self::URI . $element->find('a.streamItemsAge', 0)->href; + $question = trim($element->find('h1.streamItemContent-question', 0)->innertext); + + $item['title'] = trim( + htmlspecialchars_decode($element->find('h1.streamItemContent-question', 0)->plaintext, + ENT_QUOTES + ) + ); + + $answer = trim($element->find('p.streamItemContent-answer', 0)->innertext); + + // Doesn't work, DOM parser doesn't seem to like data-hint, dunno why + #$item['update'] = $element->find('a.streamitemsage',0)->data-hint; + + // This probably should be cleaned up, especially for YouTube embeds + $visual = $element->find('div.streamItemContent-visual', 0)->innertext; + //Fix tracking links, also doesn't work + foreach($element->find('a') as $link) { + if(strpos($link->href, 'l.ask.fm') !== false) { + + // Too slow + #$link->href = str_replace('#_=_', '', get_headers($link->href, 1)['Location']); + + $link->href = $link->plaintext; + } + } + + $content = '<p>' . $question . '</p><p>' . $answer . '</p><p>' . $visual . '</p>'; + // Fix relative links without breaking // scheme used by YouTube stuff + $content = preg_replace('#href="\/(?!\/)#', 'href="' . 
self::URI, $content); + $item['content'] = $content; + $this->items[] = $item; + } + } + + public function getName(){ + if(!is_null($this->getInput('u'))) { + return self::NAME . ' : ' . $this->getInput('u'); + } + + return parent::getName(); + } + + public function getURI(){ + if(!is_null($this->getInput('u'))) { + return self::URI . urlencode($this->getInput('u')) . '/answers/more?page=0'; + } + + return parent::getURI(); + } +} diff --git a/bridges/BandcampBridge.php b/bridges/BandcampBridge.php new file mode 100644 index 0000000..0527da0 --- /dev/null +++ b/bridges/BandcampBridge.php @@ -0,0 +1,63 @@ +<?php +class BandcampBridge extends BridgeAbstract { + + const MAINTAINER = 'sebsauvage'; + const NAME = 'Bandcamp Tag'; + const URI = 'https://bandcamp.com/'; + const CACHE_TIMEOUT = 600; // 10min + const DESCRIPTION = 'New bandcamp release by tag'; + const PARAMETERS = array( array( + 'tag' => array( + 'name' => 'tag', + 'type' => 'text', + 'required' => true + ) + )); + + public function collectData(){ + $html = getSimpleHTMLDOM($this->getURI()) + or returnServerError('No results for this query.'); + + foreach($html->find('li.item') as $release) { + $script = $release->find('div.art', 0)->getAttribute('onclick'); + $uri = ltrim($script, "return 'url("); + $uri = rtrim($uri, "')"); + + $item = array(); + $item['author'] = $release->find('div.itemsubtext', 0)->plaintext + . ' - ' + . $release->find('div.itemtext', 0)->plaintext; + + $item['title'] = $release->find('div.itemsubtext', 0)->plaintext + . ' - ' + . $release->find('div.itemtext', 0)->plaintext; + + $item['content'] = '<img src="' + . $uri + . '"/><br/>' + . $release->find('div.itemsubtext', 0)->plaintext + . ' - ' + . 
$release->find('div.itemtext', 0)->plaintext; + + $item['id'] = $release->find('a', 0)->getAttribute('href'); + $item['uri'] = $release->find('a', 0)->getAttribute('href'); + $this->items[] = $item; + } + } + + public function getURI(){ + if(!is_null($this->getInput('tag'))) { + return self::URI . 'tag/' . urlencode($this->getInput('tag')) . '?sort_field=date'; + } + + return parent::getURI(); + } + + public function getName(){ + if(!is_null($this->getInput('tag'))) { + return $this->getInput('tag') . ' - Bandcamp Tag'; + } + + return parent::getName(); + } +} diff --git a/bridges/BastaBridge.php b/bridges/BastaBridge.php new file mode 100644 index 0000000..17d3da7 --- /dev/null +++ b/bridges/BastaBridge.php @@ -0,0 +1,34 @@ +<?php +class BastaBridge extends BridgeAbstract { + + const MAINTAINER = 'qwertygc'; + const NAME = 'Bastamag Bridge'; + const URI = 'http://www.bastamag.net/'; + const CACHE_TIMEOUT = 7200; // 2h + const DESCRIPTION = 'Returns the newest articles.'; + + public function collectData(){ + // Replaces all relative image URLs by absolute URLs. + // Relative URLs always start with 'local/'! + function replaceImageUrl($content){ + return preg_replace('/src=["\']{1}([^"\']+)/ims', 'src=\'' . self::URI . '$1\'', $content); + } + + $html = getSimpleHTMLDOM(self::URI . 
'spip.php?page=backend') + or returnServerError('Could not request Bastamag.'); + + $limit = 0; + + foreach($html->find('item') as $element) { + if($limit < 10) { + $item = array(); + $item['title'] = $element->find('title', 0)->innertext; + $item['uri'] = $element->find('guid', 0)->plaintext; + $item['timestamp'] = strtotime($element->find('dc:date', 0)->plaintext); + $item['content'] = replaceImageUrl(getSimpleHTMLDOM($item['uri'])->find('div.texte', 0)->innertext); + $this->items[] = $item; + $limit++; + } + } + } +} diff --git a/bridges/BlaguesDeMerdeBridge.php b/bridges/BlaguesDeMerdeBridge.php new file mode 100644 index 0000000..25c018a --- /dev/null +++ b/bridges/BlaguesDeMerdeBridge.php @@ -0,0 +1,31 @@ +<?php +class BlaguesDeMerdeBridge extends BridgeAbstract { + + const MAINTAINER = 'superbaillot.net'; + const NAME = 'Blagues De Merde'; + const URI = 'http://www.blaguesdemerde.fr/'; + const CACHE_TIMEOUT = 7200; // 2h + const DESCRIPTION = 'Blagues De Merde'; + + public function collectData(){ + $html = getSimpleHTMLDOM(self::URI) + or returnServerError('Could not request BDM.'); + + foreach($html->find('article.joke_contener') as $element) { + $item = array(); + $temp = $element->find('a'); + + if(isset($temp[2])) { + $item['content'] = trim($element->find('div.joke_text_contener', 0)->innertext); + $uri = $temp[2]->href; + $item['uri'] = $uri; + $item['title'] = substr($uri, (strrpos($uri, "/") + 1)); + $date = $element->find('li.bdm_date', 0)->innertext; + $time = mktime(0, 0, 0, substr($date, 3, 2), substr($date, 0, 2), substr($date, 6, 4)); + $item['timestamp'] = $time; + $item['author'] = $element->find('li.bdm_pseudo', 0)->innertext; + $this->items[] = $item; + } + } + } +} diff --git a/bridges/BooruprojectBridge.php b/bridges/BooruprojectBridge.php new file mode 100644 index 0000000..6815d37 --- /dev/null +++ b/bridges/BooruprojectBridge.php @@ -0,0 +1,45 @@ +<?php +require_once('GelbooruBridge.php'); + +class BooruprojectBridge extends 
GelbooruBridge { + + const MAINTAINER = 'mitsukarenai'; + const NAME = 'Booruproject'; + const URI = 'http://booru.org/'; + const DESCRIPTION = 'Returns images from given page of booruproject'; + const PARAMETERS = array( + 'global' => array( + 'p' => array( + 'name' => 'page', + 'type' => 'number' + ), + 't' => array( + 'name' => 'tags' + ) + ), + 'Booru subdomain (subdomain.booru.org)' => array( + 'i' => array( + 'name' => 'Subdomain', + 'required' => true + ) + ) + ); + + const PIDBYPAGE = 20; + + public function getURI(){ + if(!is_null($this->getInput('i'))) { + return 'http://' . $this->getInput('i') . '.booru.org/'; + } + + return parent::getURI(); + } + + public function getName(){ + if(!is_null($this->getInput('i'))) { + return static::NAME . ' ' . $this->getInput('i'); + } + + return parent::getName(); + } +} diff --git a/bridges/CADBridge.php b/bridges/CADBridge.php new file mode 100644 index 0000000..09e3e65 --- /dev/null +++ b/bridges/CADBridge.php @@ -0,0 +1,45 @@ +<?php +class CADBridge extends FeedExpander { + const MAINTAINER = 'nyutag'; + const NAME = 'CAD Bridge'; + const URI = 'http://www.cad-comic.com/'; + const CACHE_TIMEOUT = 7200; //2h + const DESCRIPTION = 'Returns the newest articles.'; + + public function collectData(){ + $this->collectExpandableDatas('http://cdn2.cad-comic.com/rss.xml', 10); + } + + protected function parseItem($newsItem){ + $item = parent::parseItem($newsItem); + $item['content'] = $this->extractCADContent($item['uri']); + return $item; + } + + private function extractCADContent($url) { + $html3 = getSimpleHTMLDOMCached($url); + + // The request might fail due to missing https support or wrong URL + if($html3 == false) + return 'Daily comic not released yet'; + + $htmlpart = explode("/", $url); + + switch ($htmlpart[3]) { + case 'cad': + preg_match_all("/http:\/\/cdn2\.cad-comic\.com\/comics\/cad-\S*png/", $html3, $url2); + break; + case 'sillies': + 
preg_match_all("/http:\/\/cdn2\.cad-comic\.com\/comics\/sillies-\S*gif/", $html3, $url2); + break; + default: + return 'Daily comic not released yet'; + } + $img = implode($url2[0]); + $html3->clear(); + unset($html3); + if ($img == '') + return 'Daily comic not released yet'; + return '<img src="' . $img . '"/>'; + } +} diff --git a/bridges/CNETBridge.php b/bridges/CNETBridge.php new file mode 100644 index 0000000..eefb705 --- /dev/null +++ b/bridges/CNETBridge.php @@ -0,0 +1,93 @@ +<?php +class CNETBridge extends BridgeAbstract { + + const MAINTAINER = 'ORelio'; + const NAME = 'CNET News'; + const URI = 'http://www.cnet.com/'; + const CACHE_TIMEOUT = 1800; // 30min + const DESCRIPTION = 'Returns the newest articles. <br /> You may specify a +topic found in some section URLs, else all topics are selected.'; + + const PARAMETERS = array( array( + 'topic' => array( + 'name' => 'Topic name' + ) + )); + + public function collectData(){ + + function extractFromDelimiters($string, $start, $end){ + if(strpos($string, $start) !== false) { + $section_retrieved = substr($string, strpos($string, $start) + strlen($start)); + $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end)); + return $section_retrieved; + } + + return false; + } + + function stripWithDelimiters($string, $start, $end){ + while(strpos($string, $start) !== false) { + $section_to_remove = substr($string, strpos($string, $start)); + $section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end)); + $string = str_replace($section_to_remove, '', $string); + } + + return $string; + } + + function cleanArticle($article_html){ + $article_html = '<p>' . 
substr($article_html, strpos($article_html, '<p>') + 3); + $article_html = stripWithDelimiters($article_html, '<span class="credit">', '</span>'); + $article_html = stripWithDelimiters($article_html, '<script', '</script>'); + $article_html = stripWithDelimiters($article_html, '<div class="shortcode related-links', '</div>'); + $article_html = stripWithDelimiters($article_html, '<a class="clickToEnlarge">', '</a>'); + return $article_html; + } + + $pageUrl = self::URI . (empty($this->getInput('topic')) ? '' : 'topics/' . $this->getInput('topic') . '/'); + $html = getSimpleHTMLDOM($pageUrl) or returnServerError('Could not request CNET: ' . $pageUrl); + $limit = 0; + + foreach($html->find('div.assetBody') as $element) { + if($limit < 8) { + $article_title = trim($element->find('h2', 0)->plaintext); + $article_uri = self::URI . ($element->find('a', 0)->href); + $article_timestamp = strtotime($element->find('time.assetTime', 0)->plaintext); + $article_author = trim($element->find('a[rel=author]', 0)->plaintext); + + if(!empty($article_title) && !empty($article_uri) && strpos($article_uri, '/news/') !== false) { + $article_html = getSimpleHTMLDOM($article_uri) + or returnServerError('Could not request CNET: ' . $article_uri); + $article_content = trim( + cleanArticle( + extractFromDelimiters( + $article_html, + '<div class="articleContent', + '<footer>' + ) + ) + ); + + $item = array(); + $item['uri'] = $article_uri; + $item['title'] = $article_title; + $item['author'] = $article_author; + $item['timestamp'] = $article_timestamp; + $item['content'] = $article_content; + $this->items[] = $item; + $limit++; + } + } + } + } + + public function getName(){ + if(!is_null($this->getInput('topic'))) { + $topic = $this->getInput('topic'); + return 'CNET News Bridge' . (empty($topic) ? '' : ' - ' . 
$topic); + } + + return parent::getName(); + } +} diff --git a/bridges/CastorusBridge.php b/bridges/CastorusBridge.php new file mode 100644 index 0000000..3ed1331 --- /dev/null +++ b/bridges/CastorusBridge.php @@ -0,0 +1,118 @@ +<?php +class CastorusBridge extends BridgeAbstract { + const MAINTAINER = 'logmanoriginal'; + const NAME = 'Castorus Bridge'; + const URI = 'http://www.castorus.com'; + const CACHE_TIMEOUT = 600; // 10min + const DESCRIPTION = 'Returns the latest changes'; + + const PARAMETERS = array( + 'Get latest changes' => array(), + 'Get latest changes via ZIP code' => array( + 'zip' => array( + 'name' => 'ZIP code', + 'type' => 'text', + 'required' => true, + 'exampleValue' => '74910, 74', + 'title' => 'Insert ZIP code (complete or partial)' + ) + ), + 'Get latest changes via city name' => array( + 'city' => array( + 'name' => 'City name', + 'type' => 'text', + 'required' => true, + 'exampleValue' => 'Seyssel, Seys', + 'title' => 'Insert city name (complete or partial)' + ) + ) + ); + + // Extracts the title from an actitiy + private function extractActivityTitle($activity){ + $title = $activity->find('a', 0); + + if(!$title) + returnServerError('Cannot find title!'); + + return htmlspecialchars(trim($title->plaintext)); + } + + // Extracts the url from an actitiy + private function extractActivityUrl($activity){ + $url = $activity->find('a', 0); + + if(!$url) + returnServerError('Cannot find url!'); + + return self::URI . 
$url->href; + } + + // Extracts the time from an activity + private function extractActivityTime($activity){ + // Unfortunately the time is part of the parent node, + // so we have to clear all child nodes first + $nodes = $activity->find('*'); + + if(!$nodes) + returnServerError('Cannot find nodes!'); + + foreach($nodes as $node) { + $node->outertext = ''; + } + + return strtotime($activity->innertext); + } + + // Extracts the price change + private function extractActivityPrice($activity){ + $price = $activity->find('span', 1); + + if(!$price) + returnServerError('Cannot find price!'); + + return $price->innertext; + } + + public function collectData(){ + $zip_filter = trim($this->getInput('zip')); + $city_filter = trim($this->getInput('city')); + + $html = getSimpleHTMLDOM(self::URI); + + if(!$html) + returnServerError('Could not load data from ' . self::URI . '!'); + + $activities = $html->find('div#activite/li'); + + if(!$activities) + returnServerError('Failed to find activities!'); + + foreach($activities as $activity) { + $item = array(); + + $item['title'] = $this->extractActivityTitle($activity); + $item['uri'] = $this->extractActivityUrl($activity); + $item['timestamp'] = $this->extractActivityTime($activity); + $item['content'] = '<a href="' + . $item['uri'] + . '">' + . $item['title'] + . '</a><br><p>' + . $this->extractActivityPrice($activity) + . 
'</p>'; + + if(isset($zip_filter) + && !(substr($item['title'], 0, strlen($zip_filter)) === $zip_filter)) { + continue; // Skip this item + } + + if(isset($city_filter) + && !(substr($item['title'], strpos($item['title'], ' ') + 1, strlen($city_filter)) === $city_filter)) { + continue; // Skip this item + } + + $this->items[] = $item; + } + } +} diff --git a/bridges/CollegeDeFranceBridge.php b/bridges/CollegeDeFranceBridge.php new file mode 100644 index 0000000..1f81683 --- /dev/null +++ b/bridges/CollegeDeFranceBridge.php @@ -0,0 +1,84 @@ +<?php +class CollegeDeFranceBridge extends BridgeAbstract { + + const MAINTAINER = 'pit-fgfjiudghdf'; + const NAME = 'CollegeDeFrance'; + const URI = 'http://www.college-de-france.fr/'; + const CACHE_TIMEOUT = 10800; // 3h + const DESCRIPTION = 'Returns the latest audio and video from CollegeDeFrance'; + + public function collectData(){ + $months = array( + '01' => 'janv.', + '02' => 'févr.', + '03' => 'mars', + '04' => 'avr.', + '05' => 'mai', + '06' => 'juin', + '07' => 'juil.', + '08' => 'août', + '09' => 'sept.', + '10' => 'oct.', + '11' => 'nov.', + '12' => 'déc.' + ); + + // The "API" used by the site returns a list of partial HTML in this form + /* <li> + * <a href="/site/thomas-romer/guestlecturer-2016-04-15-14h30.htm" data-target="after"> + * <span class="date"><span class="list-icon list-icon-video"></span> + * <span class="list-icon list-icon-audio"></span>15 avr. 2016</span> + * <span class="lecturer">Christopher Hays</span> + * <span class='title'>Imagery of Divine Suckling in the Hebrew Bible and the Ancient Near East</span> + * </a> + * </li> + */ + $html = getSimpleHTMLDOM(self::URI + . 
'components/search-audiovideo.jsp?fulltext=&siteid=1156951719600&lang=FR&type=all') + or returnServerError('Could not request CollegeDeFrance.'); + + foreach($html->find('a[data-target]') as $element) { + $item = array(); + $item['title'] = $element->find('.title', 0)->plaintext; + + // Most relative URLs contains an hour in addition to the date, so let's use it + // <a href="/site/yann-lecun/course-2016-04-08-11h00.htm" data-target="after"> + // + // Sometimes there's an __1, perhaps it signifies an update + // "/site/patrick-boucheron/seminar-2016-05-03-18h00__1.htm" + // + // But unfortunately some don't have any hours info + // <a href="/site/institut-physique/ + // The-Mysteries-of-Decoherence-Sebastien-Gleyzes-[Video-3-35].htm" data-target="after"> + $timezone = new DateTimeZone('Europe/Paris'); + + // strpos($element->href, '201') will break in 2020 but it'll + // probably break prior to then due to site changes anyway + $d = DateTime::createFromFormat( + '!Y-m-d-H\hi', + substr($element->href, strpos($element->href, '201'), 16), + $timezone + ); + + if(!$d) { + $d = DateTime::createFromFormat( + '!d m Y', + trim(str_replace( + array_values($months), + array_keys($months), + $element->find('.date', 0)->plaintext + )), + $timezone + ); + } + + $item['timestamp'] = $d->format('U'); + $item['content'] = $element->find('.lecturer', 0)->innertext + . ' - ' + . $element->find('.title', 0)->innertext; + + $item['uri'] = self::URI . 
$element->href; + $this->items[] = $item; + } + } +} diff --git a/bridges/CommonDreamsBridge.php b/bridges/CommonDreamsBridge.php new file mode 100644 index 0000000..e4dcb63 --- /dev/null +++ b/bridges/CommonDreamsBridge.php @@ -0,0 +1,26 @@ +<?php +class CommonDreamsBridge extends FeedExpander { + + const MAINTAINER = 'nyutag'; + const NAME = 'CommonDreams Bridge'; + const URI = 'http://www.commondreams.org/'; + const DESCRIPTION = 'Returns the newest articles.'; + + public function collectData(){ + $this->collectExpandableDatas('http://www.commondreams.org/rss.xml', 10); + } + + protected function parseItem($newsItem){ + $item = parent::parseItem($newsItem); + $item['content'] = $this->extractContent($item['uri']); + return $item; + } + + private function extractContent($url){ + $html3 = getSimpleHTMLDOMCached($url); + $text = $html3->find('div[class=field--type-text-with-summary]', 0)->innertext; + $html3->clear(); + unset ($html3); + return $text; + } +}
diff --git a/bridges/CopieDoubleBridge.php b/bridges/CopieDoubleBridge.php
new file mode 100644
index 0000000..767cdce
--- /dev/null
+++ b/bridges/CopieDoubleBridge.php
@@ -0,0 +1,51 @@
+<?php
+/**
+ * Scrapes the copie-double.com front page into feed items.
+ */
+class CopieDoubleBridge extends BridgeAbstract {
+
+    const MAINTAINER = 'superbaillot.net';
+    const NAME = 'CopieDouble';
+    const URI = 'http://www.copie-double.com/';
+    const CACHE_TIMEOUT = 14400; // 4h
+    const DESCRIPTION = 'CopieDouble';
+
+    /**
+     * Walks the rows of the third element matching 'table table'.
+     *
+     * A row whose first cell has class "couleur_1" opens a new item and gives
+     * its title; any other row that does not contain the "suivant" image
+     * supplies the link and HTML content and is appended as an item.
+     */
+    public function collectData(){
+        $html = getSimpleHTMLDOM(self::URI)
+            or returnServerError('Could not request CopieDouble.');
+
+        $table = $html->find('table table', 2);
+
+        foreach($table->find('tr') as $element) {
+            $td = $element->find('td', 0);
+
+            if($td->class === 'couleur_1') {
+                $item = array();
+                $title = $td->innertext;
+                // Keep only the text before the first link; a cell without a
+                // link keeps its whole text instead of being emptied
+                $pos = strpos($title, '<a');
+                if($pos !== false) {
+                    $title = substr($title, 0, $pos);
+                }
+                $item['title'] = $title;
+            } elseif(strpos($element->innertext, '/images/suivant.gif') === false) {
+                $a = $element->find('a', 0);
+                $item['uri'] = self::URI . $a->href;
+                // Make root-relative URLs absolute. self::URI already ends in
+                // '/', so the value's leading slash must not be kept
+                $content = str_replace('src="/', 'src="' . self::URI, $element->find("td", 0)->innertext);
+                $content = str_replace('href="/', 'href="' . self::URI, $content);
+                $item['content'] = $content;
+                $this->items[] = $item;
+            }
+        }
+    }
+}
diff --git a/bridges/CourrierInternationalBridge.php b/bridges/CourrierInternationalBridge.php new file mode 100644 index 0000000..1573863 --- /dev/null +++ b/bridges/CourrierInternationalBridge.php @@ -0,0 +1,55 @@ +<?php +class CourrierInternationalBridge extends BridgeAbstract { + + const MAINTAINER = 'teromene'; + const NAME = 'Courrier International Bridge'; + const URI = 'http://CourrierInternational.com/'; + const CACHE_TIMEOUT = 300; // 5 min + const DESCRIPTION = 'Courrier International bridge'; + + public function collectData(){ + $html = getSimpleHTMLDOM(self::URI) + or returnServerError('Error.'); + + $element = $html->find("article"); + $article_count = 1; + + foreach($element as $article) { + $item = array(); + + $item['uri'] = $article->parent->getAttribute('href'); + + if(strpos($item['uri'], 'http') === false) { + $item['uri'] = self::URI . 
$item['uri']; + } + + $page = getSimpleHTMLDOMCached($item['uri']); + + $content = $page->find('.article-text', 0); + + if(!$content) { + $content = $page->find('.depeche-text', 0); + } + + $item['content'] = sanitize($content); + $item['title'] = strip_tags($article->find('.title', 0)); + + $dateTime = date_parse($page->find('time', 0)); + + $item['timestamp'] = mktime( + $dateTime['hour'], + $dateTime['minute'], + $dateTime['second'], + $dateTime['month'], + $dateTime['day'], + $dateTime['year'] + ); + + $this->items[] = $item; + $article_count ++; + + if($article_count > 5) + break; + } + } +} diff --git a/bridges/CpasbienBridge.php b/bridges/CpasbienBridge.php new file mode 100644 index 0000000..19efd84 --- /dev/null +++ b/bridges/CpasbienBridge.php @@ -0,0 +1,74 @@ +<?php +class CpasbienBridge extends BridgeAbstract { + + const MAINTAINER = 'lagaisse'; + const NAME = 'Cpasbien Bridge'; + const URI = 'http://www.cpasbien.cm'; + const CACHE_TIMEOUT = 86400; // 24h + const DESCRIPTION = 'Returns latest torrents from a request query'; + + const PARAMETERS = array( array( + 'q' => array( + 'name' => 'Search', + 'required' => true, + 'title' => 'Type your search' + ) + )); + + public function collectData(){ + $request = str_replace(" ", "-", trim($this->getInput('q'))); + $html = getSimpleHTMLDOM(self::URI . '/recherche/' . urlencode($request) . 
'.html') + or returnServerError('No results for this query.'); + + foreach($html->find('#gauche', 0)->find('div') as $episode) { + if($episode->getAttribute('class') == 'ligne0' + || $episode->getAttribute('class') == 'ligne1') { + + $urlepisode = $episode->find('a', 0)->getAttribute('href'); + $htmlepisode = getSimpleHTMLDOMCached($urlepisode, 86400 * 366 * 30); + + $item = array(); + $item['author'] = $episode->find('a', 0)->text(); + $item['title'] = $episode->find('a', 0)->text(); + $item['pubdate'] = $this->getCachedDate($urlepisode); + $textefiche = $htmlepisode->find('#textefiche', 0)->find('p', 1); + + if(isset($textefiche)) { + $item['content'] = $textefiche->text(); + } else { + $p = $htmlepisode->find('#textefiche', 0)->find('p'); + if(!empty($p)) { + $item['content'] = $htmlepisode->find('#textefiche', 0)->find('p', 0)->text(); + } + } + + $item['id'] = $episode->find('a', 0)->getAttribute('href'); + $item['uri'] = self::URI . $htmlepisode->find('#telecharger', 0)->getAttribute('href'); + $this->items[] = $item; + } + } + } + + public function getName(){ + if(!is_null($this->getInput('q'))) { + return $this->getInput('q') . ' : ' . self::NAME; + } + + return parent::getName(); + } + + private function getCachedDate($url){ + debugMessage('getting pubdate from url ' . $url . ''); + + // Initialize cache + $cache = Cache::create('FileCache'); + $cache->setPath(CACHE_DIR . '/pages'); + + $params = [$url]; + $cache->setParameters($params); + + // Get cachefile timestamp + $time = $cache->getTime(); + return ($time !== false ? 
$time : time()); + } +}
diff --git a/bridges/CryptomeBridge.php b/bridges/CryptomeBridge.php
new file mode 100644
index 0000000..8a3936f
--- /dev/null
+++ b/bridges/CryptomeBridge.php
@@ -0,0 +1,61 @@
+<?php
+/**
+ * Lists the most recent documents linked from the Cryptome front page.
+ */
+class CryptomeBridge extends BridgeAbstract {
+
+    const MAINTAINER = 'BoboTiG';
+    const NAME = 'Cryptome';
+    const URI = 'https://cryptome.org/';
+    const CACHE_TIMEOUT = 21600; //6h
+    const DESCRIPTION = 'Returns the N most recent documents.';
+
+    const PARAMETERS = array( array(
+        'n' => array(
+            'name' => 'number of elements',
+            'type' => 'number',
+            'defaultValue' => 20,
+            'exampleValue' => 10
+        )
+    ));
+
+    /**
+     * Turns the first $num <a>/<b> pairs of the first <pre> element into items.
+     */
+    public function collectData(){
+        $html = getSimpleHTMLDOM(self::URI)
+            or returnServerError('Could not request Cryptome.');
+
+        $number = $this->getInput('n');
+
+        /* number of documents: capped at 20, and 20 when 'n' is empty */
+        $num = 20;
+        if(!empty($number)) {
+            $num = min($number, 20);
+        }
+
+        foreach($html->find('pre') as $element) {
+            for($i = 0; $i < $num; ++$i) {
+                $link = $element->find('a', $i);
+                $label = $element->find('b', $i);
+
+                // Fewer documents listed than requested: stop instead of
+                // dereferencing a missing node
+                if(!$link || !$label) {
+                    break;
+                }
+
+                $item = array();
+                $item['uri'] = self::URI . substr($link->href, 20);
+                $item['title'] = substr($label->plaintext, 22);
+                $item['content'] = preg_replace(
+                    '#http://cryptome.org/#',
+                    self::URI,
+                    $label->innertext
+                );
+                $this->items[] = $item;
+            }
+            break;
+        }
+    }
+}
diff --git a/bridges/DailymotionBridge.php b/bridges/DailymotionBridge.php new file mode 100644 index 0000000..d075041 --- /dev/null +++ b/bridges/DailymotionBridge.php @@ -0,0 +1,123 @@ +<?php +class DailymotionBridge extends BridgeAbstract { + + const MAINTAINER = 'mitsukarenai'; + const NAME = 'Dailymotion Bridge'; + const URI = 'https://www.dailymotion.com/'; + const CACHE_TIMEOUT = 10800; // 3h + const DESCRIPTION = 'Returns the 5 newest videos by username/playlist or search'; + + const PARAMETERS = array ( + 'By username' => array( + 'u' => array( + 'name' => 'username', + 'required' => true + ) + ), + 'By playlist id' => array( + 'p' => array( + 'name' => 'playlist id', + 'required' => true + ) + ), + 'From 
search results' => array( + 's' => array( + 'name' => 'Search keyword', + 'required' => true + ), + 'pa' => array( + 'name' => 'Page', + 'type' => 'number' + ) + ) + ); + + protected function getMetadata($id){ + $metadata = array(); + $html2 = getSimpleHTMLDOM(self::URI . 'video/' . $id); + if(!$html2) { + return $metadata; + } + + $metadata['title'] = $html2->find('meta[property=og:title]', 0)->getAttribute('content'); + $metadata['timestamp'] = strtotime( + $html2->find('meta[property=video:release_date]', 0)->getAttribute('content') + ); + $metadata['thumbnailUri'] = $html2->find('meta[property=og:image]', 0)->getAttribute('content'); + $metadata['uri'] = $html2->find('meta[property=og:url]', 0)->getAttribute('content'); + return $metadata; + } + + public function collectData(){ + $html = ''; + $limit = 5; + $count = 0; + + $html = getSimpleHTMLDOM($this->getURI()) + or returnServerError('Could not request Dailymotion.'); + + foreach($html->find('div.media a.preview_link') as $element) { + if($count < $limit) { + $item = array(); + $item['id'] = str_replace('/video/', '', strtok($element->href, '_')); + $metadata = $this->getMetadata($item['id']); + if(empty($metadata)) { + continue; + } + $item['uri'] = $metadata['uri']; + $item['title'] = $metadata['title']; + $item['timestamp'] = $metadata['timestamp']; + + $item['content'] = '<a href="' + . $item['uri'] + . '"><img src="' + . $metadata['thumbnailUri'] + . '" /></a><br><a href="' + . $item['uri'] + . '">' + . $item['title'] + . '</a>'; + + $this->items[] = $item; + $count++; + } + } + } + + public function getName(){ + switch($this->queriedContext) { + case 'By username': + $specific = $this->getInput('u'); + break; + case 'By playlist id': + $specific = strtok($this->getInput('p'), '_'); + break; + case 'From search results': + $specific = $this->getInput('s'); + break; + default: return parent::getName(); + } + + return $specific . 
' : Dailymotion Bridge'; + } + + public function getURI(){ + $uri = self::URI; + switch($this->queriedContext) { + case 'By username': + $uri .= 'user/' . urlencode($this->getInput('u')) . '/1'; + break; + case 'By playlist id': + $uri .= 'playlist/' . urlencode(strtok($this->getInput('p'), '_')); + break; + case 'From search results': + $uri .= 'search/' . urlencode($this->getInput('s')); + if($this->getInput('pa')) { + $uri .= '/' . $this->getInput('pa'); + } + break; + default: return parent::getURI(); + } + return $uri; + } +} diff --git a/bridges/DanbooruBridge.php b/bridges/DanbooruBridge.php new file mode 100644 index 0000000..f2cddf4 --- /dev/null +++ b/bridges/DanbooruBridge.php @@ -0,0 +1,62 @@ +<?php +class DanbooruBridge extends BridgeAbstract { + + const MAINTAINER = 'mitsukarenai'; + const NAME = 'Danbooru'; + const URI = 'http://donmai.us/'; + const CACHE_TIMEOUT = 1800; // 30min + const DESCRIPTION = 'Returns images from given page'; + + const PARAMETERS = array( + 'global' => array( + 'p' => array( + 'name' => 'page', + 'defaultValue' => 1, + 'type' => 'number' + ), + 't' => array( + 'name' => 'tags' + ) + ), + 0 => array() + ); + + const PATHTODATA = 'article'; + const IDATTRIBUTE = 'data-id'; + + protected function getFullURI(){ + return $this->getURI() + . 'posts?&page=' . $this->getInput('p') + . '&tags=' . urlencode($this->getInput('t')); + } + + protected function getItemFromElement($element){ + // Fix links + defaultLinkTo($element, $this->getURI()); + + $item = array(); + $item['uri'] = $element->find('a', 0)->href; + $item['postid'] = (int)preg_replace("/[^0-9]/", '', $element->getAttribute(static::IDATTRIBUTE)); + $item['timestamp'] = time(); + $thumbnailUri = $element->find('img', 0)->src; + $item['tags'] = $element->find('img', 0)->getAttribute('alt'); + $item['title'] = $this->getName() . ' | ' . $item['postid']; + $item['content'] = '<a href="' + . $item['uri'] + . '"><img src="' + . $thumbnailUri + . '" /></a><br>Tags: ' + . 
$item['tags']; + + return $item; + } + + public function collectData(){ + $html = getSimpleHTMLDOM($this->getFullURI()) + or returnServerError('Could not request ' . $this->getName()); + + foreach($html->find(static::PATHTODATA) as $element) { + $this->items[] = $this->getItemFromElement($element); + } + } +} diff --git a/bridges/DansTonChatBridge.php b/bridges/DansTonChatBridge.php new file mode 100644 index 0000000..545f162 --- /dev/null +++ b/bridges/DansTonChatBridge.php @@ -0,0 +1,23 @@ +<?php +class DansTonChatBridge extends BridgeAbstract { + + const MAINTAINER = 'Astalaseven'; + const NAME = 'DansTonChat Bridge'; + const URI = 'https://danstonchat.com/'; + const CACHE_TIMEOUT = 21600; //6h + const DESCRIPTION = 'Returns latest quotes from DansTonChat.'; + + public function collectData(){ + + $html = getSimpleHTMLDOM(self::URI . 'latest.html') + or returnServerError('Could not request DansTonChat.'); + + foreach($html->find('div.item') as $element) { + $item = array(); + $item['uri'] = $element->find('a', 0)->href; + $item['title'] = 'DansTonChat ' . 
$element->find('a', 1)->plaintext; + $item['content'] = $element->find('a', 0)->innertext; + $this->items[] = $item; + } + } +}
diff --git a/bridges/DauphineLibereBridge.php b/bridges/DauphineLibereBridge.php
new file mode 100644
index 0000000..7547d74
--- /dev/null
+++ b/bridges/DauphineLibereBridge.php
@@ -0,0 +1,70 @@
+<?php
+/**
+ * Expands the ledauphine.com RSS feed with the content of each article page.
+ */
+class DauphineLibereBridge extends FeedExpander {
+
+    const MAINTAINER = 'qwertygc';
+    const NAME = 'Dauphine Bridge';
+    const URI = 'http://www.ledauphine.com/';
+    const CACHE_TIMEOUT = 7200; // 2h
+    const DESCRIPTION = 'Returns the newest articles.';
+
+    const PARAMETERS = array( array(
+        'u' => array(
+            'name' => 'Catégorie de l\'article',
+            'type' => 'list',
+            'values' => array(
+                'À la une' => '',
+                'France Monde' => 'france-monde',
+                'Faits Divers' => 'faits-divers',
+                'Économie et Finance' => 'economie-et-finance',
+                'Politique' => 'politique',
+                'Sport' => 'sport',
+                'Ain' => 'ain',
+                'Alpes-de-Haute-Provence' => 'haute-provence',
+                'Hautes-Alpes' => 'hautes-alpes',
+                'Ardèche' => 'ardeche',
+                'Drôme' => 'drome',
+                'Isère Sud' => 'isere-sud',
+                'Savoie' => 'savoie',
+                'Haute-Savoie' => 'haute-savoie',
+                'Vaucluse' => 'vaucluse'
+            )
+        )
+    ));
+
+    /**
+     * Requests the feed of the selected category, or the site-wide feed when
+     * the category value is empty ("À la une"), limited to 10 items.
+     */
+    public function collectData(){
+        $url = self::URI . 'rss';
+
+        // Only a non-empty category has its own <category>/rss feed
+        if(!empty($this->getInput('u'))) {
+            $url = self::URI . $this->getInput('u') . '/rss';
+        }
+
+        $this->collectExpandableDatas($url, 10);
+    }
+
+    /**
+     * Replaces the content of the parsed feed item with the article page content.
+     */
+    protected function parseItem($newsItem){
+        $item = parent::parseItem($newsItem);
+        $item['content'] = $this->extractContent($item['uri']);
+        return $item;
+    }
+
+    /**
+     * Returns the inner HTML of the first div.column of $url without <script> elements.
+     */
+    private function extractContent($url){
+        $html2 = getSimpleHTMLDOMCached($url);
+        $text = $html2->find('div.column', 0)->innertext;
+        $text = preg_replace('@<script[^>]*?>.*?</script>@si', '', $text);
+        return $text;
+    }
+}
diff --git a/bridges/DemoBridge.php b/bridges/DemoBridge.php new file mode 100644 index 0000000..ea2088e --- /dev/null +++ b/bridges/DemoBridge.php @@ -0,0 +1,46 @@ +<?php +class DemoBridge extends BridgeAbstract { + + const MAINTAINER = 'teromene'; + const NAME = 'DemoBridge'; + const URI = 'http://github.com/rss-bridge/rss-bridge'; + const DESCRIPTION = 'Bridge used for demos'; + + const PARAMETERS = array( + 'testCheckbox' => array( + 'testCheckbox' => array( + 'type' => 'checkbox', + 'name' => 'test des checkbox' + ) + ), + 'testList' => array( + 'testList' => array( + 'type' => 'list', + 'name' => 'test des listes', + 'values' => array( + 'Test' => 'test', + 'Test 2' => 'test2' + ) + ) + ), + 'testNumber' => array( + 'testNumber' => array( + 'type' => 'number', + 'name' => 'test des numéros', + 'exampleValue' => '1515632' + ) + ) + ); + + public function collectData(){ + + $item = array(); + $item['author'] = "Me!"; + $item['title'] = "Test"; + $item['content'] = "Awesome content !"; + $item['id'] = "Lalala"; + $item['uri'] = "http://example.com/test"; + + $this->items[] = $item; + } +} diff --git a/bridges/DeveloppezDotComBridge.php b/bridges/DeveloppezDotComBridge.php new file mode 100644 index 0000000..5719cf3 --- /dev/null +++ b/bridges/DeveloppezDotComBridge.php @@ -0,0 +1,47 @@ +<?php +class DeveloppezDotComBridge extends FeedExpander { + + const MAINTAINER = 'polopollo'; + const NAME = 'Developpez.com Actus (FR)'; + const URI = 'https://www.developpez.com/'; + const CACHE_TIMEOUT = 1800; // 30min + const 
DESCRIPTION = 'Returns the 15 newest posts from DeveloppezDotCom (full text).'; + + public function collectData(){ + $this->collectExpandableDatas(self::URI . 'index/rss', 15); + } + + protected function parseItem($newsItem){ + $item = parent::parseItem($newsItem); + $item['content'] = $this->extractContent($item['uri']); + return $item; + } + + // F***ing quotes from Microsoft Word badly encoded, here was the trick: + // http://stackoverflow.com/questions/1262038/how-to-replace-microsoft-encoded-quotes-in-php + private function convertSmartQuotes($string) + { + $search = array(chr(145), + chr(146), + chr(147), + chr(148), + chr(151)); + + $replace = array( + "'", + "'", + '"', + '"', + '-' + ); + + return str_replace($search, $replace, $string); + } + + private function extractContent($url){ + $articleHTMLContent = getSimpleHTMLDOMCached($url); + $text = $this->convertSmartQuotes($articleHTMLContent->find('div.content', 0)->innertext); + $text = utf8_encode($text); + return trim($text); + } +} diff --git a/bridges/DiceBridge.php b/bridges/DiceBridge.php new file mode 100644 index 0000000..dc6ea15 --- /dev/null +++ b/bridges/DiceBridge.php @@ -0,0 +1,120 @@ +<?php +class DiceBridge extends BridgeAbstract { + + const MAINTAINER = 'rogerdc'; + const NAME = 'Dice Unofficial RSS'; + const URI = 'https://www.dice.com/'; + const DESCRIPTION = 'The Unofficial Dice RSS'; + // const CACHE_TIMEOUT = 86400; // 1 day + + const PARAMETERS = array(array( + 'for_one' => array( + 'name' => 'With at least one of the words', + 'required' => false, + ), + 'for_all' => array( + 'name' => 'With all of the words', + 'required' => false, + ), + 'for_exact' => array( + 'name' => 'With the exact phrase', + 'required' => false, + ), + 'for_none' => array( + 'name' => 'With none of these words', + 'required' => false, + ), + 'for_jt' => array( + 'name' => 'Within job title', + 'required' => false, + ), + 'for_com' => array( + 'name' => 'Within company name', + 'required' => false, + ), + 
'for_loc' => array( + 'name' => 'City, State, or ZIP code', + 'required' => false, + ), + 'radius' => array( + 'name' => 'Radius in miles', + 'type' => 'list', + 'required' => false, + 'values' => array( + 'Exact Location' => 'El', + 'Within 5 miles' => '5', + 'Within 10 miles' => '10', + 'Within 20 miles' => '20', + 'Within 30 miles' => '0', + 'Within 40 miles' => '40', + 'Within 50 miles' => '50', + 'Within 75 miles' => '75', + 'Within 100 miles' => '100', + ), + 'defaultValue' => '0', + ), + 'jtype' => array( + 'name' => 'Job type', + 'type' => 'list', + 'required' => false, + 'values' => array( + 'Full-Time' => 'Full Time', + 'Part-Time' => 'Part Time', + 'Contract - Independent' => 'Contract Independent', + 'Contract - W2' => 'Contract W2', + 'Contract to Hire - Independent' => 'C2H Independent', + 'Contract to Hire - W2' => 'C2H W2', + 'Third Party - Contract - Corp-to-Corp' => 'Contract Corp-To-Corp', + 'Third Party - Contract to Hire - Corp-to-Corp' => 'C2H Corp-To-Corp', + ), + 'defaultValue' => 'Full Time', + ), + 'telecommute' => array( + 'name' => 'Telecommute', + 'type' => 'checkbox', + ), + )); + + public function collectData() { + $uri = 'https://www.dice.com/jobs/advancedResult.html'; + $uri .= '?for_one=' . urlencode($this->getInput('for_one')); + $uri .= '&for_all=' . urlencode($this->getInput('for_all')); + $uri .= '&for_exact=' . urlencode($this->getInput('for_exact')); + $uri .= '&for_none=' . urlencode($this->getInput('for_none')); + $uri .= '&for_jt=' . urlencode($this->getInput('for_jt')); + $uri .= '&for_com=' . urlencode($this->getInput('for_com')); + $uri .= '&for_loc=' . urlencode($this->getInput('for_loc')); + if ($this->getInput('jtype')) { + $uri .= '&jtype=' . urlencode($this->getInput('jtype')); + } + $uri .= '&sort=date&limit=100'; + $uri .= '&radius=' . 
urlencode($this->getInput('radius')); + if ($this->getInput('telecommute')) { + $uri .= '&telecommute=true'; + } + + $html = getSimpleHTMLDOM($uri) + or returnServerError('Could not request Dice.'); + foreach($html->find('div.complete-serp-result-div') as $element) { + $item = array(); + // Title + $masterLink = $element->find('a[id^=position]', 0); + $item['title'] = $masterLink->title; + // URL + $uri = $masterLink->href; + // $uri = substr($uri, 0, strrpos($uri, '?')); + $item['uri'] = substr($uri, 0, strrpos($uri, '?')); + // ID + $item['id'] = $masterLink->value; + // Image + $image = $element->find('img', 0); + if ($image) + $item['image'] = $image->getAttribute('src'); + // Content + $shortdesc = $element->find('.shortdesc', '0'); + $shortdesc = ($shortdesc) ? $shortdesc->innertext : ''; + $item['content'] = $shortdesc; + $this->items[] = $item; + } + } +} diff --git a/bridges/DilbertBridge.php b/bridges/DilbertBridge.php new file mode 100644 index 0000000..959a91a --- /dev/null +++ b/bridges/DilbertBridge.php @@ -0,0 +1,36 @@ +<?php +class DilbertBridge extends BridgeAbstract { + + const MAINTAINER = 'kranack'; + const NAME = 'Dilbert Daily Strip'; + const URI = 'http://dilbert.com'; + const CACHE_TIMEOUT = 21600; // 6h + const DESCRIPTION = 'The Unofficial Dilbert Daily Comic Strip'; + + public function collectData(){ + + $html = getSimpleHTMLDOM($this->getURI()) + or returnServerError('Could not request Dilbert: ' . $this->getURI()); + + foreach($html->find('section.comic-item') as $element) { + + $img = $element->find('img', 0); + $link = $element->find('a', 0); + $comic = $img->src; + $title = $link->alt; + $url = $link->href; + $date = substr($url, 25); + if (empty($title)) + $title = 'Dilbert Comic Strip on ' . $date; + $date = strtotime($date); + + $item = array(); + $item['uri'] = $url; + $item['title'] = $title; + $item['author'] = 'Scott Adams'; + $item['timestamp'] = $date; + $item['content'] = '<img src="' . $comic . '" alt="' . $img->alt . 
'" />'; + $this->items[] = $item; + } + } +} diff --git a/bridges/DollbooruBridge.php b/bridges/DollbooruBridge.php new file mode 100644 index 0000000..5ed4119 --- /dev/null +++ b/bridges/DollbooruBridge.php @@ -0,0 +1,9 @@ +<?php +require_once('Shimmie2Bridge.php'); + +class DollbooruBridge extends Shimmie2Bridge { + const MAINTAINER = 'mitsukarenai'; + const NAME = 'Dollbooru'; + const URI = 'http://dollbooru.org/'; + const DESCRIPTION = 'Returns images from given page'; +}
diff --git a/bridges/DuckDuckGoBridge.php b/bridges/DuckDuckGoBridge.php
new file mode 100644
index 0000000..8533be5
--- /dev/null
+++ b/bridges/DuckDuckGoBridge.php
@@ -0,0 +1,50 @@
+<?php
+/**
+ * Returns the results of a DuckDuckGo HTML search for a keyword.
+ */
+class DuckDuckGoBridge extends BridgeAbstract {
+
+    const MAINTAINER = 'Astalaseven';
+    const NAME = 'DuckDuckGo';
+    const URI = 'https://duckduckgo.com/';
+    const CACHE_TIMEOUT = 21600; // 6h
+    const DESCRIPTION = 'Returns results from DuckDuckGo.';
+
+    const SORT_DATE = '+sort:date';
+    const SORT_RELEVANCE = '';
+
+    const PARAMETERS = array( array(
+        'u' => array(
+            'name' => 'keyword',
+            'required' => true
+        ),
+        'sort' => array(
+            'name' => 'sort by',
+            'type' => 'list',
+            'required' => false,
+            'values' => array(
+                'date' => self::SORT_DATE,
+                'relevance' => self::SORT_RELEVANCE
+            ),
+            'defaultValue' => self::SORT_DATE
+        )
+    ));
+
+    /**
+     * Requests the HTML result page and maps every div.results_links to an item.
+     */
+    public function collectData(){
+        // The keyword is free text and must be URL-encoded. The sort suffix is
+        // appended as-is: it is expected to be one of the SORT_* list values
+        $html = getSimpleHTMLDOM(self::URI . 'html/?kd=-1&q=' . urlencode($this->getInput('u')) . $this->getInput('sort'))
+            or returnServerError('Could not request DuckDuckGo.');
+
+        foreach($html->find('div.results_links') as $element) {
+            $item = array();
+            $item['uri'] = $element->find('a', 0)->href;
+            $item['title'] = $element->find('a', 1)->innertext;
+            $item['content'] = $element->find('div.snippet', 0)->plaintext;
+            $this->items[] = $item;
+        }
+    }
+}
diff --git a/bridges/EZTVBridge.php b/bridges/EZTVBridge.php
new file mode 100644
index 0000000..4fb9e57
--- /dev/null
+++ b/bridges/EZTVBridge.php
@@ -0,0 +1,76 @@
+<?php
+/**
+ * Lists the recent torrents of the given EZTV shows.
+ */
+class EZTVBridge extends BridgeAbstract {
+
+    const MAINTAINER = "alexAubin";
+    const NAME = 'EZTV';
+    const URI = 'https://eztv.ch/';
+    const DESCRIPTION = 'Returns list of *recent* torrents for a specific show
+on EZTV. Get showID from URLs in https://eztv.ch/shows/showID/show-full-name.';
+
+    const PARAMETERS = array( array(
+        'i' => array(
+            'name' => 'Show ids',
+            'exampleValue' => 'showID1,showID2,…',
+            'required' => true
+        )
+    ));
+
+    /**
+     * Fetches the page of every comma-separated show id and adds one item per
+     * episode row whose release column is not '>1 week'.
+     */
+    public function collectData(){
+
+        // Make timestamp from relative released time in table
+        function makeTimestamp($relativeReleaseTime){
+
+            $relativeDays = 0;
+            $relativeHours = 0;
+
+            foreach(explode(" ", $relativeReleaseTime) as $relativeTimeElement) {
+                if(substr($relativeTimeElement, -1) == "d") $relativeDays = substr($relativeTimeElement, 0, -1);
+                if(substr($relativeTimeElement, -1) == "h") $relativeHours = substr($relativeTimeElement, 0, -1);
+            }
+            // 'H' is the 24-hour clock; 'h' (12-hour) would place afternoon
+            // releases twelve hours too early
+            return mktime(date('H') - $relativeHours, 0, 0, date('m'), date('d') - $relativeDays, date('Y'));
+        }
+
+        // Loop on show ids
+        $showList = explode(",", $this->getInput('i'));
+        foreach($showList as $showID) {
+
+            // Get show page
+            $html = getSimpleHTMLDOM(self::URI . 'shows/' . rawurlencode($showID) . '/')
+                or returnServerError('Could not request EZTV for id "' . $showID . '"');
+
+            // Loop on each element that look like an episode entry...
+            foreach($html->find('.forum_header_border') as $element) {
+
+                // Filter entries that are not episode entries
+                $ep = $element->find('td', 1);
+                if(empty($ep)) continue;
+                $epinfo = $ep->find('.epinfo', 0);
+                $released = $element->find('td', 3);
+                if(empty($epinfo)) continue;
+                if(empty($released->plaintext)) continue;
+
+                // Filter entries that are older than 1 week
+                if($released->plaintext == '>1 week') continue;
+
+                // Fill item
+                $item = array();
+                $item['uri'] = self::URI . $epinfo->href;
+                $item['id'] = $item['uri'];
+                $item['timestamp'] = makeTimestamp($released->plaintext);
+                $item['title'] = $epinfo->plaintext;
+                $item['content'] = $epinfo->alt;
+                if(isset($item['title']))
+                    $this->items[] = $item;
+            }
+        }
+    }
+}
diff --git a/bridges/EliteDangerousGalnetBridge.php b/bridges/EliteDangerousGalnetBridge.php new file mode 100644 index 0000000..86a1bbf --- /dev/null +++ b/bridges/EliteDangerousGalnetBridge.php @@ -0,0 +1,35 @@ +<?php +class EliteDangerousGalnetBridge extends BridgeAbstract { + + const MAINTAINER = 'corenting'; + const NAME = 'Elite: Dangerous Galnet'; + const URI = 'https://community.elitedangerous.com/galnet/'; + const CACHE_TIMEOUT = 7200; // 2h + const DESCRIPTION = 'Returns the latest page of news from Galnet'; + + public function collectData(){ + $html = getSimpleHTMLDOM(self::URI) + or returnServerError('Error while downloading the website content'); + + foreach($html->find('div.article') as $element) { + $item = array(); + + $uri = $element->find('h3 a', 0)->href; + $uri = self::URI . substr($uri, strlen('/galnet/')); + $item['uri'] = $uri; + + $title = $element->find('h3 a', 0)->plaintext; + $item['title'] = substr($title, 1); //remove the space between icon and title + + $content = $element->find('p', -1)->innertext; + $item['content'] = $content; + + $date = $element->find('p.small', 0)->innertext; + $article_year = substr($date, -4) - 1286; //Convert E:D date to actual date + $date = substr($date, 0, -4) . 
$article_year; + $item['timestamp'] = strtotime($date); + + $this->items[] = $item; + } + } +}
diff --git a/bridges/ElsevierBridge.php b/bridges/ElsevierBridge.php
new file mode 100644
index 0000000..f6ba7dd
--- /dev/null
+++ b/bridges/ElsevierBridge.php
@@ -0,0 +1,83 @@
+<?php
+/**
+ * Lists the recent articles of an Elsevier journal.
+ */
+class ElsevierBridge extends BridgeAbstract {
+
+    const MAINTAINER = 'Pierre Mazière';
+    const NAME = 'Elsevier journals recent articles';
+    const URI = 'http://www.journals.elsevier.com/';
+    const CACHE_TIMEOUT = 43200; //12h
+    const DESCRIPTION = 'Returns the recent articles published in Elsevier journals';
+
+    const PARAMETERS = array( array(
+        'j' => array(
+            'name' => 'Journal name',
+            'required' => true,
+            'exampleValue' => 'academic-pediactrics',
+            'title' => 'Insert html-part of your journal'
+        )
+    ));
+
+    // Extracts the list of names from an article as string
+    private function extractArticleName($article){
+        $names = $article->find('small', 0);
+        if($names)
+            return trim($names->plaintext);
+        return '';
+    }
+
+    // Extracts the timestamp from an article
+    private function extractArticleTimestamp($article){
+        $time = $article->find('.article-info', 0);
+        if($time) {
+            $timestring = trim($time->plaintext);
+            /*
+                The format depends on the age of an article:
+                - Available online 29 July 2016
+                - July 2016
+                - May–June 2016
+            */
+            if(preg_match('/\S*(\d+\s\S+\s\d{4})/ims', $timestring, $matches)) {
+                return strtotime($matches[0]);
+            } elseif (preg_match('/[A-Za-z]+\-([A-Za-z]+\s\d{4})/ims', $timestring, $matches)) {
+                // Use the captured "Month Year" tail (e.g. "June 2016"), not the
+                // whole "May-June 2016" range
+                return strtotime($matches[1]);
+            } elseif (preg_match('/([A-Za-z]+\s\d{4})/ims', $timestring, $matches)) {
+                return strtotime($matches[0]);
+            } else {
+                return 0;
+            }
+        }
+        return 0;
+    }
+
+    // Extracts the content from an article
+    private function extractArticleContent($article){
+        $content = $article->find('.article-content', 0);
+        if($content) {
+            return trim($content->plaintext);
+        }
+        return '';
+    }
+
+    /**
+     * Builds one item per .pod-listing element of the journal's recent-articles page.
+     */
+    public function collectData(){
+        $uri = self::URI . $this->getInput('j') . '/recent-articles/';
+        $html = getSimpleHTMLDOM($uri)
+            or returnServerError('No results for Elsevier journal ' . $this->getInput('j'));
+
+        foreach($html->find('.pod-listing') as $article) {
+            $item = array();
+            $item['uri'] = $article->find('.pod-listing-header>a', 0)->getAttribute('href') . '?np=y';
+            $item['title'] = $article->find('.pod-listing-header>a', 0)->plaintext;
+            $item['author'] = $this->extractArticleName($article);
+            $item['timestamp'] = $this->extractArticleTimestamp($article);
+            $item['content'] = $this->extractArticleContent($article);
+            $this->items[] = $item;
+        }
+    }
+}
diff --git a/bridges/EstCeQuonMetEnProdBridge.php b/bridges/EstCeQuonMetEnProdBridge.php new file mode 100644 index 0000000..db9d1d5 --- /dev/null +++ b/bridges/EstCeQuonMetEnProdBridge.php @@ -0,0 +1,37 @@ +<?php +class EstCeQuonMetEnProdBridge extends BridgeAbstract { + + const MAINTAINER = 'ORelio'; + const NAME = 'Est-ce qu\'on met en prod aujourd\'hui ?'; + const URI = 'https://www.estcequonmetenprodaujourdhui.info/'; + const CACHE_TIMEOUT = 21600; // 6h + const DESCRIPTION = 'Should we put a website in production today? (French)'; + + public function collectData(){ + function extractFromDelimiters($string, $start, $end){ + if(strpos($string, $start) !== false) { + $section_retrieved = substr($string, strpos($string, $start) + strlen($start)); + $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end)); + return $section_retrieved; + } + + return false; + } + + $html = getSimpleHTMLDOM($this->getURI()) + or returnServerError('Could not request EstCeQuonMetEnProd: ' . $this->getURI()); + + $item = array(); + $item['uri'] = $this->getURI() . '#' . date('Y-m-d'); + $item['title'] = $this->getName(); + $item['author'] = 'Nicolas Hoffmann'; + $item['timestamp'] = strtotime('today midnight'); + $item['content'] = str_replace( + 'src="/', + 'src="' . 
$this->getURI(), + trim(extractFromDelimiters($html->outertext, '<body role="document">', '<br /><br />')) + ); + + $this->items[] = $item; + } +}
diff --git a/bridges/EtsyBridge.php b/bridges/EtsyBridge.php
new file mode 100644
index 0000000..311d910
--- /dev/null
+++ b/bridges/EtsyBridge.php
@@ -0,0 +1,92 @@
+<?php
+/**
+ * Turns an Etsy search result page into a feed.
+ */
+class EtsyBridge extends BridgeAbstract {
+
+    const NAME = 'Etsy search';
+    const URI = 'https://www.etsy.com';
+    const DESCRIPTION = 'Returns feeds for search results';
+    const MAINTAINER = 'logmanoriginal';
+    const PARAMETERS = array(
+        array(
+            'query' => array(
+                'name' => 'Search query',
+                'type' => 'text',
+                'required' => true,
+                'title' => 'Insert your search term here',
+                'exampleValue' => 'Enter your search term'
+            ),
+            'queryextension' => array(
+                'name' => 'Query extension',
+                'type' => 'text',
+                'required' => false,
+                'title' => 'Insert additional query parts here
+(anything after ?search=<your search query>)',
+                'exampleValue' => '&explicit=1&locationQuery=2921044'
+            ),
+            'showimage' => array(
+                'name' => 'Show image in content',
+                'type' => 'checkbox',
+                'required' => false,
+                'title' => 'Activate to show the image in the content',
+                'defaultValue' => false
+            )
+        )
+    );
+
+    /**
+     * Creates one item per div.block-grid-item of the result page, skipping banner cards.
+     */
+    public function collectData(){
+        $html = getSimpleHTMLDOM($this->getURI())
+            or returnServerError('Failed to receive ' . $this->getURI());
+
+        $results = $html->find('div.block-grid-item');
+
+        foreach($results as $result) {
+            // Skip banner cards (ads for categories)
+            if($result->find('a.banner-card'))
+                continue;
+
+            $item = array();
+
+            $item['title'] = $result->find('a', 0)->title;
+            $item['uri'] = $result->find('a', 0)->href;
+            $item['author'] = $result->find('div.card-shop-name', 0)->plaintext;
+
+            $item['content'] = '<p>'
+            . $result->find('div.card-price', 0)->plaintext
+            . '</p><p>'
+            . $result->find('div.card-title', 0)->plaintext
+            . '</p>';
+
+            $image = $result->find('img.placeholder', 0)->src;
+
+            if($this->getInput('showimage')) {
+                $item['content'] .= '<img src="' . $image . '">';
+            }
+
+            $item['enclosures'] = array($image);
+
+            $this->items[] = $item;
+        }
+    }
+
+    /**
+     * Returns the search URL (query plus the raw query extension) once a query is set.
+     */
+    public function getURI(){
+        if(!is_null($this->getInput('query'))) {
+            $uri = self::URI . '/search?q=' . urlencode($this->getInput('query'));
+
+            if(!is_null($this->getInput('queryextension'))) {
+                $uri .= $this->getInput('queryextension');
+            }
+
+            return $uri;
+        }
+
+        return parent::getURI();
+    }
+}
diff --git a/bridges/FB2Bridge.php b/bridges/FB2Bridge.php new file mode 100644 index 0000000..7d78b87 --- /dev/null +++ b/bridges/FB2Bridge.php @@ -0,0 +1,281 @@ +<?php +class FB2Bridge extends BridgeAbstract { + + const MAINTAINER = 'teromene'; + const NAME = 'Facebook Alternate'; + const URI = 'https://www.facebook.com/'; + const CACHE_TIMEOUT = 1000; + const DESCRIPTION = 'Input a page title or a profile log. For a profile log, + please insert the parameter as follow : myExamplePage/132621766841117'; + + const PARAMETERS = array( array( + 'u' => array( + 'name' => 'Username', + 'required' => true + ) + )); + + public function collectData(){ + + function extractFromDelimiters($string, $start, $end){ + if(strpos($string, $start) !== false) { + $section_retrieved = substr($string, strpos($string, $start) + strlen($start)); + $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end)); + return $section_retrieved; + } + + return false; + } + + //Utility function for cleaning a Facebook link + $unescape_fb_link = function($matches){ + if(is_array($matches) && count($matches) > 1) { + $link = $matches[1]; + if(strpos($link, '/') === 0) + $link = self::URI . $link . '"'; + if(strpos($link, 'facebook.com/l.php?u=') !== false) + $link = urldecode(extractFromDelimiters($link, 'facebook.com/l.php?u=', '&')); + return ' href="' . $link . 
'"'; + } + }; + + //Utility function for converting facebook emoticons + $unescape_fb_emote = function($matches){ + static $facebook_emoticons = array( + 'smile' => ':)', + 'frown' => ':(', + 'tongue' => ':P', + 'grin' => ':D', + 'gasp' => ':O', + 'wink' => ';)', + 'pacman' => ':<', + 'grumpy' => '>_<', + 'unsure' => ':/', + 'cry' => ':\'(', + 'kiki' => '^_^', + 'glasses' => '8-)', + 'sunglasses' => 'B-)', + 'heart' => '<3', + 'devil' => ']:D', + 'angel' => '0:)', + 'squint' => '-_-', + 'confused' => 'o_O', + 'upset' => 'xD', + 'colonthree' => ':3', + 'like' => '👍'); + $len = count($matches); + if ($len > 1) + for ($i = 1; $i < $len; $i++) + foreach ($facebook_emoticons as $name => $emote) + if ($matches[$i] === $name) + return $emote; + return $matches[0]; + }; + + if($this->getInput('u') !== null) { + $page = 'https://touch.facebook.com/' . $this->getInput('u'); + $cookies = $this->getCookies($page); + $pageID = $this->getPageID($page, $cookies); + + if($pageID === null) { + echo <<<EOD +Unable to get the page id. You should consider getting the ID by hand, then importing it into FB2Bridge +EOD; + die(); + } elseif($pageID == -1) { + echo <<<EOD +This page is not accessible without being logged in. +EOD; + die(); + } + } + + //Build the string for the first request + $requestString = 'https://touch.facebook.com/pages_reaction_units/more/?page_id=' + . $pageID + . '&cursor={"card_id"%3A"videos"%2C"has_next_page"%3Atrue}&surface=mobile_page_home&unit_count=8'; + + $fileContent = file_get_contents($requestString); + + $articleIndex = 0; + $maxArticle = 3; + + $html = $this->buildContent($fileContent); + $author = $this->getInput('u'); + + foreach($html->find("article") as $content) { + + $item = array(); + + $item['uri'] = "http://touch.facebook.com" + . 
$content->find("div[class='_52jc _5qc4 _24u0 _36xo']", 0)->find("a", 0)->getAttribute("href"); + + if($content->find("header", 0) !== null) { + $content->find("header", 0)->innertext = ""; + } + + if($content->find("footer", 0) !== null) { + $content->find("footer", 0)->innertext = ""; + } + + //Remove html nodes, keep only img, links, basic formatting + $content = strip_tags($content, '<a><img><i><u><br><p>'); + + //Adapt link hrefs: convert relative links into absolute links and bypass external link redirection + $content = preg_replace_callback('/ href=\"([^"]+)\"/i', $unescape_fb_link, $content); + + //Clean useless html tag properties and fix link closing tags + foreach (array( + 'onmouseover', + 'onclick', + 'target', + 'ajaxify', + 'tabindex', + 'class', + 'style', + 'data-[^=]*', + 'aria-[^=]*', + 'role', + 'rel', + 'id') as $property_name) + $content = preg_replace('/ ' . $property_name . '=\"[^"]*\"/i', '', $content); + $content = preg_replace('/<\/a [^>]+>/i', '</a>', $content); + + //Convert textual representation of emoticons eg + // "<i><u>smile emoticon</u></i>" back to ASCII emoticons eg ":)" + $content = preg_replace_callback('/<i><u>([^ <>]+) ([^<>]+)<\/u><\/i>/i', $unescape_fb_emote, $content); + + $item['content'] = $content; + + $title = $author; + if (strlen($title) > 24) + $title = substr($title, 0, strpos(wordwrap($title, 24), "\n")) . '...'; + $title = $title . ' | ' . strip_tags($content); + if (strlen($title) > 64) + $title = substr($title, 0, strpos(wordwrap($title, 64), "\n")) . '...'; + + $item['title'] = $title; + $item['author'] = $author; + + array_push($this->items, $item); + } + } + + + // Currently not used. Is used to get more than only 3 elements, as they appear on another page. 
/**
 * Builds the URL of the follow-up "more" request from the timeline cursor
 * found in a previous response.
 *
 * @param string $string Raw body of the previous response
 * @param string $pageID Page id as returned by getPageID()
 * @return string|null The next request URL, or null when $string holds no cursor
 */
private function computeNextLink($string, $pageID){

    $regex = implode(
        '',
        array(
            "/timeline_unit",
            "\\\\\\\\u00253A1",
            "\\\\\\\\u00253A([0-9]*)",
            "\\\\\\\\u00253A([0-9]*)",
            "\\\\\\\\u00253A([0-9]*)",
            "\\\\\\\\u00253A([0-9]*)/"
        )
    );

    // Without a cursor there is nothing to build the next link from
    if(preg_match($regex, $string, $result) !== 1) {
        return null;
    }

    return implode(
        '',
        array(
            "https://touch.facebook.com/pages_reaction_units/more/?page_id=",
            $pageID,
            "&cursor=%7B%22timeline_cursor%22%3A%22timeline_unit%3A1%3A",
            $result[1],
            "%3A",
            $result[2],
            "%3A",
            $result[3],
            "%3A",
            $result[4],
            "%22%2C%22timeline_section_cursor%22%3A%7B%7D%2C%22",
            "has_next_page%22%3Atrue%7D&surface=mobile_page_home&unit_count=3"
        )
    );
}

/**
 * Builds the HTML from the encoded JS that Facebook provides.
 *
 * The markup is the JSON-escaped value of the "html" key in $pageContent.
 *
 * @param string $pageContent Raw response body
 * @return object Parsed DOM as returned by str_get_html()
 */
private function buildContent($pageContent){

    $regex = "/\\\"html\\\":\\\"(.*?)\\\",\\\"replace/";

    // NOTE(review): returnServerError() presumably aborts the request, as it does
    // for the other bridges; confirm against its definition.
    if(preg_match($regex, $pageContent, $result) !== 1) {
        returnServerError('Unable to find the HTML payload in the Facebook response.');
    }

    // Wrapping the capture in quotes lets json_decode() undo the JS string escaping
    $decoded = json_decode('"' . $result[1] . '"');
    if(!is_string($decoded)) {
        returnServerError('Unable to decode the HTML payload of the Facebook response.');
    }

    return str_get_html(html_entity_decode($decoded));
}

/**
 * Builds the cookie from the page, as Facebook sometimes refuses to give
 * the page if no cookie is provided.
 *
 * @param string $pageURL URL to request
 * @return string Cookies formatted as "name=value; name2=value2" ('' when none were sent)
 */
private function getCookies($pageURL){

    $ctx = stream_context_create(array(
        'http' => array(
            'user_agent' => "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0",
            // Request headers go into 'header'; 'Accept' is not a context option of its own
            'header' => 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8'
        )
    ));

    //First request to get the cookie; only the response headers are of interest
    file_get_contents($pageURL, false, $ctx);

    // PHP only creates $http_response_header when the HTTP wrapper received a response
    if(!isset($http_response_header)) {
        return '';
    }

    $cookies = array();
    foreach($http_response_header as $hdr) {
        if(stripos($hdr, 'Set-Cookie:') !== 0) {
            continue;
        }

        // Limit of 2 keeps ':' characters that are part of the cookie value
        $cLine = explode(':', $hdr, 2)[1];
        // Everything after the first ';' is cookie attributes (path, expiry, ...)
        $cookies[] = trim(explode(';', $cLine)[0]);
    }

    return implode('; ', $cookies);
}

//Get the page ID from the Facebook page.
/**
 * Gets the page ID from the Facebook page.
 *
 * @param string $page URL of the page to inspect
 * @param string $cookies Cookie header value sent along with the request
 * @return string|int|null The page id, -1 when the page contains a sign-up
 *                         button (login required), null when no id was found
 */
private function getPageID($page, $cookies){

    $context = stream_context_create(array(
        'http' => array(
            'user_agent' => "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:46.0) Gecko/20100101 Firefox/46.0",
            'header' => 'Cookie: ' . $cookies
        )
    ));

    $pageContent = file_get_contents($page, false, $context);

    // Nothing was downloaded, so no id can be extracted
    if($pageContent === false) {
        return null;
    }

    // Strict comparison: a match at offset 0 is still a match
    if(strpos($pageContent, "signup-button") !== false) {
        return -1;
    }

    $patterns = array(
        //Get the page ID if we don't have a captcha
        "/page_id=([0-9]*)&/",
        //Get the page ID if we do have a captcha
        "/\"pageID\":\"([0-9]*)\"/"
    );

    foreach($patterns as $regex) {
        if(preg_match($regex, $pageContent, $matches) === 1) {
            return $matches[1];
        }
    }

    // Callers test for null to report that the id could not be determined
    return null;
}

/**
 * Returns the bridge name, prefixed with $this->name when that is set.
 *
 * @return string
 */
public function getName(){
    return (isset($this->name) ? $this->name . ' - ' : '') . 'Facebook Bridge';
}

/**
 * Returns the fixed Facebook home URL.
 *
 * @return string
 */
public function getURI(){
    return 'http://facebook.com';
}

/**
 * Returns the cache duration in seconds.
 *
 * @return int
 */
public function getCacheDuration(){
    return 60 * 60 * 3; // 3 hours
}
/**
 * Scrapes the public timeline of the page given by the 'u' input and adds
 * one item per post to $this->items.
 *
 * When Facebook answers with a captcha, the form is stored in the session,
 * the captcha is shown to the viewer and the script ends; the viewer's
 * answer (POST field 'captcha_response') is submitted on the next call.
 */
public function collectData(){

    //Extract a string using start and end delimiters
    // A closure is used because a named function declared inside a method becomes
    // global and triggers a fatal "cannot redeclare" error on a second declaration.
    $extractFromDelimiters = function($string, $start, $end){
        if(strpos($string, $start) !== false) {
            $section_retrieved = substr($string, strpos($string, $start) + strlen($start));
            $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end));
            return $section_retrieved;
        }

        return false;
    };

    //Utility function for cleaning a Facebook link
    $unescape_fb_link = function($matches) use ($extractFromDelimiters){
        if(is_array($matches) && count($matches) > 1) {
            $link = $matches[1];
            // self::URI already ends with '/'; no closing quote here, it is added below
            if(strpos($link, '/') === 0)
                $link = self::URI . ltrim($link, '/');
            if(strpos($link, 'facebook.com/l.php?u=') !== false)
                $link = urldecode($extractFromDelimiters($link, 'facebook.com/l.php?u=', '&'));
            return ' href="' . $link . '"';
        }
    };

    //Utility function for converting facebook emoticons
    $unescape_fb_emote = function($matches){
        static $facebook_emoticons = array(
            'smile' => ':)',
            'frown' => ':(',
            'tongue' => ':P',
            'grin' => ':D',
            'gasp' => ':O',
            'wink' => ';)',
            'pacman' => ':<',
            'grumpy' => '>_<',
            'unsure' => ':/',
            'cry' => ':\'(',
            'kiki' => '^_^',
            'glasses' => '8-)',
            'sunglasses' => 'B-)',
            'heart' => '<3',
            'devil' => ']:D',
            'angel' => '0:)',
            'squint' => '-_-',
            'confused' => 'o_O',
            'upset' => 'xD',
            'colonthree' => ':3',
            'like' => '👍');
        $len = count($matches);
        if ($len > 1)
            for ($i = 1; $i < $len; $i++)
                foreach ($facebook_emoticons as $name => $emote)
                    if ($matches[$i] === $name)
                        return $emote;
        return $matches[0];
    };

    $html = null;

    //Handle captcha response sent by the viewer
    if (isset($_POST['captcha_response'])) {
        if (session_status() == PHP_SESSION_NONE)
            session_start();
        if (isset($_SESSION['captcha_fields'], $_SESSION['captcha_action'])) {
            $captcha_action = $_SESSION['captcha_action'];
            $captcha_fields = $_SESSION['captcha_fields'];
            $captcha_fields['captcha_response'] = preg_replace("/[^a-zA-Z0-9]+/", "", $_POST['captcha_response']);
            $http_options = array(
                'http' => array(
                    'method' => 'POST',
                    'user_agent' => ini_get('user_agent'),
                    // One header per entry; the previous literal had a raw line break
                    // inside the Content-type header
                    'header' => array(
                        'Content-type: application/x-www-form-urlencoded',
                        'Referer: ' . $captcha_action,
                        'Cookie: noscript=1'
                    ),
                    'content' => http_build_query($captcha_fields)
                ),
            );
            $context = stream_context_create($http_options);
            $html = getContents($captcha_action, false, $context);

            if($html === false) {
                returnServerError('Failed to submit captcha response back to Facebook');
            }
            $html = str_get_html($html);
        }
        unset($_SESSION['captcha_fields']);
        unset($_SESSION['captcha_action']);
    }

    //Retrieve page contents
    if(is_null($html)) {
        $http_options = array(
            'http' => array(
                'method' => 'GET',
                'user_agent' => ini_get('user_agent'),
                'header' => 'Accept-Language: ' . getEnv('HTTP_ACCEPT_LANGUAGE') . "\r\n"
            )
        );
        $context = stream_context_create($http_options);
        // Inputs without a '/' are plain user names, others are "page/id" profile logs
        if(!strpos($this->getInput('u'), "/")) {
            $html = getSimpleHTMLDOM(self::URI . urlencode($this->getInput('u')) . '?_fb_noscript=1',
                false,
                $context)
                or returnServerError('No results for this query.');
        } else {
            $html = getSimpleHTMLDOM(self::URI . 'pages/' . $this->getInput('u') . '?_fb_noscript=1',
                false,
                $context)
                or returnServerError('No results for this query.');
        }
    }

    //Handle captcha form?
    $captcha = $html->find('div.captcha_interstitial', 0);
    if (!is_null($captcha)) {
        //Save form for submitting after getting captcha response
        if (session_status() == PHP_SESSION_NONE)
            session_start();
        $captcha_fields = array();
        foreach ($captcha->find('input, button') as $input)
            $captcha_fields[$input->name] = $input->value;
        $_SESSION['captcha_fields'] = $captcha_fields;
        $_SESSION['captcha_action'] = $captcha->find('form', 0)->action;

        //Show captcha filling form to the viewer, proxying the captcha image
        $img = base64_encode(getContents($captcha->find('img', 0)->src));
        // The query string is client-controlled: escape it before echoing it into HTML
        $query = htmlspecialchars($_SERVER['QUERY_STRING'], ENT_QUOTES, 'UTF-8');
        header('HTTP/1.1 500 ' . Http::getMessageForCode(500));
        header('Content-Type: text/html');
        $message = <<<EOD
<form method="post" action="?{$query}">
    <h2>Facebook captcha challenge</h2>
    <p>Unfortunately, rss-bridge cannot fetch the requested page.<br />
    Facebook wants rss-bridge to resolve the following captcha:</p>
    <p><img src="data:image/png;base64,{$img}" /></p>
    <p><b>Response:</b> <input name="captcha_response" placeholder="please fill in" />
    <input type="submit" value="Submit!" /></p>
</form>
EOD;
        die($message);
    }

    //No captcha? We can carry on retrieving page contents :)
    $timeline = $html->find('#pagelet_timeline_main_column', 0);
    if(is_null($timeline)) {
        // NOTE(review): returnServerError() presumably aborts the request; confirm.
        returnServerError('Unable to locate the timeline on the Facebook page.');
    }

    $element = $timeline
        ->children(0)
        ->children(0)
        ->children(0)
        ->next_sibling()
        ->children(0);

    if(isset($element)) {

        $author = str_replace(' | Facebook', '', $html->find('title#pageTitle', 0)->innertext);
        $profilePic = 'https://graph.facebook.com/'
        . $this->getInput('u')
        . '/picture?width=200&height=200';

        $this->authorName = $author;

        foreach($element->children() as $cell) {
            // Manage summary posts
            if(strpos($cell->class, '_3xaf') !== false) {
                $posts = $cell->children();
            } else {
                $posts = array($cell);
            }

            foreach($posts as $post) {
                // Check media type ("continue 2" leaves the switch and skips the post)
                switch($this->getInput('media_type')) {
                    case 'all': break;
                    case 'video':
                        if(empty($post->find('[aria-label=Video]'))) continue 2;
                        break;
                    case 'novideo':
                        if(!empty($post->find('[aria-label=Video]'))) continue 2;
                        break;
                    default: break;
                }

                $item = array();

                if(count($post->find('abbr')) > 0) {

                    //Retrieve post contents
                    $content = preg_replace(
                        '/(?i)><div class=\"clearfix([^>]+)>(.+?)div\ class=\"userContent\"/i',
                        '',
                        $post);

                    $content = preg_replace(
                        '/(?i)><div class=\"_59tj([^>]+)>(.+?)<\/div><\/div><a/i',
                        '',
                        $content);

                    $content = preg_replace(
                        '/(?i)><div class=\"_3dp([^>]+)>(.+?)div\ class=\"[^u]+userContent\"/i',
                        '',
                        $content);

                    $content = preg_replace(
                        '/(?i)><div class=\"_4l5([^>]+)>(.+?)<\/div>/i',
                        '',
                        $content);

                    //Remove html nodes, keep only img, links, basic formatting
                    $content = strip_tags($content, '<a><img><i><u><br><p>');

                    //Adapt link hrefs: convert relative links into absolute links and bypass external link redirection
                    $content = preg_replace_callback('/ href=\"([^"]+)\"/i', $unescape_fb_link, $content);

                    //Clean useless html tag properties and fix link closing tags
                    foreach (array(
                        'onmouseover',
                        'onclick',
                        'target',
                        'ajaxify',
                        'tabindex',
                        'class',
                        'style',
                        'data-[^=]*',
                        'aria-[^=]*',
                        'role',
                        'rel',
                        'id') as $property_name)
                            $content = preg_replace('/ ' . $property_name . '=\"[^"]*\"/i', '', $content);
                    $content = preg_replace('/<\/a [^>]+>/i', '</a>', $content);

                    //Convert textual representation of emoticons eg
                    //"<i><u>smile emoticon</u></i>" back to ASCII emoticons eg ":)"
                    $content = preg_replace_callback(
                        '/<i><u>([^ <>]+) ([^<>]+)<\/u><\/i>/i',
                        $unescape_fb_emote,
                        $content
                    );

                    //Retrieve date of the post
                    $date = $post->find("abbr")[0];
                    if(isset($date) && $date->hasAttribute('data-utime')) {
                        $date = $date->getAttribute('data-utime');
                    } else {
                        $date = 0;
                    }

                    //Build title from username and content
                    $title = $author;
                    if(strlen($title) > 24)
                        $title = substr($title, 0, strpos(wordwrap($title, 24), "\n")) . '...';
                    $title = $title . ' | ' . strip_tags($content);
                    if(strlen($title) > 64)
                        $title = substr($title, 0, strpos(wordwrap($title, 64), "\n")) . '...';

                    //Build and add final item
                    $item['uri'] = self::URI . $post->find('abbr')[0]->parent()->getAttribute('href');
                    $item['content'] = $content;
                    $item['title'] = $title;
                    $item['author'] = $author;
                    $item['timestamp'] = $date;
                    $this->items[] = $item;
                }
            }
        }
    }
}
/**
 * Expands the sample feed that belongs to the selected 'version' input.
 * An unknown version is reported through returnClientError().
 */
public function collectData(){
    // Sample feed for each supported format
    $sampleFeeds = array(
        'rss_0_9_1' => 'http://static.userland.com/gems/backend/sampleRss.xml',
        'rss_1_0' => 'http://feeds.nature.com/nature/rss/current?format=xml',
        'rss_2_0' => 'http://feeds.rssboard.org/rssboard?format=xml',
        'atom_1_0' => 'http://segfault.linuxmint.com/feed/atom/'
    );

    $version = $this->getInput('version');

    if(is_string($version) && isset($sampleFeeds[$version])) {
        parent::collectExpandableDatas($sampleFeeds[$version]);
    } else {
        returnClientError('Unknown version ' . $this->getInput('version') . '!');
    }
}
/**
 * Bridge returning the latest articles listed on the Fier Panda front page.
 */
class FierPandaBridge extends BridgeAbstract {

    const MAINTAINER = 'snroki';
    const NAME = 'Fier Panda Bridge';
    const URI = 'http://www.fier-panda.fr/';
    const CACHE_TIMEOUT = 21600; // 6h
    const DESCRIPTION = 'Returns latest articles from Fier Panda.';

    /**
     * Loads the front page and adds one item (uri, title, content) per
     * article to $this->items. Articles lacking a link or a title link are
     * skipped instead of being dereferenced.
     */
    public function collectData(){
        $html = getSimpleHTMLDOM(self::URI)
            or returnServerError('Could not request Fier Panda.');

        foreach($html->find('div.container-content article') as $element) {
            $link = $element->find('a', 0);
            $heading = $element->find('h1 a', 0);

            // Without these two nodes there is nothing meaningful to publish
            if(is_null($link) || is_null($heading)) {
                continue;
            }

            $item = array();
            $item['uri'] = $this->getURI() . $link->href;
            $item['title'] = trim($heading->innertext);

            // Remove the link at the end of the article.
            // Assigning to a property of null is an Error on PHP 8, hence the check.
            $trailingLink = $element->find('p a', 0);
            if(!is_null($trailingLink)) {
                $trailingLink->outertext = '';
            }

            $paragraph = $element->find('p', 0);
            $item['content'] = is_null($paragraph) ? '' : $paragraph->innertext;

            $this->items[] = $item;
        }
    }
}
/**
 * Returns the user supplied filter wrapped in '/' delimiters.
 *
 * Slashes that the user did not escape are escaped here, because a bare '/'
 * would end the pattern early and make preg_match() fail.
 *
 * @return string Regular expression usable with preg_match()
 */
protected function getFilter(){
    // Leave '\/' alone so filters written for the previous behavior keep working
    $filter = preg_replace('#(?<!\\\\)/#', '\\/', (string)$this->getInput('filter'));
    return '/' . $filter . '/';
}

/**
 * Returns the selected filter type ('permit' or 'block').
 *
 * @return string|null
 */
protected function getFilterType(){
    return $this->getInput('filter_type');
}

/**
 * Returns the feed URL given by the user, or the default bridge URI when
 * no URL was provided.
 *
 * @return string
 */
public function getURI(){
    $url = $this->getInput('url');

    if(empty($url)) {
        $url = parent::getURI();
    }
    return $url;
}

/**
 * Validates the feed URL and expands the feed.
 */
public function collectData(){
    $url = $this->getInput('url');

    // just in case someone find a way to access local files by playing with the url:
    // require a full http:// or https:// prefix. Comparing only the first four
    // characters with 'http' also accepts relative paths such as 'http_x/../../file'.
    if($url && preg_match('#^https?://#i', $url) !== 1) {
        returnClientError('The url parameter must either refer to http or https protocol.');
    }
    try{
        $this->collectExpandableDatas($this->getURI());
    } catch (HttpException $e) {
        // Retry once; a second failure propagates to the caller
        $this->collectExpandableDatas($this->getURI());
    }
}
/* This is a mashup of FlickrExploreBridge by sebsauvage and FlickrTagBridge
 * by erwang, providing the functionality of both in one.
 *
 * Photos are read from the JSON model embedded in the page ('modelExport')
 * and matched against the photo tiles of the page by image id.
 */
class FlickrBridge extends BridgeAbstract {

    const MAINTAINER = 'logmanoriginal';
    const NAME = 'Flickr Bridge';
    const URI = 'https://www.flickr.com/';
    const CACHE_TIMEOUT = 21600; // 6 hours
    const DESCRIPTION = 'Returns images from Flickr';

    const PARAMETERS = array(
        'Explore' => array(),
        'By keyword' => array(
            'q' => array(
                'name' => 'Keyword',
                'type' => 'text',
                'required' => true,
                'title' => 'Insert keyword',
                'exampleValue' => 'bird'
            )
        ),
        'By username' => array(
            'u' => array(
                'name' => 'Username',
                'type' => 'text',
                'required' => true,
                'title' => 'Insert username (as shown in the address bar)',
                'exampleValue' => 'flickr'
            )
        ),
    );

    /**
     * Loads the page for the queried context and adds one item per photo
     * tile that has a matching entry in the embedded JSON model.
     */
    public function collectData(){
        switch($this->queriedContext) {
        case 'Explore':
            $key = 'photos';
            $html = getSimpleHTMLDOM(self::URI . 'explore')
                or returnServerError('Could not request Flickr.');
            break;
        case 'By keyword':
            $key = 'photos';
            $html = getSimpleHTMLDOM(self::URI . 'search/?q=' . urlencode($this->getInput('q')) . '&s=rec')
                or returnServerError('No results for this query.');
            break;
        case 'By username':
            $key = 'photoPageList';
            $html = getSimpleHTMLDOM(self::URI . 'photos/' . urlencode($this->getInput('u')))
                or returnServerError('Requested username can\'t be found.');
            break;
        default:
            returnClientError('Invalid context: ' . $this->queriedContext);
        }

        // NOTE(review): the returnServerError() calls below presumably abort the
        // request, as the "or returnServerError()" idiom above suggests; confirm.

        // Find SCRIPT containing JSON data
        $model = $html->find('.modelExport', 0);
        if(is_null($model)) {
            returnServerError('Could not find the photo data on the Flickr page.');
        }
        $model_text = $model->innertext;

        // Find start and end of JSON data
        $start_marker = strpos($model_text, 'modelExport:');
        $end_marker = strpos($model_text, 'auth:');
        if($start_marker === false || $end_marker === false) {
            returnServerError('Unexpected format of the Flickr photo data.');
        }
        $start = $start_marker + strlen('modelExport:');
        $end = $end_marker - strlen('auth:');

        // Dissect JSON data and remove trailing comma
        $model_text = trim(substr($model_text, $start, $end - $start));
        $model_text = substr($model_text, 0, strlen($model_text) - 1);

        $model_json = json_decode($model_text, true);
        if(!is_array($model_json)) {
            returnServerError('Could not decode the Flickr photo data.');
        }

        // The photo list does not change between tiles: look it up once
        $model_root = reset($model_json);
        if(!isset($model_root[0][$key]['_data']) || !is_array($model_root[0][$key]['_data'])) {
            returnServerError('The Flickr photo data does not contain a photo list.');
        }
        $page_model = $model_root[0];
        $photos = $page_model[$key]['_data'];

        foreach($html->find('.photo-list-photo-view') as $element) {
            // Get the styles
            $style = explode(';', $element->style);

            // Get the background-image style
            $backgroundImage = explode(':', end($style));

            // URI type : url(//cX.staticflickr.com/X/XXXXX/XXXXXXXXX.jpg)
            $imageURI = trim(str_replace(['url(', ')'], '', end($backgroundImage)));

            // Get the image ID
            $imageURIs = explode('_', basename($imageURI));
            $imageID = reset($imageURIs);

            // Use JSON data to build items
            // ($photo instead of $element, which would shadow the DOM node above)
            foreach($photos as $photo) {
                if($photo['id'] === $imageID) {
                    $item = array();

                    /* Author name depends on scope. On a keyword search the
                    * author is part of the picture data. On a username search
                    * the author is part of the owner data.
                    */
                    if(array_key_exists('username', $photo)) {
                        $item['author'] = $photo['username'];
                    } elseif (array_key_exists('owner', $page_model)) {
                        $item['author'] = $page_model['owner']['username'];
                    }

                    $item['title'] = (array_key_exists('title', $photo) ? $photo['title'] : 'Untitled');
                    $item['uri'] = self::URI . 'photo.gne?id=' . $imageID;

                    $description = (array_key_exists('description', $photo) ? $photo['description'] : '');

                    $item['content'] = '<a href="'
                    . $item['uri']
                    . '"><img src="'
                    . $imageURI
                    . '" /></a><br><p>'
                    . $description
                    . '</p>';

                    $this->items[] = $item;

                    break;
                }
            }
        }
    }
}
/**
 * Bridge turning the replies of a 4chan thread into feed items.
 */
class FourchanBridge extends BridgeAbstract {

    const MAINTAINER = 'mitsukarenai';
    const NAME = '4chan';
    const URI = 'https://boards.4chan.org/';
    const CACHE_TIMEOUT = 300; // 5min
    const DESCRIPTION = 'Returns posts from the specified thread';

    const PARAMETERS = array( array(
        'c' => array(
            'name' => 'Thread category',
            'required' => true
        ),
        't' => array(
            'name' => 'Thread number',
            'type' => 'number',
            'required' => true
        )
    ));

    /**
     * Thread URL when both category and thread number are known, the
     * default bridge URI otherwise.
     */
    public function getURI(){
        if(is_null($this->getInput('c')) || is_null($this->getInput('t'))) {
            return parent::getURI();
        }

        return static::URI . $this->getInput('c') . '/thread/' . $this->getInput('t');
    }

    /**
     * Reads every post container of the thread page and stores the posts
     * in $this->items, last post on the page first.
     */
    public function collectData(){

        $html = getSimpleHTMLDOM($this->getURI())
            or returnServerError("Could not request 4chan, thread not found");

        foreach($html->find('div.postContainer') as $element) {
            $postId = $element->find('.post', 0)->getAttribute('id');

            $item = array();
            $item['id'] = $postId;
            $item['uri'] = $this->getURI() . '#' . $postId;
            $item['timestamp'] = $element->find('span.dateTime', 0)->getAttribute('data-utc');
            $item['author'] = $element->find('span.name', 0)->plaintext;

            // Attached file, if any
            $file = $element->find('.file', 0);

            if(!empty($file)) {
                $item['image'] = $element->find('.file a', 0)->href;
                $item['imageThumb'] = $element->find('.file img', 0)->src;
                // Flash files have no thumbnail: fall back to a placeholder picture
                if(!isset($item['imageThumb']) && strpos($item['image'], '.swf') !== false) {
                    $item['imageThumb'] = 'http://i.imgur.com/eO0cxf9.jpg';
                }
            }

            if(!empty($element->find('span.subject', 0)->innertext)) {
                $item['subject'] = $element->find('span.subject', 0)->innertext;
            }

            // Title: "[subject - ]reply <id> | <author>"
            $title = 'reply ' . $postId . ' | ' . $item['author'];
            if(isset($item['subject'])) {
                $title = $item['subject'] . ' - ' . $title;
            }
            $item['title'] = $title;

            // Make in-thread anchors absolute so they work outside of the thread page
            $message = str_replace(
                'href="#p',
                'href="' . $this->getURI() . '#p',
                $element->find('.postMessage', 0)->innertext
            );
            $body = '<span id="' . $postId . '">' . $message . '</span>';

            if(isset($item['image'])) {
                $body = '<a href="'
                . $item['image']
                . '"><img alt="'
                . $postId
                . '" src="'
                . $item['imageThumb']
                . '" /></a><br>'
                . $body;
            }
            $item['content'] = $body;

            $this->items[] = $item;
        }

        $this->items = array_reverse($this->items);
    }
}
actualités de Futura-High-Tech' => 'high-tech/actualites', + 'Les dernières astuces de Futura-High-Tech' => 'high-tech/question-reponses', + 'Les dernières définitions de Futura-High-Tech' => 'high-tech/definitions', + 'Les derniers dossiers de Futura-High-Tech' => 'high-tech/dossiers' + ), + 'Les flux Espace' => array( + 'Les dernières actualités de Futura-Espace' => 'espace/actualites', + 'Les dernières définitions de Futura-Espace' => 'espace/definitions', + 'Les dernières questions-réponses de Futura-Espace' => 'espace/question-reponses', + 'Les derniers dossiers de Futura-Espace' => 'espace/dossiers' + ), + 'Les flux Environnement' => array( + 'Les dernières actualités de Futura-Environnement' => 'environnement/actualites', + 'Les dernières définitions de Futura-Environnement' => 'environnement/definitions', + 'Les dernières questions-réponses de Futura-Environnement' => 'environnement/question-reponses', + 'Les derniers dossiers de Futura-Environnement' => 'environnement/dossiers' + ), + 'Les flux Maison' => array( + 'Les dernières actualités de Futura-Maison' => 'maison/actualites', + 'Les dernières astuces de Futura-Maison' => 'maison/question-reponses', + 'Les dernières définitions de Futura-Maison' => 'maison/definitions', + 'Les derniers dossiers de Futura-Maison' => 'maison/dossiers' + ), + 'Les flux Nature' => array( + 'Les dernières actualités de Futura-Nature' => 'nature/actualites', + 'Les dernières définitions de Futura-Nature' => 'nature/definitions', + 'Les dernières questions-réponses de Futura-Nature' => 'nature/question-reponses', + 'Les derniers dossiers de Futura-Nature' => 'nature/dossiers' + ), + 'Les flux Terre' => array( + 'Les dernières actualités de Futura-Terre' => 'terre/actualites', + 'Les dernières définitions de Futura-Terre' => 'terre/definitions', + 'Les dernières questions-réponses de Futura-Terre' => 'terre/question-reponses', + 'Les derniers dossiers de Futura-Terre' => 'terre/dossiers' + ), + 'Les flux Matière' => array( + 
'Les dernières actualités de Futura-Matière' => 'matiere/actualites', + 'Les dernières définitions de Futura-Matière' => 'matiere/definitions', + 'Les dernières questions-réponses de Futura-Matière' => 'matiere/question-reponses', + 'Les derniers dossiers de Futura-Matière' => 'matiere/dossiers' + ), + 'Les flux Mathématiques' => array( + 'Les dernières actualités de Futura-Mathématiques' => 'mathematiques/actualites', + 'Les derniers dossiers de Futura-Mathématiques' => 'mathematiques/dossiers' + ) + ) + ) + )); + + public function collectData(){ + $url = self::URI . 'rss/' . $this->getInput('feed') . '.xml'; + $this->collectExpandableDatas($url, 10); + } + + protected function parseItem($newsItem){ + $item = parent::parseItem($newsItem); + $item['uri'] = str_replace('#xtor=RSS-8', '', $item['uri']); + $article = getSimpleHTMLDOMCached($item['uri']) + or returnServerError('Could not request Futura-Sciences: ' . $item['uri']); + $item['content'] = $this->extractArticleContent($article); + $author = $this->extractAuthor($article); + $item['author'] = empty($author) ? $item['author'] : $author; + return $item; + } + + private function stripWithDelimiters($string, $start, $end){ + while(strpos($string, $start) !== false) { + $section_to_remove = substr($string, strpos($string, $start)); + $section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end)); + $string = str_replace($section_to_remove, '', $string); + } return $string; + } + + private function stripRecursiveHTMLSection($string, $tag_name, $tag_start){ + $open_tag = '<' . $tag_name; + $close_tag = '</' . $tag_name . 
'>'; + $close_tag_length = strlen($close_tag); + if(strpos($tag_start, $open_tag) === 0) { + while(strpos($string, $tag_start) !== false) { + $max_recursion = 100; + $section_to_remove = null; + $section_start = strpos($string, $tag_start); + $search_offset = $section_start; + do { + $max_recursion--; + $section_end = strpos($string, $close_tag, $search_offset); + $search_offset = $section_end + $close_tag_length; + $section_to_remove = substr($string, $section_start, $section_end - $section_start + $close_tag_length); + $open_tag_count = substr_count($section_to_remove, $open_tag); + $close_tag_count = substr_count($section_to_remove, $close_tag); + } while ($open_tag_count > $close_tag_count && $max_recursion > 0); + $string = str_replace($section_to_remove, '', $string); + } + } + return $string; + } + + private function extractArticleContent($article){ + $contents = $article->find('section.article-text-classic', 0)->innertext; + $headline = trim($article->find('p.description', 0)->plaintext); + if(!empty($headline)) + $headline = '<p><b>' . $headline . 
'</b></p>'; + + foreach (array( + '<div class="clear', + '<div class="sharebar2', + '<div class="diaporamafullscreen"', + '<div class="module social-button', + '<div style="margin-bottom:10px;" class="noprint"', + '<div class="ficheprevnext', + '<div class="bar noprint', + '<div class="toolbar noprint', + '<div class="addthis_toolbox', + '<div class="noprint', + '<div class="bg bglight border border-full noprint', + '<div class="httplogbar-wrapper noprint', + '<div id="forumcomments', + '<div ng-if="active"' + ) as $div_start) { + $contents = $this->stripRecursiveHTMLSection($contents, 'div', $div_start); + } + + $contents = $this->stripWithDelimiters($contents, '<hr ', '/>'); + $contents = $this->stripWithDelimiters($contents, '<p class="content-date', '</p>'); + $contents = $this->stripWithDelimiters($contents, '<h1 class="content-title', '</h1>'); + $contents = $this->stripWithDelimiters($contents, 'fs:definition="', '"'); + $contents = $this->stripWithDelimiters($contents, 'fs:xt:clicktype="', '"'); + $contents = $this->stripWithDelimiters($contents, 'fs:xt:clickname="', '"'); + $contents = $this->stripWithDelimiters($contents, '<script ', '</script>'); + + return $headline . 
trim($contents); + } + + // Extracts the author from an article or element + private function extractAuthor($article){ + $article_author = $article->find('h3.epsilon', 0); + if($article_author) { + return trim(str_replace(', Futura-Sciences', '', $article_author->plaintext)); + } + return ''; + } +} diff --git a/bridges/GBAtempBridge.php b/bridges/GBAtempBridge.php new file mode 100644 index 0000000..f80a25c --- /dev/null +++ b/bridges/GBAtempBridge.php @@ -0,0 +1,161 @@ +<?php +class GBAtempBridge extends BridgeAbstract { + + const MAINTAINER = 'ORelio'; + const NAME = 'GBAtemp'; + const URI = 'https://gbatemp.net/'; + const DESCRIPTION = 'GBAtemp is a user friendly underground video game community.'; + + const PARAMETERS = array( array( + 'type' => array( + 'name' => 'Type', + 'type' => 'list', + 'required' => true, + 'values' => array( + 'News' => 'N', + 'Reviews' => 'R', + 'Tutorials' => 'T', + 'Forum' => 'F' + ) + ) + )); + + private function extractFromDelimiters($string, $start, $end){ + if(strpos($string, $start) !== false) { + $section_retrieved = substr($string, strpos($string, $start) + strlen($start)); + $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end)); + return $section_retrieved; + } + + return false; + } + + private function stripWithDelimiters($string, $start, $end){ + while(strpos($string, $start) !== false) { + $section_to_remove = substr($string, strpos($string, $start)); + $section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end)); + $string = str_replace($section_to_remove, '', $string); + } + + return $string; + } + + private function buildItem($uri, $title, $author, $timestamp, $content){ + $item = array(); + $item['uri'] = $uri; + $item['title'] = $title; + $item['author'] = $author; + $item['timestamp'] = $timestamp; + $item['content'] = $content; + return $item; + } + + private function cleanupPostContent($content, $site_url){ + $content = str_replace(':arrow:', 
'➤', $content); + $content = str_replace('href="attachments/', 'href="'.$site_url.'attachments/', $content); + $content = $this->stripWithDelimiters($content, '<script', '</script>'); + return $content; + } + + private function fetchPostContent($uri, $site_url){ + $html = getSimpleHTMLDOM($uri); + if(!$html) { + return 'Could not request GBAtemp ' . $uri; + } + + $content = $html->find('div.messageContent', 0)->innertext; + return $this->cleanupPostContent($content, $site_url); + } + + public function collectData(){ + + $html = getSimpleHTMLDOM(self::URI) + or returnServerError('Could not request GBAtemp.'); + + switch($this->getInput('type')) { + case 'N': + foreach($html->find('li[class=news_item full]') as $newsItem) { + $url = self::URI . $newsItem->find('a', 0)->href; + $time = intval( + $this->extractFromDelimiters( + $newsItem->find('abbr.DateTime', 0)->outertext, + 'data-time="', + '"' + ) + ); + $author = $newsItem->find('a.username', 0)->plaintext; + $title = $newsItem->find('a', 1)->plaintext; + $content = $this->fetchPostContent($url, self::URI); + $this->items[] = $this->buildItem($url, $title, $author, $time, $content); + } + break; // each feed type is exclusive: do not fall through into the next case + case 'R': + foreach($html->find('li.portal_review') as $reviewItem) { + $url = self::URI . $reviewItem->find('a', 0)->href; + $title = $reviewItem->find('span.review_title', 0)->plaintext; + $content = getSimpleHTMLDOM($url) + or returnServerError('Could not request GBAtemp: ' . $url); + $author = $content->find('a.username', 0)->plaintext; + $time = intval( + $this->extractFromDelimiters( + $content->find('abbr.DateTime', 0)->outertext, + 'data-time="', + '"' + ) + ); + $intro = '<p><b>' . ($content->find('div#review_intro', 0)->plaintext) . '</b></p>'; + $review = $content->find('div#review_main', 0)->innertext; + $subheader = '<p><b>' . $content->find('div.review_subheader', 0)->plaintext . 
'</b></p>'; + $procons = $content->find('table.review_procons', 0)->outertext; + $scores = $content->find('table.reviewscores', 0)->outertext; + $content = $this->cleanupPostContent($intro . $review . $subheader . $procons . $scores, self::URI); + $this->items[] = $this->buildItem($url, $title, $author, $time, $content); + } + break; + case 'T': + foreach($html->find('li.portal-tutorial') as $tutorialItem) { + $url = self::URI . $tutorialItem->find('a', 0)->href; + $title = $tutorialItem->find('a', 0)->plaintext; + $time = intval( + $this->extractFromDelimiters( + $tutorialItem->find('abbr.DateTime', 0)->outertext, + 'data-time="', + '"' + ) + ); + $author = $tutorialItem->find('a.username', 0)->plaintext; + $content = $this->fetchPostContent($url, self::URI); + $this->items[] = $this->buildItem($url, $title, $author, $time, $content); + } + break; + case 'F': + foreach($html->find('li.rc_item') as $postItem) { + $url = self::URI . $postItem->find('a', 1)->href; + $title = $postItem->find('a', 1)->plaintext; + $time = intval( + $this->extractFromDelimiters( + $postItem->find('abbr.DateTime', 0)->outertext, + 'data-time="', + '"' + ) + ); + $author = $postItem->find('a.username', 0)->plaintext; + $content = $this->fetchPostContent($url, self::URI); + $this->items[] = $this->buildItem($url, $title, $author, $time, $content); + } + break; + } + } + + public function getName() { + if(!is_null($this->getInput('type'))) { + $type = array_search( + $this->getInput('type'), + self::PARAMETERS[$this->queriedContext]['type']['values'] + ); + return 'GBAtemp ' . $type . 
' Bridge'; + } + + return parent::getName(); + } +} diff --git a/bridges/GelbooruBridge.php b/bridges/GelbooruBridge.php new file mode 100644 index 0000000..fa4ce11 --- /dev/null +++ b/bridges/GelbooruBridge.php @@ -0,0 +1,22 @@ +<?php +require_once('DanbooruBridge.php'); + +class GelbooruBridge extends DanbooruBridge { + + const MAINTAINER = 'mitsukarenai'; + const NAME = 'Gelbooru'; + const URI = 'http://gelbooru.com/'; + const DESCRIPTION = 'Returns images from given page'; + + const PATHTODATA = '.thumb'; + const IDATTRIBUTE = 'id'; + + const PIDBYPAGE = 63; + + protected function getFullURI(){ + return $this->getURI() + . 'index.php?page=post&s=list&pid=' + . ($this->getInput('p') ? ($this->getInput('p') - 1) * static::PIDBYPAGE : '') + . '&tags=' . urlencode($this->getInput('t')); + } +} diff --git a/bridges/GiphyBridge.php b/bridges/GiphyBridge.php new file mode 100644 index 0000000..26d1eba --- /dev/null +++ b/bridges/GiphyBridge.php @@ -0,0 +1,76 @@ +<?php +define('GIPHY_LIMIT', 10); + +class GiphyBridge extends BridgeAbstract { + + const MAINTAINER = 'kraoc'; + const NAME = 'Giphy Bridge'; + const URI = 'http://giphy.com/'; + const CACHE_TIMEOUT = 300; //5min + const DESCRIPTION = 'Bridge for giphy.com'; + + const PARAMETERS = array( array( + 's' => array( + 'name' => 'search tag', + 'required' => true + ), + 'n' => array( + 'name' => 'max number of returned items', + 'type' => 'number' + ) + )); + + public function collectData(){ + $html = ''; + $base_url = 'http://giphy.com'; + $html = getSimpleHTMLDOM(self::URI . '/search/' . urlencode($this->getInput('s') . '/')) + or returnServerError('No results for this query.'); + + $max = GIPHY_LIMIT; + if($this->getInput('n')) { + $max = $this->getInput('n'); + } + + $limit = 0; + $kw = urlencode($this->getInput('s')); + foreach($html->find('div.hoverable-gif') as $entry) { + if($limit < $max) { + $node = $entry->first_child(); + $href = $node->getAttribute('href'); + + $html2 = getSimpleHTMLDOM(self::URI . 
$href) + or returnServerError('No results for this query.'); + $figure = $html2->getElementByTagName('figure'); + $img = $figure->firstChild(); + $caption = $figure->lastChild(); + + $item = array(); + $item['id'] = $img->getAttribute('data-gif_id'); + $item['uri'] = $img->getAttribute('data-bitly_gif_url'); + $item['username'] = 'Giphy - ' . ucfirst($kw); + $title = $caption->innertext(); + $title = preg_replace('/\s+/', ' ', $title); + $title = str_replace('animated GIF', '', $title); + $title = str_replace($kw, '', $title); + $title = preg_replace('/\s+/', ' ', $title); + $title = trim($title); + if(strlen($title) <= 0) { + $title = $item['id']; + } + $item['title'] = trim($title); + $item['content'] = '<a href="' + . $item['uri'] + . '"><img src="' + . $img->getAttribute('src') + . '" width="' + . $img->getAttribute('data-original-width') + . '" height="' + . $img->getAttribute('data-original-height') + . '" /></a>'; + + $this->items[] = $item; + $limit++; + } + } + } +} diff --git a/bridges/GithubIssueBridge.php b/bridges/GithubIssueBridge.php new file mode 100644 index 0000000..4f121d8 --- /dev/null +++ b/bridges/GithubIssueBridge.php @@ -0,0 +1,192 @@ +<?php +class GithubIssueBridge extends BridgeAbstract { + + const MAINTAINER = 'Pierre Mazière'; + const NAME = 'Github Issue'; + const URI = 'https://github.com/'; + const CACHE_TIMEOUT = 600; // 10min + const DESCRIPTION = 'Returns the issues or comments of an issue of a github project'; + + const PARAMETERS = array( + 'global' => array( + 'u' => array( + 'name' => 'User name', + 'required' => true + ), + 'p' => array( + 'name' => 'Project name', + 'required' => true + ) + ), + 'Project Issues' => array( + 'c' => array( + 'name' => 'Show Issues Comments', + 'type' => 'checkbox' + ) + ), + 'Issue comments' => array( + 'i' => array( + 'name' => 'Issue number', + 'type' => 'number', + 'required' => 'true' + ) + ) + ); + + public function getName(){ + $name = $this->getInput('u') . '/' . 
$this->getInput('p'); + switch($this->queriedContext) { + case 'Project Issues': + if($this->getInput('c')) { + $prefix = static::NAME . 's comments for '; + } else { + $prefix = static::NAME . 's for '; + } + $name = $prefix . $name; + break; + case 'Issue comments': + $name = static::NAME . ' ' . $name . ' #' . $this->getInput('i'); + break; + default: return parent::getName(); + } + return $name; + } + + public function getURI(){ + if(!is_null($this->getInput('u')) && !is_null($this->getInput('p'))) { + $uri = static::URI . $this->getInput('u') . '/' . $this->getInput('p') . '/issues'; + if($this->queriedContext === 'Issue comments') { + $uri .= '/' . $this->getInput('i'); + } elseif($this->getInput('c')) { + $uri .= '?q=is%3Aissue+sort%3Aupdated-desc'; + } + return $uri; + } + + return parent::getURI(); + } + + protected function extractIssueComment($issueNbr, $title, $comment){ + $class = $comment->getAttribute('class'); + $classes = explode(' ', $class); + $event = false; + if(in_array('discussion-item', $classes)) { + $event = true; + } + + $author = 'unknown'; + if($comment->find('.author', 0)) { + $author = $comment->find('.author', 0)->plaintext; + } + + $uri = static::URI . $this->getInput('u') . '/' . $this->getInput('p') . '/issues/' . $issueNbr; + + $comment = $comment->firstChild(); + if(!$event) { + $comment = $comment->nextSibling(); + } + + if($event) { + $title .= ' / ' . substr($class, strpos($class, 'discussion-item-') + strlen('discussion-item-')); + if(!$comment->hasAttribute('id')) { + $items = array(); + $timestamp = strtotime($comment->find('relative-time', 0)->getAttribute('datetime')); + $content = $comment->innertext; + while($comment = $comment->nextSibling()) { + $item = array(); + $item['author'] = $author; + $item['title'] = html_entity_decode($title, ENT_QUOTES, 'UTF-8'); + $item['timestamp'] = $timestamp; + $item['content'] = $content . '<p>' . $comment->children(1)->innertext . '</p>'; + $item['uri'] = $uri . '#' . 
$comment->children(1)->getAttribute('id'); + $items[] = $item; + } + return $items; + } + $content = $comment->parent()->innertext; + } else { + $title .= ' / ' . trim($comment->firstChild()->plaintext); + $content = "<pre>" . $comment->find('.comment-body', 0)->innertext . "</pre>"; + } + + $item = array(); + $item['author'] = $author; + $item['uri'] = $uri . '#' . $comment->getAttribute('id'); + $item['title'] = html_entity_decode($title, ENT_QUOTES, 'UTF-8'); + $item['timestamp'] = strtotime($comment->find('relative-time', 0)->getAttribute('datetime')); + $item['content'] = $content; + return $item; + } + + protected function extractIssueComments($issue){ + $items = array(); + $title = $issue->find('.gh-header-title', 0)->plaintext; + $issueNbr = trim(substr($issue->find('.gh-header-number', 0)->plaintext, 1)); + $comments = $issue->find('.js-discussion', 0); + foreach($comments->children() as $comment) { + $classes = explode(' ', $comment->getAttribute('class')); + if(in_array('discussion-item', $classes) + || in_array('timeline-comment-wrapper', $classes)) { + $item = $this->extractIssueComment($issueNbr, $title, $comment); + if(array_keys($item) !== range(0, count($item) - 1)) { + $item = array($item); + } + $items = array_merge($items, $item); + } + } + return $items; + } + + public function collectData(){ + $html = getSimpleHTMLDOM($this->getURI()) + or returnServerError('No results for Github Issue ' . $this->getURI()); + + switch($this->queriedContext) { + case 'Issue comments': + $this->items = $this->extractIssueComments($html); + break; + case 'Project Issues': + foreach($html->find('.js-active-navigation-container .js-navigation-item') as $issue) { + $info = $issue->find('.opened-by', 0); + $issueNbr = substr(trim($info->plaintext), 1, strpos(trim($info->plaintext), ' ')); + + $item = array(); + $item['content'] = ''; + + if($this->getInput('c')) { + $uri = static::URI . $this->getInput('u') . '/' . $this->getInput('p') . '/issues/' . 
$issueNbr; + $issue = getSimpleHTMLDOMCached($uri, static::CACHE_TIMEOUT); + if($issue) { + $this->items = array_merge($this->items, $this->extractIssueComments($issue)); + continue; + } + $item['content'] = 'Can not extract comments from ' . $uri; + } + + $item['author'] = $info->find('a', 0)->plaintext; + $item['timestamp'] = strtotime($info->find('relative-time', 0)->getAttribute('datetime')); + $item['title'] = html_entity_decode( + $issue->find('.js-navigation-open', 0)->plaintext, + ENT_QUOTES, + 'UTF-8' + ); + $comments = $issue->find('.col-5', 0)->plaintext; + $item['content'] .= "\n" . 'Comments: ' . ($comments ? $comments : '0'); + $item['uri'] = self::URI . $issue->find('.js-navigation-open', 0)->getAttribute('href'); + $this->items[] = $item; + } + break; + } + + array_walk($this->items, function(&$item){ + $item['content'] = preg_replace('/\s+/', ' ', $item['content']); + $item['content'] = str_replace('href="/', 'href="' . static::URI, $item['content']); + $item['content'] = str_replace( + 'href="#', + 'href="' . 
substr($item['uri'], 0, strpos($item['uri'], '#') + 1), + $item['content'] + ); + $item['title'] = preg_replace('/\s+/', ' ', $item['title']); + }); + } +} diff --git a/bridges/GithubSearchBridge.php b/bridges/GithubSearchBridge.php new file mode 100644 index 0000000..d3a615b --- /dev/null +++ b/bridges/GithubSearchBridge.php @@ -0,0 +1,50 @@ +<?php +class GithubSearchBridge extends BridgeAbstract { + + const MAINTAINER = 'corenting'; + const NAME = 'Github Repositories Search'; + const URI = 'https://github.com/'; + const CACHE_TIMEOUT = 600; // 10min + const DESCRIPTION = 'Returns a specified repositories search (sorted by recently updated)'; + const PARAMETERS = array( array( + 's' => array( + 'type' => 'text', + 'name' => 'Search query' + ) + )); + + public function collectData(){ + $params = array('utf8' => '✓', + 'q' => $this->getInput('s'), // raw value: http_build_query() url-encodes it, encoding here would double-encode + 's' => 'updated', + 'o' => 'desc', + 'type' => 'Repositories'); + $url = self::URI . 'search?' . http_build_query($params); + + $html = getSimpleHTMLDOM($url) + or returnServerError('Error while downloading the website content'); + + foreach($html->find('div.repo-list-item') as $element) { + $item = array(); + + $uri = $element->find('h3 a', 0)->href; + $uri = substr(self::URI, 0, -1) . 
$uri; + $item['uri'] = $uri; + + $title = $element->find('h3', 0)->plaintext; + $item['title'] = $title; + + if (count($element->find('p')) == 2) { + $content = $element->find('p', 0)->innertext; + } else{ + $content = ''; + } + $item['content'] = $content; + + $date = $element->find('relative-time', 0)->datetime; + $item['timestamp'] = strtotime($date); + + $this->items[] = $item; + } + } +} diff --git a/bridges/GizmodoBridge.php b/bridges/GizmodoBridge.php new file mode 100644 index 0000000..35f162b --- /dev/null +++ b/bridges/GizmodoBridge.php @@ -0,0 +1,36 @@ +<?php +class GizmodoBridge extends FeedExpander { + + const MAINTAINER = 'polopollo'; + const NAME = 'Gizmodo'; + const URI = 'http://gizmodo.com/'; + const CACHE_TIMEOUT = 1800; // 30min + const DESCRIPTION = 'Returns the newest posts from Gizmodo (full text).'; + + protected function parseItem($item){ + $item = parent::parseItem($item); + + $articleHTMLContent = getSimpleHTMLDOMCached($item['uri']); + if(!$articleHTMLContent) { + $text = 'Could not load ' . $item['uri']; + } else { + $text = $articleHTMLContent->find('div.entry-content', 0)->innertext; + foreach($articleHTMLContent->find('pagespeed_iframe') as $element) { + $text .= '<p>link to a iframe (could be a video): <a href="' + . $element->src + . '">' + . $element->src + . 
'</a></p><br>'; + } + + $text = strip_tags($text, '<p><b><a><blockquote><img><em>'); + } + + $item['content'] = $text; + return $item; + } + + public function collectData(){ + $this->collectExpandableDatas('http://feeds.gawker.com/gizmodo/full'); + } +} diff --git a/bridges/GoComicsBridge.php b/bridges/GoComicsBridge.php new file mode 100644 index 0000000..27621d8 --- /dev/null +++ b/bridges/GoComicsBridge.php @@ -0,0 +1,59 @@ +<?php +class GoComicsBridge extends BridgeAbstract { + + const MAINTAINER = 'sky'; + const NAME = 'GoComics Unofficial RSS'; + const URI = 'http://www.gocomics.com/'; + const CACHE_TIMEOUT = 21600; // 6h + const DESCRIPTION = 'The Unofficial GoComics RSS'; + const PARAMETERS = array( array( + 'comicname' => array( + 'name' => 'comicname', + 'type' => 'text', + 'required' => true + ) + )); + + public function collectData(){ + $html = getSimpleHTMLDOM($this->getURI()) + or returnServerError('Could not request GoComics: ' . $this->getURI()); + + foreach($html->find('div.item-comic-container') as $element) { + + $img = $element->find('img', 0); + $link = $element->find('a.item-comic-link', 0); + $comic = $img->src; + $title = $link->title; + $url = $html->find('input.js-copy-link', 0)->value; + $date = substr($title, -10); + if (empty($title)) + $title = 'GoComics ' . $this->getInput('comicname') . ' on ' . $date; + $date = strtotime($date); + + $item = array(); + $item['id'] = $url; + $item['uri'] = $url; + $item['title'] = $title; + $item['author'] = preg_replace('/by /', '', $element->find('a.link-blended small', 0)->plaintext); + $item['timestamp'] = $date; + $item['content'] = '<img src="' . $comic . '" alt="' . $title . '" />'; + $this->items[] = $item; + } + } + + public function getURI(){ + if(!is_null($this->getInput('comicname'))) { + return self::URI . 
urlencode($this->getInput('comicname')); + } + + return parent::getURI(); + } + + public function getName(){ + if(!is_null($this->getInput('comicname'))) { + return $this->getInput('comicname') . ' - GoComics'; + } + + return parent::getName(); + } +} diff --git a/bridges/GooglePlusPostBridge.php b/bridges/GooglePlusPostBridge.php new file mode 100644 index 0000000..0fccc96 --- /dev/null +++ b/bridges/GooglePlusPostBridge.php @@ -0,0 +1,111 @@ +<?php +class GooglePlusPostBridge extends BridgeAbstract{ + + protected $_title; + protected $_url; + + const MAINTAINER = 'Grummfy'; + const NAME = 'Google Plus Post Bridge'; + const URI = 'https://plus.google.com/'; + const CACHE_TIMEOUT = 600; //10min + const DESCRIPTION = 'Returns user public post (without API).'; + + const PARAMETERS = array( array( + 'username' => array( + 'name' => 'username or Id', + 'required' => true + ) + )); + + public function collectData(){ + $username = $this->getInput('username'); + + // Usernames start with a + if it's not an ID + if(!is_numeric($username) && substr($username, 0, 1) !== '+') { + $username = '+' . $username; + } + + // get content parsed + $html = getSimpleHTMLDOMCached(self::URI . urlencode($username) . '/posts') + or returnServerError('No results for this query.'); + + // get title, url, ... there is a lot of intresting stuff in meta + $this->_title = $html->find('meta[property=og:title]', 0)->getAttribute('content'); + $this->_url = $html->find('meta[property=og:url]', 0)->getAttribute('content'); + + // I don't even know where to start with this discusting html... + foreach($html->find('div[jsname=WsjYwc]') as $post) { + $item = array(); + + $item['author'] = $item['fullname'] = $post->find('div div div div a', 0)->innertext; + $item['id'] = $post->find('div div div', 0)->getAttribute('id'); + $item['avatar'] = $post->find('div img', 0)->src; + $item['uri'] = self::URI . 
$post->find('div div div a', 1)->href; + + $timestamp = $post->find('a.qXj2He span', 0); + + if($timestamp) { + $item['timestamp'] = strtotime('+' . preg_replace( + '/[^0-9A-Za-z]/', + '', + $timestamp->getAttribute('aria-label'))); + } + + // hashtag to treat : https://plus.google.com/explore/tag + // $hashtags = array(); + // foreach($post->find('a.d-s') as $hashtag){ + // $hashtags[trim($hashtag->plaintext)] = self::URI . $hashtag->href; + // } + + $item['content'] = ''; + + // avatar display + $item['content'] .= '<div style="float:left; margin: 0 0.5em 0.5em 0;"><a href="' + . self::URI + . urlencode($this->getInput('username')); + + $item['content'] .= '"><img align="top" alt="' + . $item['author'] + . '" src="' + . $item['avatar'] + . '" /></a></div>'; + + $content = $post->find('div[jsname=EjRJtf]', 0); + // extract plaintext + $item['content_simple'] = $content->plaintext; + $item['title'] = substr($item['content_simple'], 0, 72) . '...'; + + // XXX ugly but I don't have any idea how to do a better stuff, + // str_replace on link doesn't work as expected and ask too many checks + foreach($content->find('a') as $link) { + $hasHttp = strpos($link->href, 'http'); + $hasDoubleSlash = strpos($link->href, '//'); + + if((!$hasHttp && !$hasDoubleSlash) + || (false !== $hasHttp && strpos($link->href, 'http') != 0) + || (false === $hasHttp && false !== $hasDoubleSlash && $hasDoubleSlash != 0)) { + // skipp bad link, for some hashtag or other stuff + if(strpos($link->href, '/') == 0) { + $link->href = substr($link->href, 1); + } + + $link->href = self::URI . $link->href; + } + } + $content = $content->innertext; + + $item['content'] .= '<div style="margin-top: -1.5em">' . $content . 
'</div>'; + $item['content'] = trim(strip_tags($item['content'], '<a><p><div><img>')); + + $this->items[] = $item; + } + } + + public function getName(){ + return $this->_title ?: 'Google Plus Post Bridge'; + } + + public function getURI(){ + return $this->_url ?: parent::getURI(); + } +} diff --git a/bridges/GoogleSearchBridge.php b/bridges/GoogleSearchBridge.php new file mode 100644 index 0000000..2c4a5f1 --- /dev/null +++ b/bridges/GoogleSearchBridge.php @@ -0,0 +1,64 @@ +<?php +/** +* Returns the 100 most recent links in results in past year, sorting by date (most recent first). +* Example: +* http://www.google.com/search?q=sebsauvage&num=100&complete=0&tbs=qdr:y,sbd:1 +* complete=0&num=100 : get 100 results +* qdr:y : in past year +* sbd:1 : sort by date (will only work if qdr: is specified) +*/ +class GoogleSearchBridge extends BridgeAbstract { + + const MAINTAINER = 'sebsauvage'; + const NAME = 'Google search'; + const URI = 'https://www.google.com/'; + const CACHE_TIMEOUT = 1800; // 30min + const DESCRIPTION = 'Returns most recent results from Google search.'; + + const PARAMETERS = array(array( + 'q' => array( + 'name' => "keyword", + 'required' => true + ) + )); + + public function collectData(){ + $html = ''; + + $html = getSimpleHTMLDOM(self::URI + . 'search?q=' + . urlencode($this->getInput('q')) + .'&num=100&complete=0&tbs=qdr:y,sbd:1') + or returnServerError('No results for this query.'); + + $emIsRes = $html->find('div[id=ires]', 0); + + if(!is_null($emIsRes)) { + foreach($emIsRes->find('div[class=g]') as $element) { + + $item = array(); + + // Extract direct URL from google href (eg. /url?q=...) + $t = $element->find('a[href]', 0)->href; + $item['uri'] = '' . 
$t; + parse_str(parse_url($t, PHP_URL_QUERY), $parameters); + if(isset($parameters['q'])) { + $item['uri'] = $parameters['q']; + } + + $item['title'] = $element->find('h3', 0)->plaintext; + $item['content'] = $element->find('span[class=st]', 0)->plaintext; + + $this->items[] = $item; + } + } + } + + public function getName(){ + if(!is_null($this->getInput('q'))) { + return $this->getInput('q') . ' - Google search'; + } + + return parent::getName(); + } +} diff --git a/bridges/HDWallpapersBridge.php b/bridges/HDWallpapersBridge.php new file mode 100644 index 0000000..cea6e34 --- /dev/null +++ b/bridges/HDWallpapersBridge.php @@ -0,0 +1,83 @@ +<?php +class HDWallpapersBridge extends BridgeAbstract { + const MAINTAINER = 'nel50n'; + const NAME = 'HD Wallpapers Bridge'; + const URI = 'http://www.hdwallpapers.in/'; + const CACHE_TIMEOUT = 43200; //12h + const DESCRIPTION = 'Returns the latests wallpapers from HDWallpapers'; + + const PARAMETERS = array( array( + 'c' => array( + 'name' => 'category', + 'defaultValue' => 'latest_wallpapers' + ), + 'm' => array( + 'name' => 'max number of wallpapers' + ), + 'r' => array( + 'name' => 'resolution', + 'defaultValue' => '1920x1200', + 'exampleValue' => '1920x1200, 1680x1050,…' + ) + )); + + public function collectData(){ + $category = $this->getInput('c'); // category comes from the 'c' input parameter + if(strrpos($category, 'wallpapers') !== strlen($category) - strlen('wallpapers')) { + $category .= '-desktop-wallpapers'; + } + + $num = 0; + $max = $this->getInput('m') ?: 14; + $lastpage = 1; + + for($page = 1; $page <= $lastpage; $page++) { + $link = self::URI . '/' . $category . '/page/' . 
$page; + $html = getSimpleHTMLDOM($link) + or returnServerError('No results for this query.'); + + if($page === 1) { + preg_match('/page\/(\d+)$/', $html->find('.pagination a', -2)->href, $matches); + $lastpage = min($matches[1], ceil($max / 14)); + } + + foreach($html->find('.wallpapers .wall a') as $element) { + $thumbnail = $element->find('img', 0); + + $item = array(); + // http://www.hdwallpapers.in/download/yosemite_reflections-1680x1050.jpg + $item['uri'] = self::URI + . '/download' + . str_replace('wallpapers.html', $this->getInput('r') . '.jpg', $element->href); + + $item['timestamp'] = time(); + $item['title'] = $element->find('p', 0)->text(); + $item['content'] = $item['title'] + . '<br><a href="' + . $item['uri'] + . '"><img src="' + . self::URI + . $thumbnail->src + . '" /></a>'; + + $this->items[] = $item; + + $num++; + if ($num >= $max) + break 2; + } + } + } + + public function getName(){ + if(!is_null($this->getInput('c')) && !is_null($this->getInput('r'))) { + return 'HDWallpapers - ' + . str_replace(['__', '_'], [' & ', ' '], $this->getInput('c')) + . ' [' + . $this->getInput('r') + . 
class HentaiHavenBridge extends BridgeAbstract {

    const MAINTAINER = 'albirew';
    const NAME = 'Hentai Haven';
    const URI = 'http://hentaihaven.org/';
    const CACHE_TIMEOUT = 21600; // 6h
    const DESCRIPTION = 'Returns releases from Hentai Haven';

    /**
     * Loads the front page and appends one feed item per 'div.zoe-grid' tile.
     *
     * Every item carries the release link, the entity-decoded title, the tag
     * line and an HTML body built from the tags, a linked thumbnail and the
     * description markup found in the tile's flyout.
     */
    public function collectData(){
        $page = getSimpleHTMLDOM(self::URI);
        if(!$page) {
            returnServerError('Could not request Hentai Haven.');
        }

        foreach($page->find('div.zoe-grid') as $tile) {
            $entry = [];
            $entry['uri'] = $tile->find('div.brick-content h3 a', 0)->href;

            // The thumbnail address is read from the image's data-src attribute.
            $thumbnailUri = $tile->find('a.thumbnail-image img', 0)->getAttribute('data-src');

            // Titles arrive with HTML entities; convert them to plain UTF-8.
            $rawTitle = trim($tile->find('div.brick-content h3 a', 0)->innertext);
            $entry['title'] = mb_convert_encoding($rawTitle, 'UTF-8', 'HTML-ENTITIES');

            $entry['tags'] = $tile->find('div.oFlyout_bg div.oFlyout div.flyoutContent span.tags', 0)->plaintext;

            $description = $tile->find('div.oFlyout_bg div.oFlyout div.flyoutContent p.description', 0)->innertext;
            $entry['content'] = implode('', [
                'Tags: ',
                $entry['tags'],
                '<br><br><a href="',
                $entry['uri'],
                '"><img width="300" height="169" src="',
                $thumbnailUri,
                '" /></a><br>',
                $description,
            ]);

            $this->items[] = $entry;
        }
    }
}
class InstagramBridge extends BridgeAbstract {

    const MAINTAINER = 'pauder';
    const NAME = 'Instagram Bridge';
    const URI = 'https://instagram.com/';
    const DESCRIPTION = 'Returns the newest images';

    const PARAMETERS = array( array(
        'u' => array(
            'name' => 'username',
            'required' => true
        ),
        'media_type' => array(
            'name' => 'Media type',
            'type' => 'list',
            'required' => false,
            'values' => array(
                'Both' => 'all',
                'Video' => 'video',
                'Picture' => 'picture'
            ),
            'defaultValue' => 'all'
        )
    ));

    /**
     * Reads the profile page, extracts the JSON assigned to
     * window._sharedData and turns each media node into a feed item.
     *
     * Reports a server error when the page cannot be requested, when no
     * script starting with window._sharedData exists, or when the decoded
     * payload lacks the media node list.
     */
    public function collectData(){
        $html = getSimpleHTMLDOM($this->getURI())
            or returnServerError('Could not request Instagram.');

        $marker = 'window._sharedData';
        $json = null;

        foreach($html->find('script') as $script) {
            // Match and cut on the SAME trimmed text. The previous code tested
            // the trimmed text but cut the untrimmed one, so leading
            // whitespace shifted the offset into the payload.
            $text = trim($script->innertext);
            if(strpos($text, $marker) !== 0) {
                continue;
            }

            // Drop the marker, then the surrounding ' = ' and trailing ';'.
            $json = trim(substr($text, strlen($marker)), ' =;');
            break;
        }

        if($json === null) {
            returnServerError('Could not find Instagram profile data.');
        }

        $data = json_decode($json);
        if(!isset($data->entry_data->ProfilePage[0]->user->media->nodes)) {
            returnServerError('Could not parse Instagram profile data.');
        }

        $userMedia = $data->entry_data->ProfilePage[0]->user->media->nodes;
        $mediaType = $this->getInput('media_type');

        foreach($userMedia as $media) {
            // Honour the media type filter; 'all' (or anything else) keeps everything.
            if($mediaType === 'video' && $media->is_video === false) {
                continue;
            }
            if($mediaType === 'picture' && $media->is_video === true) {
                continue;
            }

            $item = array();
            $item['uri'] = self::URI . 'p/' . $media->code . '/';
            $item['content'] = '<img src="' . htmlentities($media->display_src) . '" />';
            if(isset($media->caption)) {
                $item['title'] = $media->caption;
            } else {
                // No caption: fall back to the image file name.
                $item['title'] = basename($media->display_src);
            }
            $item['timestamp'] = $media->date;
            $this->items[] = $item;
        }
    }

    /**
     * Feed name: '<username> - Instagram Bridge' once a username is known,
     * otherwise the parent's default name.
     */
    public function getName(){
        if(!is_null($this->getInput('u'))) {
            return $this->getInput('u') . ' - Instagram Bridge';
        }

        return parent::getName();
    }

    /**
     * Profile address for the requested username, or the parent's default
     * URI when no username was supplied.
     */
    public function getURI(){
        if(!is_null($this->getInput('u'))) {
            return self::URI . urlencode($this->getInput('u'));
        }

        return parent::getURI();
    }
}
+ */ + 'By "Torrent" category' => array( + 'torrent_category' => array( + 'name' => 'Torrent category', + 'type' => 'list', + 'required' => true, + 'title' => 'Select your category', + 'defaultValue' => 'anime', + 'values' => array( + 'Adult' => 'adult', + 'Anime' => 'anime', + 'Books' => 'books', + 'Games' => 'games', + 'Movies' => 'movies', + 'Music' => 'music', + 'Other' => 'other', + 'Series & TV' => 'series_tv', + 'Software' => 'software' + ) + ), + 'torrent_popularity' => array( + 'name' => 'Sort by popularity', + 'type' => 'checkbox', + 'title' => 'Activate to receive results by popularity' + ) + ), + + /* + * Get feeds for a specific search request + */ + 'Search torrent by name' => array( + 'search_name' => array( + 'name' => 'Name', + 'required' => true, + 'title' => 'Insert your search query', + 'exampleValue' => 'Bridge' + ), + 'search_category' => array( + 'name' => 'Category', + 'type' => 'list', + 'title' => 'Select your category', + 'defaultValue' => 'all', + 'values' => array( + 'Adult' => 'adult', + 'All' => 'all', + 'Anime' => 'anime', + 'Books' => 'books', + 'Games' => 'games', + 'Movies' => 'movies', + 'Music' => 'music', + 'Other' => 'other', + 'Series & TV' => 'series_tv', + 'Software' => 'software' + ) + ) + ) + ); + + public function getURI(){ + $uri = self::URI; + switch($this->queriedContext) { + case 'By "Latest" category': + switch($this->getInput('latest_category')) { + case 'hot_torrents': + $uri .= 'statistic/hot/torrents'; + break; + case 'news': + break; + case 'releases': + $uri .= 'releases.php'; + break; + case 'torrents': + $uri .= 'latest.php'; + break; + } + break; + case 'By "Torrent" category': + $uri .= $this->buildCategoryUri( + $this->getInput('torrent_category'), + $this->getInput('torrent_popularity') + ); + break; + case 'Search torrent by name': + $category = $this->getInput('search_category'); + $uri .= $this->buildCategoryUri($category); + if($category !== 'movies') + $uri .= '&ihq=' . 
urlencode($this->getInput('search_name')); + break; + + default: parent::getURI(); + } + + return $uri; + } + + public function getName(){ + switch($this->queriedContext) { + case 'By "Latest" category': + $categoryName = array_search( + $this->getInput('latest_category'), + self::PARAMETERS['By "Latest" category']['latest_category']['values'] + ); + $name = 'Latest ' . $categoryName . ' - ' . self::NAME; + break; + case 'By "Torrent" category': + $categoryName = array_search( + $this->getInput('torrent_category'), + self::PARAMETERS['By "Torrent" category']['torrent_category']['values'] + ); + $name = 'Category: ' . $categoryName . ' - ' . self::NAME; + break; + case 'Search torrent by name': + $categoryName = array_search( + $this->getInput('search_category'), + self::PARAMETERS['Search torrent by name']['search_category']['values'] + ); + $name = 'Search: "' + . $this->getInput('search_name') + . '" in category: ' + . $categoryName . ' - ' + . self::NAME; + break; + default: return parent::getName(); + } + + return $name; + } + + public function collectData(){ + $html = $this->loadHtml($this->getURI()); + + switch($this->queriedContext) { + case 'By "Latest" category': + switch($this->getInput('latest_category')) { + case 'hot_torrents': + $this->getLatestHotTorrents($html); + break; + case 'news': + $this->getLatestNews($html); + break; + case 'releases': + case 'torrents': + $this->getLatestTorrents($html); + break; + } + break; + case 'By "Torrent" category': + if($this->getInput('torrent_category') === 'movies') { + // This one is special (content wise) + $this->getMovieTorrents($html); + } else { + $this->getLatestTorrents($html); + } + break; + case 'Search torrent by name': + if($this->getInput('search_category') === 'movies') { + // This one is special (content wise) + $this->getMovieTorrents($html); + } else { + $this->getLatestTorrents($html); + } + break; + } + } + + #region Helper functions for "Movie Torrents" + + private function 
getMovieTorrents($html){ + $container = $html->find('div#w0', 0); + if(!$container) + returnServerError('Unable to find torrent container!'); + + $torrents = $container->find('article'); + if(!$torrents) + returnServerError('Unable to find torrents!'); + + foreach($torrents as $torrent) { + + $anchor = $torrent->find('a', 0); + if(!$anchor) + returnServerError('Unable to find anchor!'); + + $date = $torrent->find('small', 0); + if(!$date) + returnServerError('Unable to find date!'); + + $item = array(); + + $item['uri'] = $this->fixRelativeUri($anchor->href); + $item['title'] = $anchor->title; + // $item['author'] = + $item['timestamp'] = strtotime($date->plaintext); + $item['content'] = $this->fixRelativeUri($torrent->innertext); + + $this->items[] = $item; + } + } + + #endregion + + #region Helper functions for "Latest Hot Torrents" + + private function getLatestHotTorrents($html){ + $container = $html->find('div#serps', 0); + if(!$container) + returnServerError('Unable to find torrent container!'); + + $torrents = $container->find('tr'); + if(!$torrents) + returnServerError('Unable to find torrents!'); + + // Remove first element (header row) + $torrents = array_slice($torrents, 1); + + foreach($torrents as $torrent) { + + $cell = $torrent->find('td', 0); + if(!$cell) + returnServerError('Unable to find cell!'); + + $element = $cell->find('a', 0); + if(!$element) + returnServerError('Unable to find element!'); + + $item = array(); + + $item['uri'] = $element->href; + $item['title'] = $element->plaintext; + // $item['author'] = + // $item['timestamp'] = + // $item['content'] = + + $this->items[] = $item; + } + } + + #endregion + + #region Helper functions for "Latest News" + + private function getLatestNews($html){ + $container = $html->find('div#postcontainer', 0); + if(!$container) + returnServerError('Unable to find post container!'); + + $posts = $container->find('div.index-post'); + if(!$posts) + returnServerError('Unable to find posts!'); + + foreach($posts 
as $post) { + $item = array(); + + $item['uri'] = $this->latestNewsExtractUri($post); + $item['title'] = $this->latestNewsExtractTitle($post); + $item['author'] = $this->latestNewsExtractAuthor($post); + $item['timestamp'] = $this->latestNewsExtractTimestamp($post); + $item['content'] = $this->latestNewsExtractContent($post); + + $this->items[] = $item; + } + } + + private function latestNewsExtractAuthor($post){ + $author = $post->find('small', 0); + if(!$author) + returnServerError('Unable to find author!'); + + // The author is hidden within a string like: 'Posted by {author} on {date}' + preg_match('/Posted\sby\s(.*)\son/i', $author->innertext, $matches); + + return $matches[1]; + } + + private function latestNewsExtractTimestamp($post){ + $date = $post->find('small', 0); + if(!$date) + returnServerError('Unable to find date!'); + + // The date is hidden within a string like: 'Posted by {author} on {date}' + preg_match('/Posted\sby\s.*\son\s(.*)/i', $date->innertext, $matches); + + $timestamp = strtotime($matches[1]); + + // Make sure date is not in the future (dates are given like 'Nov. 
20' without year) + if($timestamp > time()) { + $timestamp = strtotime('-1 year', $timestamp); + } + + return $timestamp; + } + + private function latestNewsExtractTitle($post){ + $title = $post->find('a', 0); + if(!$title) + returnServerError('Unable to find title!'); + + return $title->plaintext; + } + + private function latestNewsExtractUri($post){ + $uri = $post->find('a', 0); + if(!$uri) + returnServerError('Unable to find uri!'); + + return $uri->href; + } + + private function latestNewsExtractContent($post){ + $content = $post->find('div', 0); + if(!$content) + returnServerError('Unable to find content!'); + + // Remove <h2>...</h2> (title) + foreach($content->find('h2') as $element) { + $element->outertext = ''; + } + + // Remove <small>...</small> (author) + foreach($content->find('small') as $element) { + $element->outertext = ''; + } + + return $content->innertext; + } + + #endregion + + #region Helper functions for "Latest Torrents", "Latest Releases" and "Torrent Category" + + private function getLatestTorrents($html){ + $container = $html->find('div#serps', 0); + if(!$container) + returnServerError('Unable to find torrent container!'); + + $torrents = $container->find('tr[data-key]'); + if(!$torrents) + returnServerError('Unable to find torrents!'); + + foreach($torrents as $torrent) { + $item = array(); + + $item['uri'] = $this->latestTorrentsExtractUri($torrent); + $item['title'] = $this->latestTorrentsExtractTitle($torrent); + $item['author'] = $this->latestTorrentsExtractAuthor($torrent); + $item['timestamp'] = $this->latestTorrentsExtractTimestamp($torrent); + $item['content'] = ''; // There is no valuable content + + $this->items[] = $item; + } + } + + private function latestTorrentsExtractTitle($torrent){ + $cell = $torrent->find('td.title-row', 0); + if(!$cell) + returnServerError('Unable to find title cell!'); + + $title = $cell->find('span', 0); + if(!$title) + returnServerError('Unable to find title!'); + + return $title->plaintext; + } + + 
private function latestTorrentsExtractUri($torrent){ + $cell = $torrent->find('td.title-row', 0); + if(!$cell) + returnServerError('Unable to find title cell!'); + + $uri = $cell->find('a', 0); + if(!$uri) + returnServerError('Unable to find uri!'); + + return $this->fixRelativeUri($uri->href); + } + + private function latestTorrentsExtractAuthor($torrent){ + $cell = $torrent->find('td.user-row', 0); + if(!$cell) + return; // No author + + $user = $cell->find('a', 0); + if(!$user) + returnServerError('Unable to find user!'); + + return $user->plaintext; + } + + private function latestTorrentsExtractTimestamp($torrent){ + $cell = $torrent->find('td.date-row', 0); + if(!$cell) + returnServerError('Unable to find date cell!'); + + return strtotime('-' . $cell->plaintext, time()); + } + + #endregion + + #region Generic helper functions + + private function loadHtml($uri){ + $html = getSimpleHTMLDOM($uri); + if(!$html) + returnServerError('Unable to load ' . $uri . '!'); + + return $html; + } + + private function fixRelativeUri($uri){ + return preg_replace('/\//i', self::URI, $uri, 1); + } + + private function buildCategoryUri($category, $order_popularity = false){ + switch($category) { + case 'anime': $index = 1; break; + case 'software' : $index = 2; break; + case 'games' : $index = 3; break; + case 'adult' : $index = 4; break; + case 'movies' : $index = 5; break; + case 'music' : $index = 6; break; + case 'other' : $index = 7; break; + case 'series_tv' : $index = 8; break; + case 'books': $index = 9; break; + case 'all': + default: $index = 0; break; + } + + return 'torrents/?iht=' . $index . '&ihs=' . ($order_popularity ? 1 : 0) . 
class JapanExpoBridge extends BridgeAbstract {

    const MAINTAINER = 'Ginko';
    const NAME = 'Japan Expo Actualités';
    const URI = 'http://www.japan-expo-paris.com/fr/actualites';
    const CACHE_TIMEOUT = 14400; // 4h
    const DESCRIPTION = 'Returns most recent entries from Japan Expo actualités.';
    const PARAMETERS = array( array(
        'mode' => array(
            'name' => 'Show full contents',
            'type' => 'checkbox',
        )
    ));

    /**
     * Collects the news tiles from the overview page.
     *
     * Without 'mode', every tile becomes a short item (thumbnail, date and a
     * link to the article). With 'mode' enabled, the first five articles are
     * loaded individually and their headings and body are used as content.
     */
    public function collectData(){

        // Rewrites an <img> whose picture is given through a CSS url('...')
        // style into a plain <img src="...">.
        $convert_article_images = function($matches){
            if(is_array($matches) && count($matches) > 1) {
                return '<img src="' . $matches[1] . '" />';
            }
        };

        $html = getSimpleHTMLDOM(self::URI)
            or returnServerError('Could not request JapanExpo: ' . self::URI);
        $fullcontent = $this->getInput('mode');
        $count = 0;

        foreach($html->find('a._tile2') as $element) {

            $url = $element->href;

            // Default picture, replaced when the tile's style carries a url(...).
            $thumbnail = 'http://s.japan-expo.com/katana/images/JES049/paris.png';
            preg_match('/url\(([^)]+)\)/', $element->find('img.rspvimgset', 0)->style, $img_search_result);

            if(count($img_search_result) >= 2)
                $thumbnail = trim($img_search_result[1], "'");

            if($fullcontent) {
                // Full mode requests one page per article, so stop after five.
                if($count >= 5) {
                    break;
                }

                // The error message used to be passed as the URL to load;
                // request the article itself and fail with that message.
                $article_html = getSimpleHTMLDOMCached($url)
                    or returnServerError('Could not request JapanExpo: ' . $url);
                $header = $article_html->find('header.pageHeadBox', 0);
                $timestamp = strtotime($header->find('time', 0)->datetime);
                $title_html = $header->find('div.section', 0)->next_sibling();
                $title = $title_html->plaintext;
                $headings = $title_html->next_sibling()->outertext;
                $article = $article_html->find('div.content', 0)->innertext;
                $article = preg_replace_callback(
                    '/<img [^>]+ style="[^\(]+\(\'([^\']+)\'[^>]+>/i',
                    $convert_article_images,
                    $article);

                $content = $headings . $article;
            } else {
                $date_text = $element->find('span.date', 0)->plaintext;
                $timestamp = $this->frenchPubDateToTimestamp($date_text);
                $title = trim($element->find('span._title', 0)->plaintext);
                $content = '<img src="'
                . $thumbnail
                . '"></img><br />'
                . $date_text
                . '<br /><a href="'
                . $url
                . '">Lire l\'article</a>';
            }

            $item = array();
            $item['uri'] = $url;
            $item['title'] = $title;
            $item['timestamp'] = $timestamp;
            $item['content'] = $content;
            $this->items[] = $item;
            $count++;
        }
    }

    /**
     * Converts a French publication date ('Publié le 3 août 2017') into a
     * Unix timestamp by swapping the month name for an English one that
     * strtotime() understands.
     *
     * This used to be a named function declared inside collectData(), which
     * PHP refuses to declare a second time if collectData() runs again.
     *
     * @param string $date_to_parse Date text as shown on the tile
     * @return int|false Timestamp, or false when strtotime() cannot parse it
     */
    private function frenchPubDateToTimestamp($date_to_parse){
        return strtotime(
            strtr(
                strtolower(str_replace('Publié le ', '', $date_to_parse)),
                array(
                    'janvier' => 'jan',
                    'février' => 'feb',
                    'mars' => 'march',
                    'avril' => 'apr',
                    'mai' => 'may',
                    'juin' => 'jun',
                    'juillet' => 'jul',
                    'août' => 'aug',
                    'septembre' => 'sep',
                    'octobre' => 'oct',
                    'novembre' => 'nov',
                    'décembre' => 'dec'
                )
            )
        );
    }
}
Search can be done in a specified category'; + + const PARAMETERS = array( array( + 'q' => array( + 'name' => 'keywords, separated by semicolons', + 'exampleValue' => 'first list;second list;…', + 'required' => true + ), + 'crit' => array( + 'type' => 'list', + 'name' => 'Search type', + 'values' => array( + 'search' => 'search', + 'category' => 'cat', + 'user' => 'usr' + ) + ), + 'cat_check' => array( + 'type' => 'checkbox', + 'name' => 'Specify category for normal search ?', + ), + 'cat' => array( + 'name' => 'Category number', + 'exampleValue' => '100, 200… See KAT for category number' + ), + 'trusted' => array( + 'type' => 'checkbox', + 'name' => 'Only get results from Elite or Verified uploaders ?', + ), + )); + public function collectData(){ + function parseDateTimestamp($element){ + $guessedDate = strptime($element, '%d-%m-%Y %H:%M:%S'); + $timestamp = mktime( + $guessedDate['tm_hour'], + $guessedDate['tm_min'], + $guessedDate['tm_sec'], + $guessedDate['tm_mon'] + 1, + $guessedDate['tm_mday'], + $guessedDate['tm_year'] + 1900); + return $timestamp; + } + $catBool = $this->getInput('cat_check'); + if($catBool) { + $catNum = $this->getInput('cat'); + } + $critList = $this->getInput('crit'); + $trustedBool = $this->getInput('trusted'); + $keywordsList = explode(';', $this->getInput('q')); + foreach($keywordsList as $keywords) { + switch($critList) { + case 'search': + if($catBool == false) { + $html = getSimpleHTMLDOM( + self::URI . + 'torrents-search.php?search=' . + rawurlencode($keywords) + ) or returnServerError('Could not request KAT.'); + } else { + $html = getSimpleHTMLDOM( + self::URI . + 'torrents-search.php?search=' . + rawurlencode($keywords) . + '&cat=' . + rawurlencode($catNum) + ) or returnServerError('Could not request KAT.'); + } + break; + case 'cat': + $html = getSimpleHTMLDOM( + self::URI . + 'torrents.php?cat=' . 
+ rawurlencode($keywords) + ) or returnServerError('Could not request KAT.'); + break; + case 'usr': + $html = getSimpleHTMLDOM( + self::URI . + 'account-details.php?id=' . + rawurlencode($keywords) + ) or returnServerError('Could not request KAT.'); + break; + } + if ($html->find('table.ttable_headinner', 0) == false) + returnServerError('No result for query ' . $keywords); + foreach($html->find('tr.t-row') as $element) { + if(!$trustedBool + || !is_null($element->find('i[title="Elite Uploader"]', 0)) + || !is_null($element->find('i[title="Verified Uploader"]', 0))) { + $item = array(); + $item['uri'] = self::URI . $element->find('a', 2)->href; + $item['id'] = self::URI . $element->find('a.cellMainLink', 0)->href; + $item['timestamp'] = parseDateTimestamp($element->find('td', 2)->plaintext); + $item['author'] = $element->find('a.plain', 0)->plaintext; + $item['title'] = $element->find('a.cellMainLink', 0)->plaintext; + $item['seeders'] = (int)$element->find('td', 3)->plaintext; + $item['leechers'] = (int)$element->find('td', 4)->plaintext; + $item['size'] = $element->find('td', 1)->plaintext; + $item['content'] = $item['title'] + . '<br>size: ' + . $item['size'] + . '<br>seeders: ' + . $item['seeders'] + . ' | leechers: ' + . $item['leechers'] + . '<br><a href="' + . $item['id'] + . 
class KernelBugTrackerBridge extends BridgeAbstract {

    const NAME = 'Kernel Bug Tracker';
    const URI = 'https://bugzilla.kernel.org';
    const DESCRIPTION = 'Returns feeds for bug comments';
    const MAINTAINER = 'logmanoriginal';
    const PARAMETERS = array(
        'Bug comments' => array(
            'id' => array(
                'name' => 'Bug tracking ID',
                'type' => 'number',
                'required' => true,
                'title' => 'Insert bug tracking ID',
                'exampleValue' => 121241
            ),
            'limit' => array(
                'name' => 'Number of comments to return',
                'type' => 'number',
                'required' => false,
                'title' => 'Specify number of comments to return',
                'defaultValue' => -1
            ),
            'sorting' => array(
                'name' => 'Sorting',
                'type' => 'list',
                'required' => false,
                'title' => 'Defines the sorting order of the comments returned',
                'defaultValue' => 'of',
                'values' => array(
                    'Oldest first' => 'of',
                    'Latest first' => 'lf'
                )
            )
        )
    );

    // Filled by collectData() from the page header; used by getName().
    private $bugid = '';
    private $bugdesc = '';

    /**
     * Loads the print-preview page of the bug and turns every comment into
     * a feed item, honouring the 'limit' and 'sorting' inputs.
     */
    public function collectData(){
        $limit = $this->getInput('limit');
        $sorting = $this->getInput('sorting');

        // We use the print preview page for simplicity
        $html = getSimpleHTMLDOMCached($this->getURI() . '&format=multiple',
        86400,
        false,
        null,
        0,
        null,
        true,
        true,
        DEFAULT_TARGET_CHARSET,
        false, // Do NOT remove line breaks
        DEFAULT_BR_TEXT,
        DEFAULT_SPAN_TEXT);

        if($html === false)
            returnServerError('Failed to load page!');

        // Store header information into private members
        $this->bugid = $html->find('#bugzilla-body', 0)->find('a', 0)->innertext;
        $this->bugdesc = $html->find('table.bugfields', 0)->find('tr', 0)->find('td', 0)->innertext;

        // Get and limit comments (a positive limit keeps the LAST $limit comments)
        $comments = $html->find('div.bz_comment');

        if($limit > 0 && count($comments) > $limit) {
            $comments = array_slice($comments, count($comments) - $limit, $limit);
        }

        // Order comments: the page lists them oldest first, so only
        // 'latest first' needs work.
        if($sorting === 'lf') {
            $comments = array_reverse($comments, true);
        }

        foreach($comments as $comment) {
            $comment = $this->inlineStyles($comment);

            $item = array();
            $item['uri'] = $this->getURI() . '#' . $comment->id;
            $item['author'] = $comment->find('span.bz_comment_user', 0)->innertext;
            $item['title'] = $comment->find('span.bz_comment_number', 0)->find('a', 0)->innertext;
            $item['timestamp'] = strtotime($comment->find('span.bz_comment_time', 0)->innertext);
            $item['content'] = $comment->find('pre.bz_comment_text', 0)->innertext;

            // Fix line breaks (they use LF)
            $item['content'] = str_replace("\n", '<br>', $item['content']);

            // Fix relative URIs
            $item['content'] = $this->replaceRelativeURI($item['content']);

            $this->items[] = $item;
        }

    }

    /**
     * Address of the requested bug, or the parent's URI outside the
     * 'Bug comments' context.
     */
    public function getURI(){
        switch($this->queriedContext) {
        case 'Bug comments':
            return parent::getURI()
            . '/show_bug.cgi?id='
            . $this->getInput('id');
        default: return parent::getURI();
        }
    }

    /**
     * Feed name built from the bug id and description captured by
     * collectData(), or the parent's name outside the 'Bug comments' context.
     */
    public function getName(){
        switch($this->queriedContext) {
        case 'Bug comments':
            return 'Bug '
            . $this->bugid
            . ' tracker for '
            . $this->bugdesc
            . ' - '
            . parent::getName();
        default: return parent::getName();
        }
    }

    /**
     * Replaces all relative URIs with absolute ones
     *
     * Links that already carry a scheme (http:, https:, mailto:, ftp:, ...)
     * or are protocol-relative (//host/...) are left untouched. The previous
     * pattern only spared links starting with 'http' and prefixed the
     * tracker address onto everything else.
     *
     * @param string $content The source string
     * @return string Returns the source string with all relative URIs replaced
     * by absolute ones.
     */
    private function replaceRelativeURI($content){
        return preg_replace(
            '/href="(?![a-z][a-z0-9+.\-]*:|\/\/)/i',
            'href="' . self::URI . '/',
            $content
        );
    }

    /**
     * Adds styles as attributes to tags with known classes
     *
     * @param object $html A simplehtmldom object
     * @return object Returns the original object with styles added as
     * attributes.
     */
    private function inlineStyles($html){
        foreach($html->find('.bz_obsolete') as $element) {
            $element->style = 'text-decoration:line-through;';
        }

        return $html;
    }

}
diff --git a/bridges/KununuBridge.php b/bridges/KununuBridge.php new file mode 100644 index 0000000..e99e135 --- /dev/null +++ b/bridges/KununuBridge.php @@ -0,0 +1,249 @@ +<?php +class KununuBridge extends BridgeAbstract { + const MAINTAINER = 'logmanoriginal'; + const NAME = 'Kununu Bridge'; + const URI = 'https://www.kununu.com/'; + const CACHE_TIMEOUT = 86400; // 24h + const DESCRIPTION = 'Returns the latest reviews for a company and site of your choice.'; + + const PARAMETERS = array( + 'global' => array( + 'site' => array( + 'name' => 'Site', + 'type' => 'list', + 'required' => true, + 'title' => 'Select your site', + 'values' => array( + 'Austria' => 'at', + 'Germany' => 'de', + 'Switzerland' => 'ch', + 'United States' => 'us' + ) + ), + 'full' => array( + 'name' => 'Load full article', + 'type' => 'checkbox', + 'required' => false, + 'exampleValue' => 'checked', + 'title' => 'Activate to load full article' + ) + ), + array( + 'company' => array( + 'name' => 'Company', + 'required' => true, + 'exampleValue' => 'kununu-us', + 'title' => 'Insert company name (i.e. Kununu US) or URI path (i.e. kununu-us)' + ) + ) + ); + + private $companyName = ''; + + public function getURI(){ + if(!is_null($this->getInput('company')) && !is_null($this->getInput('site'))) { + + $company = $this->fixCompanyName($this->getInput('company')); + $site = $this->getInput('site'); + $section = ''; + + switch($site) { + case 'at': + case 'de': + case 'ch': + $section = 'kommentare'; + break; + case 'us': + $section = 'reviews'; + break; + } + + return self::URI . $site . '/' . $company . '/' . $section; + } + + return parent::getURI(); + } + + function getName(){ + if(!is_null($this->getInput('company'))) { + $company = $this->fixCompanyName($this->getInput('company')); + return ($this->companyName ?: $company) . ' - ' . 
self::NAME; + } + + return parent::getName(); + } + + public function collectData(){ + $full = $this->getInput('full'); + + // Load page + $html = getSimpleHTMLDOMCached($this->getURI()); + if(!$html) + returnServerError('Unable to receive data from ' . $this->getURI() . '!'); + // Update name for this request + $this->companyName = $this->extractCompanyName($html); + + // Find the section with all the panels (reviews) + $section = $html->find('section.kununu-scroll-element', 0); + if($section === false) + returnServerError('Unable to find panel section!'); + + // Find all articles (within the panels) + $articles = $section->find('article'); + if($articles === false || empty($articles)) + returnServerError('Unable to find articles!'); + + // Go through all articles + foreach($articles as $article) { + $item = array(); + + $item['author'] = $this->extractArticleAuthorPosition($article); + $item['timestamp'] = $this->extractArticleDate($article); + $item['title'] = $this->extractArticleRating($article) + . ' : ' + . 
$this->extractArticleSummary($article); + + $item['uri'] = $this->extractArticleUri($article); + + if($full) + $item['content'] = $this->extractFullDescription($item['uri']); + else + $item['content'] = $this->extractArticleDescription($article); + + $this->items[] = $item; + } + } + + /** + * Fixes relative URLs in the given text + */ + private function fixUrl($text){ + return preg_replace('/href=(\'|\")\//i', 'href="'.self::URI, $text); + } + + /* + * Returns a fixed version of the provided company name + */ + private function fixCompanyName($company){ + $company = trim($company); + $company = str_replace(' ', '-', $company); + $company = strtolower($company); + return $this->encodeUmlauts($company); + } + + /** + * Encodes unmlauts in the given text + */ + private function encodeUmlauts($text){ + $umlauts = Array("/ä/","/ö/","/ü/","/Ä/","/Ö/","/Ü/","/ß/"); + $replace = Array("ae","oe","ue","Ae","Oe","Ue","ss"); + + return preg_replace($umlauts, $replace, $text); + } + + /** + * Returns the company name from the review html + */ + private function extractCompanyName($html){ + $company_name = $html->find('h1[itemprop=name]', 0); + if(is_null($company_name)) + returnServerError('Cannot find company name!'); + + return $company_name->plaintext; + } + + /** + * Returns the date from a given article + */ + private function extractArticleDate($article){ + // They conviniently provide a time attribute for us :) + $date = $article->find('meta[itemprop=dateCreated]', 0); + if(is_null($date)) + returnServerError('Cannot find article date!'); + + return strtotime($date->content); + } + + /** + * Returns the rating from a given article + */ + private function extractArticleRating($article){ + $rating = $article->find('span.rating', 0); + if(is_null($rating)) + returnServerError('Cannot find article rating!'); + + return $rating->getAttribute('aria-label'); + } + + /** + * Returns the summary from a given article + */ + private function extractArticleSummary($article){ + 
$summary = $article->find('[itemprop=name]', 0); + if(is_null($summary)) + returnServerError('Cannot find article summary!'); + + return strip_tags($summary->innertext); + } + + /** + * Returns the URI from a given article + */ + private function extractArticleUri($article){ + $anchor = $article->find('ku-company-review-more', 0); + if(is_null($anchor)) + returnServerError('Cannot find article URI!'); + + return self::URI . $anchor->{'review-url'}; + } + + /** + * Returns the position of the author from a given article + */ + private function extractArticleAuthorPosition($article){ + // We need to parse the user-content manually + $user_content = $article->find('div.user-content', 0); + if(is_null($user_content)) + returnServerError('Cannot find user content!'); + + // Go through all h2 elements to find index of required span (I know... it's stupid) + $author_position = 'Unknown'; + foreach($user_content->find('div') as $content) { + if(stristr(strtolower($content->plaintext), 'position')) { /* This works for at, ch, de, us */ + $author_position = $content->next_sibling()->plaintext; + break; + } + } + + return $author_position; + } + + /** + * Returns the description from a given article + */ + private function extractArticleDescription($article){ + $description = $article->find('[itemprop=reviewBody]', 0); + if(is_null($description)) + returnServerError('Cannot find article description!'); + + return $this->fixUrl($description->innertext); + } + + /** + * Returns the full description from a given uri + */ + private function extractFullDescription($uri){ + // Load full article + $html = getSimpleHTMLDOMCached($uri); + if($html === false) + returnServerError('Could not load full description!'); + + // Find the article + $article = $html->find('article', 0); + if(is_null($article)) + returnServerError('Cannot find article!'); + + // Luckily they use the same layout for the review overview and full article pages :) + return $this->extractArticleDescription($article); 
+ } +} diff --git a/bridges/LWNprevBridge.php b/bridges/LWNprevBridge.php new file mode 100644 index 0000000..6d71c9d --- /dev/null +++ b/bridges/LWNprevBridge.php @@ -0,0 +1,265 @@ +<?php +class LWNprevBridge extends BridgeAbstract{ + const MAINTAINER = 'Pierre Mazière'; + const NAME = 'LWN Free Weekly Edition'; + const URI = 'https://lwn.net/'; + const CACHE_TIMEOUT = 604800; // 1 week + const DESCRIPTION = 'LWN Free Weekly Edition available one week late'; + + private $editionTimeStamp; + + function getURI(){ + return self::URI.'free/bigpage'; + } + + private function jumpToNextTag(&$node){ + while($node && $node->nodeType === XML_TEXT_NODE) { + $nextNode = $node->nextSibling; + if(!$nextNode) { + break; + } + $node = $nextNode; + } + } + + private function jumpToPreviousTag(&$node){ + while($node && $node->nodeType === XML_TEXT_NODE) { + $previousNode = $node->previousSibling; + if(!$previousNode) { + break; + } + $node = $previousNode; + } + } + + public function collectData(){ + // Because the LWN page is written in loose HTML and not XHTML, + // Simple HTML Dom is not accurate enough for the job + $content = getContents($this->getURI()) + or returnServerError('No results for LWNprev'); + + $contents = explode('<b>Page editor</b>', $content); + + foreach($contents as $content) { + if(strpos($content, '<html>') === false) { + $content = <<<EOD +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd"> +<html><head><title>LWN</title></head><body>{$content}</body></html> +EOD; + } else { + $content = $content.'</body></html>'; + } + + libxml_use_internal_errors(true); + $html = new DOMDocument(); + $html->loadHTML($content); + libxml_clear_errors(); + + $edition = $html->getElementsByTagName('h1'); + if($edition->length !== 0) { + $text = $edition->item(0)->textContent; + $this->editionTimeStamp = strtotime( + substr($text, strpos($text, 'for ') + strlen('for ')) + ); + } + + if(strpos($content, 'Cat1HL') === false) { 
+ $items = $this->getFeatureContents($html); + } elseif(strpos($content, 'Cat3HL') === false) { + $items = $this->getBriefItems($html); + } else { + $items = $this->getAnnouncements($html); + } + + $this->items = array_merge($this->items, $items); + } + } + + private function getArticleContent(&$title){ + $link = $title->firstChild; + $this->jumpToNextTag($link); + $item['uri'] = self::URI; + if($link->nodeName === 'a') { + $item['uri'] .= $link->getAttribute('href'); + } + + $item['timestamp'] = $this->editionTimeStamp; + + $node = $title; + $content = ''; + $contentEnd = false; + while(!$contentEnd) { + $node = $node->nextSibling; + if(!$node || ( + $node->nodeType !== XML_TEXT_NODE && + $node->nodeName === 'h2' || ( + !is_null($node->attributes) && + !is_null($class = $node->attributes->getNamedItem('class')) && + in_array($class->nodeValue, array('Cat1HL','Cat2HL')) + ) + ) + ) { + $contentEnd = true; + } else { + $content .= $node->C14N(); + } + } + $item['content'] = $content; + return $item; + } + + private function getFeatureContents(&$html){ + $items = array(); + foreach($html->getElementsByTagName('h2') as $title) { + if($title->getAttribute('class') !== 'SummaryHL') { + continue; + } + + $item = array(); + + $author = $title->nextSibling; + $this->jumpToNextTag($author); + if($author->getAttribute('class') === 'FeatureByline') { + $item['author'] = $author->getElementsByTagName('b')->item(0)->textContent; + } else { + continue; + } + + $item['title'] = $title->textContent; + + $items[] = array_merge($item, $this->getArticleContent($title)); + } + return $items; + } + + private function getItemPrefix(&$cat, &$cats){ + $cat1 = ''; + $cat2 = ''; + $cat3 = ''; + switch($cat->getAttribute('class')) { + case 'Cat3HL': + $cat3 = $cat->textContent; + $cat = $cat->previousSibling; + $this->jumpToPreviousTag($cat); + $cats[2] = $cat3; + if($cat->getAttribute('class') !== 'Cat2HL') { + break; + } + case 'Cat2HL': + $cat2 = $cat->textContent; + $cat = 
$cat->previousSibling; + $this->jumpToPreviousTag($cat); + $cats[1] = $cat2; + if(empty($cat3)) { + $cats[2] = ''; + } + if($cat->getAttribute('class') !== 'Cat1HL') { + break; + } + case 'Cat1HL': + $cat1 = $cat->textContent; + $cats[0] = $cat1; + if(empty($cat3)) { + $cats[2] = ''; + } + if(empty($cat2)) { + $cats[1] = ''; + } + break; + default: + break; + } + + $prefix = ''; + if(!empty($cats[0])) { + $prefix .= '['.$cats[0].($cats[1] ? '/'.$cats[1] : '').'] '; + } + return $prefix; + } + + private function getAnnouncements(&$html){ + $items = array(); + $cats = array('','',''); + + foreach($html->getElementsByTagName('p') as $newsletters) { + if($newsletters->getAttribute('class') !== 'Cat3HL') { + continue; + } + + $item = array(); + + $item['uri'] = self::URI.'#'.microtime(true); + + $item['timestamp'] = $this->editionTimeStamp;//+$URICounter; + + $item['author'] = 'LWN'; + + $cat = $newsletters->previousSibling; + $this->jumpToPreviousTag($cat); + $prefix = $this->getItemPrefix($cat, $cats); + $item['title'] = $prefix.' '.$newsletters->textContent; + + $node = $newsletters; + $content = ''; + $contentEnd = false; + while(!$contentEnd) { + $node = $node->nextSibling; + if(!$node || ( + $node->nodeType !== XML_TEXT_NODE && ( + !is_null($node->attributes) && + !is_null($class = $node->attributes->getNamedItem('class')) && + in_array($class->nodeValue, array('Cat1HL','Cat2HL','Cat3HL')) + ) + ) + ) { + $contentEnd = true; + } else { + $content .= $node->C14N(); + } + } + $item['content'] = $content; + $items[] = $item; + } + + foreach($html->getElementsByTagName('h2') as $title) { + if($title->getAttribute('class') !== 'SummaryHL') { + continue; + } + + $item = array(); + + $cat = $title->previousSibling; + $this->jumpToPreviousTag($cat); + $cat = $cat->previousSibling; + $this->jumpToPreviousTag($cat); + $prefix = $this->getItemPrefix($cat, $cats); + $item['title'] = $prefix.' 
'.$title->textContent; + $items[] = array_merge($item, $this->getArticleContent($title)); + } + + return $items; + } + + private function getBriefItems(&$html){ + $items = array(); + $cats = array('','',''); + foreach($html->getElementsByTagName('h2') as $title) { + if($title->getAttribute('class') !== 'SummaryHL') { + continue; + } + + $item = array(); + + $cat = $title->previousSibling; + $this->jumpToPreviousTag($cat); + $cat = $cat->previousSibling; + $this->jumpToPreviousTag($cat); + $prefix = $this->getItemPrefix($cat, $cats); + $item['title'] = $prefix.' '.$title->textContent; + $items[] = array_merge($item, $this->getArticleContent($title)); + } + + return $items; + } +} +?> diff --git a/bridges/LeBonCoinBridge.php b/bridges/LeBonCoinBridge.php new file mode 100755 index 0000000..d4da546 --- /dev/null +++ b/bridges/LeBonCoinBridge.php @@ -0,0 +1,190 @@ +<?php +class LeBonCoinBridge extends BridgeAbstract { + + const MAINTAINER = '16mhz'; + const NAME = 'LeBonCoin'; + const URI = 'https://www.leboncoin.fr/'; + const DESCRIPTION = 'Returns most recent results from LeBonCoin for a +region, and optionally a category and a keyword .'; + + const PARAMETERS = array( + array( + 'k' => array('name' => 'Mot Clé'), + 'r' => array( + 'name' => 'Région', + 'type' => 'list', + 'values' => array( + 'Toute la France' => 'ile_de_france/occasions', + 'Alsace' => 'alsace', + 'Aquitaine' => 'aquitaine', + 'Auvergne' => 'auvergne', + 'Basse Normandie' => 'basse_normandie', + 'Bourgogne' => 'bourgogne', + 'Bretagne' => 'bretagne', + 'Centre' => 'centre', + 'Champagne Ardenne' => 'champagne_ardenne', + 'Corse' => 'corse', + 'Franche Comté' => 'franche_comte', + 'Haute Normandie' => 'haute_normandie', + 'Ile de France' => 'ile_de_france', + 'Languedoc Roussillon' => 'languedoc_roussillon', + 'Limousin' => 'limousin', + 'Lorraine' => 'lorraine', + 'Midi Pyrénées' => 'midi_pyrenees', + 'Nord Pas De Calais' => 'nord_pas_de_calais', + 'Pays de la Loire' => 'pays_de_la_loire', + 
'Picardie' => 'picardie', + 'Poitou Charentes' => 'poitou_charentes', + 'Provence Alpes Côte d\'Azur' => 'provence_alpes_cote_d_azur', + 'Rhône-Alpes' => 'rhone_alpes', + 'Guadeloupe' => 'guadeloupe', + 'Martinique' => 'martinique', + 'Guyane' => 'guyane', + 'Réunion' => 'reunion' + ) + ), + 'c' => array( + 'name' => 'Catégorie', + 'type' => 'list', + 'values' => array( + 'TOUS' => '', + 'EMPLOI' => '_emploi_', + 'VEHICULES' => array( + 'Tous' => '_vehicules_', + 'Voitures' => 'voitures', + 'Motos' => 'motos', + 'Caravaning' => 'caravaning', + 'Utilitaires' => 'utilitaires', + 'Équipement Auto' => 'equipement_auto', + 'Équipement Moto' => 'equipement_moto', + 'Équipement Caravaning' => 'equipement_caravaning', + 'Nautisme' => 'nautisme', + 'Équipement Nautisme' => 'equipement_nautisme' + ), + 'IMMOBILIER' => array( + 'Tous' => '_immobilier_', + 'Ventes immobilières' => 'ventes_immobilieres', + 'Locations' => 'locations', + 'Colocations' => 'colocations', + 'Bureaux & Commerces' => 'bureaux_commerces' + ), + 'VACANCES' => array( + 'Tous' => '_vacances_', + 'Location gîtes' => 'locations_gites', + 'Chambres d\'hôtes' => 'chambres_d_hotes', + 'Campings' => 'campings', + 'Hôtels' => 'hotels', + 'Hébergements insolites' => 'hebergements_insolites' + ), + 'MULTIMEDIA' => array( + 'Tous' => '_multimedia_', + 'Informatique' => 'informatique', + 'Consoles & Jeux vidéo' => 'consoles_jeux_video', + 'Image & Son' => 'image_son', + 'Téléphonie' => 'telephonie' + ), + 'LOISIRS' => array( + 'Tous' => '_loisirs_', + 'DVD / Films' => 'dvd_films', + 'CD / Musique' => 'cd_musique', + 'Livres' => 'livres', + 'Animaux' => 'animaux', + 'Vélos' => 'velos', + 'Sports & Hobbies' => 'sports_hobbies', + 'Instruments de musique' => 'instruments_de_musique', + 'Collection' => 'collection', + 'Jeux & Jouets' => 'jeux_jouets', + 'Vins & Gastronomie' => 'vins_gastronomie' + ), + 'MATÉRIEL PROFESSIONNEL' => array( + 'Tous' => '_materiel_professionnel_', + 'Matériel Agricole' => 'mateiel_agricole', 
+ 'Transport - Manutention' => 'transport_manutention', + 'BTP - Chantier - Gros-œuvre' => 'btp_chantier_gros_oeuvre', + 'Outillage - Matériaux 2nd-œuvre' => 'outillage_materiaux_2nd_oeuvre', + 'Équipements Industriels' => 'equipement_industriels', + 'Restauration - Hôtellerie' => 'restauration_hotellerie', + 'Fournitures de Bureau' => 'fournitures_de_bureau', + 'Commerces & Marchés' => 'commerces_marches', + 'Matériel médical' => 'materiel_medical' + ), + 'SERVICES' => array( + 'Tous' => '_services_', + 'Prestations de services' => 'prestations_de_services', + 'Billetterie' => 'billetterie', + 'Évènements' => 'evenements', + 'Cours particuliers' => 'cours_particuliers', + 'Covoiturage' => 'covoiturage' + ), + 'MAISON' => array( + 'Tous' => '_maison_', + 'Ameublement' => 'ameublement', + 'Électroménager' => 'electromenager', + 'Arts de la table' => 'arts_de_la_table', + 'Décoration' => 'decoration', + 'Linge de maison' => 'linge_de_maison', + 'Bricolage' => 'bricolage', + 'Jardinage' => 'jardinage', + 'Vêtements' => 'vetements', + 'Chaussures' => 'chaussures', + 'Accessoires & Bagagerie' => 'accessoires_bagagerie', + 'Montres & Bijoux' => 'montres_bijoux', + 'Équipement bébé' => 'equipement_bebe', + 'Vêtements bébé' => 'vetements_bebe' + ), + 'AUTRES' => 'autres' + ) + ) + ) + ); + + public function collectData(){ + + $category = $this->getInput('c'); + if(empty($category)) { + $category = 'annonces'; + } + + $html = getSimpleHTMLDOM(self::URI + . $category + . '/offres/' + . $this->getInput('r') + . '/?f=a&th=1&q=' + . 
urlencode($this->getInput('k'))) + or returnServerError('Could not request LeBonCoin.'); + + $list = $html->find('.tabsContent', 0); + if($list === null) { + return; + } + + $tags = $list->find('li'); + + foreach($tags as $element) { + + $element = $element->find('a', 0); + + $item = array(); + $item['uri'] = $element->href; + $title = html_entity_decode($element->getAttribute('title')); + $content_image = $element->find('div.item_image', 0)->find('.lazyload', 0); + + if($content_image !== null) { + $content = '<img src="' . $content_image->getAttribute('data-imgsrc') . '" alt="thumbnail">'; + } else { + $content = ""; + } + $date = $element->find('aside.item_absolute', 0)->find('p.item_sup', 0); + + $detailsList = $element->find('section.item_infos', 0); + + for($i = 0; $i <= 1; $i++) $content .= $detailsList->find('p.item_supp', $i)->plaintext; + $price = $detailsList->find('h3.item_price', 0); + $content .= $price === null ? '' : $price->plaintext; + + $item['title'] = $title; + $item['content'] = $content . $date; + $this->items[] = $item; + } + } +} diff --git a/bridges/LeMondeInformatiqueBridge.php b/bridges/LeMondeInformatiqueBridge.php new file mode 100644 index 0000000..706752f --- /dev/null +++ b/bridges/LeMondeInformatiqueBridge.php @@ -0,0 +1,44 @@ +<?php +class LeMondeInformatiqueBridge extends FeedExpander { + + const MAINTAINER = 'ORelio'; + const NAME = 'Le Monde Informatique'; + const URI = 'http://www.lemondeinformatique.fr/'; + const CACHE_TIMEOUT = 1800; // 30min + const DESCRIPTION = 'Returns the newest articles.'; + + public function collectData(){ + $this->collectExpandableDatas(self::URI . 'rss/rss.xml', 10); + } + + protected function parseItem($newsItem){ + $item = parent::parseItem($newsItem); + $article_html = getSimpleHTMLDOMCached($item['uri']) + or returnServerError('Could not request LeMondeInformatique: ' . 
$item['uri']); + $item['content'] = $this->cleanArticle($article_html->find('div#article', 0)->innertext); + $item['title'] = $article_html->find('h1.cleanprint-title', 0)->plaintext; + return $item; + } + + private function stripCDATA($string){ + $string = str_replace('<![CDATA[', '', $string); + $string = str_replace(']]>', '', $string); + return $string; + } + + private function stripWithDelimiters($string, $start, $end){ + while(strpos($string, $start) !== false) { + $section_to_remove = substr($string, strpos($string, $start)); + $section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end)); + $string = str_replace($section_to_remove, '', $string); + } + + return $string; + } + + private function cleanArticle($article_html){ + $article_html = $this->stripWithDelimiters($article_html, '<script', '</script>'); + $article_html = $this->stripWithDelimiters($article_html, '<h1 class="cleanprint-title"', '</h1>'); + return $article_html; + } +} diff --git a/bridges/LegifranceJOBridge.php b/bridges/LegifranceJOBridge.php new file mode 100644 index 0000000..348be8f --- /dev/null +++ b/bridges/LegifranceJOBridge.php @@ -0,0 +1,68 @@ +<?php +class LegifranceJOBridge extends BridgeAbstract { + + const MAINTAINER = 'Pierre Mazière'; + const NAME = 'Journal Officiel de la République Française'; + const URI = 'https://www.legifrance.gouv.fr/affichJO.do'; + const DESCRIPTION = 'Returns the laws and decrees officially registered daily in France'; + + const PARAMETERS = array(); + + private $author; + private $timestamp; + private $uri; + + private function extractItem($section, $subsection = null, $origin = null){ + $item = array(); + $item['author'] = $this->author; + $item['timestamp'] = $this->timestamp; + $item['uri'] = $this->uri . '#' . count($this->items); + $item['title'] = $section->plaintext; + + if(!is_null($origin)) { + $item['title'] = '[ ' . $item['title'] . ' / ' . $subsection->plaintext . ' ] ' . 
$origin->plaintext; + $data = $origin; + } elseif(!is_null($subsection)) { + $item['title'] = '[ ' . $item['title'] . ' ] ' . $subsection->plaintext; + $data = $subsection; + } else { + $data = $section; + } + + $item['content'] = ''; + foreach($data->nextSibling()->find('a') as $content) { + $text = $content->plaintext; + $href = $content->nextSibling()->getAttribute('resource'); + $item['content'] .= '<p><a href="' . $href . '">' . $text . '</a></p>'; + } + return $item; + } + + public function collectData(){ + // Abort with a server error when the page cannot be downloaded + $html = getSimpleHTMLDOM(self::URI) + or returnServerError('Unable to download ' . self::URI); + + $this->author = trim($html->find('h2.title', 0)->plaintext); + $uri = $html->find('h2.titleELI', 0)->plaintext; + $this->uri = trim(substr($uri, strpos($uri, 'https'))); + $this->timestamp = strtotime(substr($this->uri, strpos($this->uri, 'eli/jo/') + strlen('eli/jo/'))); + + foreach($html->find('h3') as $section) { + $subsections = $section->nextSibling()->find('h4'); + foreach($subsections as $subsection) { + $origins = $subsection->nextSibling()->find('h5'); + foreach($origins as $origin) { + $this->items[] = $this->extractItem($section, $subsection, $origin); + } + if(!empty($origins)) { + continue; + } + $this->items[] = $this->extractItem($section, $subsection); + } + if(!empty($subsections)) { + continue; + } + $this->items[] = $this->extractItem($section); + } + } +} diff --git a/bridges/LesJoiesDuCodeBridge.php b/bridges/LesJoiesDuCodeBridge.php new file mode 100644 index 0000000..34145a1 --- /dev/null +++ b/bridges/LesJoiesDuCodeBridge.php @@ -0,0 +1,45 @@ +<?php +class LesJoiesDuCodeBridge extends BridgeAbstract { + + const MAINTAINER = 'superbaillot.net'; + const NAME = 'Les Joies Du Code'; + const URI = 'http://lesjoiesducode.fr/'; + const CACHE_TIMEOUT = 7200; // 2h + const DESCRIPTION = 'LesJoiesDuCode'; + + public function collectData(){ + $html = getSimpleHTMLDOM(self::URI) + or returnServerError('Could not request LesJoiesDuCode.'); + + 
foreach($html->find('div.blog-post') as $element) { + $item = array(); + $temp = $element->find('h1 a', 0); + $titre = html_entity_decode($temp->innertext); + $url = $temp->href; + + $temp = $element->find('div.blog-post-content', 0); + + // retrieve .gif instead of static .jpg + $images = $temp->find('p img'); + foreach($images as $image) { + $img_src = str_replace(".jpg", ".gif", $image->src); + $image->src = $img_src; + } + $content = $temp->innertext; + + // The author is optional: a post without an <i> element simply gets no author + $auteur = $temp->find('i', 0); + $pos = is_null($auteur) ? false : strpos($auteur->innertext, "by"); + + if($pos > 0) { + $auteur = trim(str_replace("*/", "", substr($auteur->innertext, ($pos + 2)))); + $item['author'] = $auteur; + } + + // Plain assignment: $item is fresh for every post, there is nothing to append to + $item['content'] = trim($content); + $item['uri'] = $url; + $item['title'] = trim($titre); + + $this->items[] = $item; + } + } +} diff --git a/bridges/LichessBridge.php b/bridges/LichessBridge.php new file mode 100644 index 0000000..bf7369f --- /dev/null +++ b/bridges/LichessBridge.php @@ -0,0 +1,31 @@ +<?php +class LichessBridge extends FeedExpander { + + const MAINTAINER = 'AmauryCarrade'; + const NAME = 'Lichess Blog'; + const URI = 'http://fr.lichess.org/blog'; + const DESCRIPTION = 'Returns the 5 newest posts from the Lichess blog (full text)'; + + public function collectData(){ + $this->collectExpandableDatas(self::URI . '.atom', 5); + } + + protected function parseItem($newsItem){ + $item = parent::parseItem($newsItem); + $item['content'] = $this->retrieveLichessPost($item['uri']); + return $item; + } + + private function retrieveLichessPost($blog_post_uri){ + $blog_post_html = getSimpleHTMLDOMCached($blog_post_uri); + $blog_post_div = $blog_post_html->find('#lichess_blog', 0); + + $post_chapo = $blog_post_div->find('.shortlede', 0)->innertext; + $post_content = $blog_post_div->find('.body', 0)->innertext; + + $content = '<p><em>' . $post_chapo . '</em></p>'; + $content .= '<div>' . $post_content . 
'</div>'; + + return $content; + } +} diff --git a/bridges/LinkedInCompanyBridge.php b/bridges/LinkedInCompanyBridge.php new file mode 100644 index 0000000..e629211 --- /dev/null +++ b/bridges/LinkedInCompanyBridge.php @@ -0,0 +1,37 @@ +<?php +class LinkedInCompanyBridge extends BridgeAbstract { + + const MAINTAINER = 'regisenguehard'; + const NAME = 'LinkedIn Company'; + const URI = 'https://www.linkedin.com/'; + const CACHE_TIMEOUT = 21600; //6 + const DESCRIPTION = 'Returns most recent actus from Company on LinkedIn. + (https://www.linkedin.com/company/<strong style=\"font-weight:bold;\">apple</strong>)'; + + const PARAMETERS = array( array( + 'c' => array( + 'name' => 'Company name', + 'required' => true + ) + )); + + public function collectData(){ + // Every item links back to the company page, the shares carry no URI of their own here + $link = self::URI . 'company/' . $this->getInput('c'); + + $html = getSimpleHTMLDOM($link) + or returnServerError('Could not request LinkedIn.'); + + foreach($html->find('//*[@id="my-feed-post"]/li') as $element) { + // Look the share body up once; skip entries without one or with an empty body + $share = $element->find('span.share-body', 0); + if(is_null($share) || !$share->innertext) { + continue; + } + + $text = strip_tags($share->innertext); + + $item = array(); + $item['uri'] = $link; + // The title is the first 100 characters of the tag-free share text + $item['title'] = mb_substr($text, 0, 100); + $item['content'] = $text; + $this->items[] = $item; + } + } +} diff --git a/bridges/LolibooruBridge.php b/bridges/LolibooruBridge.php new file mode 100644 index 0000000..b5bbd75 --- /dev/null +++ b/bridges/LolibooruBridge.php @@ -0,0 +1,11 @@ +<?php +require_once('MoebooruBridge.php'); + +class LolibooruBridge extends MoebooruBridge { + + const MAINTAINER = 'mitsukarenai'; + const NAME = 'Lolibooru'; + const URI = 'https://lolibooru.moe/'; + const DESCRIPTION = 'Returns images from given page and tags'; + +} diff --git a/bridges/MangareaderBridge.php b/bridges/MangareaderBridge.php new file mode 100644 index 0000000..cd7dddc --- /dev/null +++ b/bridges/MangareaderBridge.php @@ -0,0 +1,249 @@ +<?php +class 
MangareaderBridge extends BridgeAbstract { + + const MAINTAINER = 'logmanoriginal'; + const NAME = 'Mangareader Bridge'; + const URI = 'http://www.mangareader.net'; + const CACHE_TIMEOUT = 10800; // 3h + const DESCRIPTION = 'Returns the latest updates, popular mangas or manga updates (new chapters)'; + + const PARAMETERS = array( + 'Get latest updates' => array(), + 'Get popular mangas' => array( + 'category' => array( + 'name' => 'Category', + 'type' => 'list', + 'required' => true, + 'values' => array( + 'All' => 'all', + 'Action' => 'action', + 'Adventure' => 'adventure', + 'Comedy' => 'comedy', + 'Demons' => 'demons', + 'Drama' => 'drama', + 'Ecchi' => 'ecchi', + 'Fantasy' => 'fantasy', + 'Gender Bender' => 'gender-bender', + 'Harem' => 'harem', + 'Historical' => 'historical', + 'Horror' => 'horror', + 'Josei' => 'josei', + 'Magic' => 'magic', + 'Martial Arts' => 'martial-arts', + 'Mature' => 'mature', + 'Mecha' => 'mecha', + 'Military' => 'military', + 'Mystery' => 'mystery', + 'One Shot' => 'one-shot', + 'Psychological' => 'psychological', + 'Romance' => 'romance', + 'School Life' => 'school-life', + 'Sci-Fi' => 'sci-fi', + 'Seinen' => 'seinen', + 'Shoujo' => 'shoujo', + 'Shoujoai' => 'shoujoai', + 'Shounen' => 'shounen', + 'Shounenai' => 'shounenai', + 'Slice of Life' => 'slice-of-life', + 'Smut' => 'smut', + 'Sports' => 'sports', + 'Super Power' => 'super-power', + 'Supernatural' => 'supernatural', + 'Tragedy' => 'tragedy', + 'Vampire' => 'vampire', + 'Yaoi' => 'yaoi', + 'Yuri' => 'yuri' + ), + 'exampleValue' => 'All', + 'title' => 'Select your category' + ) + ), + 'Get manga updates' => array( + 'path' => array( + 'name' => 'Path', + 'required' => true, + 'pattern' => '[a-zA-Z0-9-_]*', + 'exampleValue' => 'bleach, umi-no-kishidan', + 'title' => 'URL part of desired manga' + ), + 'limit' => array( + 'name' => 'Limit', + 'type' => 'number', + 'defaultValue' => 10, + 'title' => 'Number of items to return [-1 returns all]' + ) + ) + ); + + private $request = 
''; + + public function collectData(){ + // We'll use the DOM parser for this as it makes navigation easier + $html = getContents($this->getURI()); + if(!$html) { + // Report the URI that was requested ($path only exists inside getURI()) + returnClientError('Could not receive data for ' . $this->getURI() . '!'); + } + libxml_use_internal_errors(true); + $doc = new DomDocument; + @$doc->loadHTML($html); + libxml_clear_errors(); + + // Navigate via XPath + $xpath = new DomXPath($doc); + + $this->request = ''; + switch($this->queriedContext) { + case 'Get latest updates': + $this->request = 'Latest updates'; + $this->getLatestUpdates($xpath); + break; + case 'Get popular mangas': + // Find manga name within "Popular mangas for ..." + $pagetitle = $xpath->query(".//*[@id='bodyalt']/h1")->item(0)->nodeValue; + $this->request = substr($pagetitle, 0, strrpos($pagetitle, " -")); + $this->getPopularMangas($xpath); + break; + case 'Get manga updates': + $limit = $this->getInput('limit'); + if(empty($limit)) { + $limit = self::PARAMETERS[$this->queriedContext]['limit']['defaultValue']; + } + + $this->request = $xpath->query(".//*[@id='mangaproperties']//*[@class='aname']") + ->item(0) + ->nodeValue; + + $this->getMangaUpdates($xpath, $limit); + break; + } + + // Return some dummy-data if no content available + if(empty($this->items)) { + $item = array(); + $item['content'] = "<p>No updates available</p>"; + + $this->items[] = $item; + } + } + + private function getLatestUpdates($xpath){ + // Query each item (consists of Manga + chapters) + $nodes = $xpath->query("//*[@id='latestchapters']/table//td"); + + foreach ($nodes as $node) { + // Query the manga + $manga = $xpath->query("a[@class='chapter']", $node)->item(0); + + // Collect the chapters for each Manga + $chapters = $xpath->query("a[@class='chaptersrec']", $node); + + if (isset($manga) && $chapters->length >= 1) { + $item = array(); + $item['uri'] = self::URI . 
htmlspecialchars($manga->getAttribute('href')); + $item['title'] = htmlspecialchars($manga->nodeValue); + + // Add each chapter to the feed + $item['content'] = ""; + + foreach ($chapters as $chapter) { + if($item['content'] <> "") { + $item['content'] .= "<br>"; + } + $item['content'] .= "<a href='" + . self::URI + . htmlspecialchars($chapter->getAttribute('href')) + . "'>" + . htmlspecialchars($chapter->nodeValue) + . "</a>"; + } + + $this->items[] = $item; + } + } + } + + private function getPopularMangas($xpath){ + // Query all mangas + $mangas = $xpath->query("//*[@id='mangaresults']/*[@class='mangaresultitem']"); + + foreach ($mangas as $manga) { + + // The thumbnail is encrypted in a css-style... + // format: "background-image:url('<the part which is actually interesting>')" + $mangaimgelement = $xpath->query(".//*[@class='imgsearchresults']", $manga) + ->item(0) + ->getAttribute('style'); + $thumbnail = substr($mangaimgelement, 22, strlen($mangaimgelement) - 24); + + $item = array(); + $item['title'] = htmlspecialchars($xpath->query(".//*[@class='manga_name']//a", $manga) + ->item(0) + ->nodeValue); + $item['uri'] = self::URI . 
$xpath->query(".//*[@class='manga_name']//a", $manga) + ->item(0) + ->getAttribute('href'); + // Leading "." keeps the query relative to $manga; a bare "//" would search the whole document + $item['author'] = htmlspecialchars($xpath->query(".//*[@class='author_name']", $manga) + ->item(0) + ->nodeValue); + $item['chaptercount'] = $xpath->query(".//*[@class='chapter_count']", $manga) + ->item(0) + ->nodeValue; + $item['genre'] = htmlspecialchars($xpath->query(".//*[@class='manga_genre']", $manga) + ->item(0) + ->nodeValue); + $item['content'] = <<<EOD +<a href="{$item['uri']}"><img src="{$thumbnail}" alt="{$item['title']}" /></a> +<p>{$item['genre']}</p> +<p>{$item['chaptercount']}</p> +EOD; + $this->items[] = $item; + } + } + + private function getMangaUpdates($xpath, $limit){ + $query = "(.//*[@id='listing']//tr)[position() > 1]"; + + if($limit !== -1) { + $query = "(.//*[@id='listing']//tr)[position() > 1][position() > last() - {$limit}]"; + } + + $chapters = $xpath->query($query); + + foreach ($chapters as $chapter) { + $item = array(); + $item['title'] = htmlspecialchars($xpath->query("td[1]", $chapter) + ->item(0) + ->nodeValue); + $item['uri'] = self::URI . $xpath->query("td[1]/a", $chapter) + ->item(0) + ->getAttribute('href'); + $item['timestamp'] = strtotime($xpath->query("td[2]", $chapter) + ->item(0) + ->nodeValue); + array_unshift($this->items, $item); + } + } + + public function getURI(){ + switch($this->queriedContext) { + case 'Get latest updates': + $path = "latest"; + break; + case 'Get popular mangas': + $path = "popular"; + if($this->getInput('category') !== "all") { + $path .= "/" . $this->getInput('category'); + } + break; + case 'Get manga updates': + $path = $this->getInput('path'); + break; + default: return parent::getURI(); + } + return self::URI . '/' . $path; + } + + public function getName(){ + return (!empty($this->request) ? $this->request . ' - ' : '') . 
'Mangareader Bridge'; + } +} diff --git a/bridges/MilbooruBridge.php b/bridges/MilbooruBridge.php new file mode 100644 index 0000000..c3b633e --- /dev/null +++ b/bridges/MilbooruBridge.php @@ -0,0 +1,11 @@ +<?php +require_once('Shimmie2Bridge.php'); + +class MilbooruBridge extends Shimmie2Bridge { + + const MAINTAINER = 'mitsukarenai'; + const NAME = 'Milbooru'; + const URI = 'http://sheslostcontrol.net/moe/shimmie/'; + const DESCRIPTION = 'Returns images from given page'; + +} diff --git a/bridges/MixCloudBridge.php b/bridges/MixCloudBridge.php new file mode 100644 index 0000000..aa6340a --- /dev/null +++ b/bridges/MixCloudBridge.php @@ -0,0 +1,52 @@ +<?php + +class MixCloudBridge extends BridgeAbstract { + + const MAINTAINER = 'Alexis CHEMEL'; + const NAME = 'MixCloud'; + const URI = 'https://mixcloud.com/'; + const CACHE_TIMEOUT = 3600; // 1h + const DESCRIPTION = 'Returns latest musics on user stream'; + + const PARAMETERS = array(array( + 'u' => array( + 'name' => 'username', + 'required' => true, + ) + )); + + public function getName(){ + if(!is_null($this->getInput('u'))) { + return 'MixCloud - ' . $this->getInput('u'); + } + + return parent::getName(); + } + + public function collectData(){ + + $html = getSimpleHTMLDOM(self::URI . $this->getInput('u')) + or returnServerError('Could not request MixCloud.'); + + foreach($html->find('section.card') as $element) { + + $item = array(); + + $item['uri'] = self::URI . $element->find('hgroup.card-title h1 a', 0)->getAttribute('href'); + $item['title'] = html_entity_decode( + $element->find('hgroup.card-title h1 a span', 0)->getAttribute('title'), + ENT_QUOTES + ); + + $image = $element->find('a.album-art img', 0); + + if($image) { + $item['content'] = '<img src="' . $image->getAttribute('src') . 
'" />'; + } + + $item['author'] = trim($element->find('hgroup.card-title h2 a', 0)->innertext); + + $this->items[] = $item; + } + } +} diff --git a/bridges/MoebooruBridge.php b/bridges/MoebooruBridge.php new file mode 100644 index 0000000..9d9a625 --- /dev/null +++ b/bridges/MoebooruBridge.php @@ -0,0 +1,56 @@ +<?php +class MoebooruBridge extends BridgeAbstract { + + const NAME = 'Moebooru'; + const URI = 'https://moe.dev.myconan.net/'; + const CACHE_TIMEOUT = 1800; // 30min + const DESCRIPTION = 'Returns images from given page'; + const MAINTAINER = 'pmaziere'; + + const PARAMETERS = array( array( + 'p' => array( + 'name' => 'page', + 'defaultValue' => 1, + 'type' => 'number' + ), + 't' => array( + 'name' => 'tags' + ) + )); + + protected function getFullURI(){ + return $this->getURI() + . 'post?page=' + . $this->getInput('p') + . '&tags=' + . urlencode($this->getInput('t')); + } + + public function collectData(){ + $html = getSimpleHTMLDOM($this->getFullURI()) + or returnServerError('Could not request ' . $this->getName()); + + $input_json = explode('Post.register(', $html); + foreach($input_json as $element) + $data[] = preg_replace('/}\)(.*)/', '}', $element); + unset($data[0]); + + foreach($data as $datai) { + $json = json_decode($datai, true); + $item = array(); + $item['uri'] = $this->getURI() . '/post/show/' . $json['id']; + $item['postid'] = $json['id']; + $item['timestamp'] = $json['created_at']; + $item['imageUri'] = $json['file_url']; + $item['title'] = $this->getName() . ' | ' . $json['id']; + $item['content'] = '<a href="' + . $item['imageUri'] + . '"><img src="' + . $json['preview_url'] + . '" /></a><br>Tags: ' + . 
$json['tags']; + + $this->items[] = $item; + } + } +} diff --git a/bridges/MoinMoinBridge.php b/bridges/MoinMoinBridge.php new file mode 100644 index 0000000..5b41924 --- /dev/null +++ b/bridges/MoinMoinBridge.php @@ -0,0 +1,327 @@ +<?php +class MoinMoinBridge extends BridgeAbstract { + + const MAINTAINER = 'logmanoriginal'; + const NAME = 'MoinMoin Bridge'; + const URI = 'https://moinmo.in'; + const DESCRIPTION = 'Generates feeds for pages of a MoinMoin (compatible) wiki'; + const PARAMETERS = array( + array( + 'source' => array( + 'name' => 'Source', + 'type' => 'text', + 'required' => true, + 'title' => 'Insert wiki page URI (e.g.: https://moinmo.in/MoinMoin)', + 'exampleValue' => 'https://moinmo.in/MoinMoin' + ), + 'separator' => array( + 'name' => 'Separator', + 'type' => 'list', + 'requied' => true, + 'title' => 'Defines the separtor for splitting content into feeds', + 'defaultValue' => 'h2', + 'values' => array( + 'Header (h1)' => 'h1', + 'Header (h2)' => 'h2', + 'Header (h3)' => 'h3', + 'List element (li)' => 'li', + 'Anchor (a)' => 'a' + ) + ), + 'limit' => array( + 'name' => 'Limit', + 'type' => 'number', + 'required' => false, + 'title' => 'Number of items to return (from top)', + 'defaultValue' => -1 + ), + 'content' => array( + 'name' => 'Content', + 'type' => 'list', + 'required' => false, + 'title' => 'Defines how feed contents are build', + 'defaultValue' => 'separator', + 'values' => array( + 'By separator' => 'separator', + 'Follow link (only for anchor)' => 'follow', + 'None' => 'none' + ) + ) + ) + ); + + private $title = ''; + + public function collectData(){ + /* MoinMoin uses a rather unpleasent representation of HTML. Instead of + * using tags like <article/>, <navigation/>, <header/>, etc... it uses + * <div/>, <span/> and <p/>. Also each line is literaly identified via + * IDs. The only way to distinguish content is via headers, though not + * in all cases. + * + * Example (indented for the sake of readability): + * ... 
+ * <span class="anchor" id="line-1"></span> + * <span class="anchor" id="line-2"></span> + * <span class="anchor" id="line-3"></span> + * <span class="anchor" id="line-4"></span> + * <span class="anchor" id="line-5"></span> + * <span class="anchor" id="line-6"></span> + * <span class="anchor" id="line-7"></span> + * <span class="anchor" id="line-8"></span> + * <span class="anchor" id="line-9"></span> + * <p class="line867">MoinMoin is a Wiki software implemented in + * <a class="interwiki" href="/Python" title="MoinMoin">Python</a> + * and distributed as Free Software under + * <a class="interwiki" href="/GPL" title="MoinMoin">GNU GPL license</a>. + * ... + */ + $html = getSimpleHTMLDOM($this->getInput('source')) + or returnServerError('Could not load ' . $this->getInput('source')); + + // Some anchors link to local sites or local IDs (both don't work well + // in feeds) + $html = $this->fixAnchors($html); + + $this->title = $html->find('title', 0)->innertext . ' | ' . self::NAME; + + // Here we focus on simple author and timestamp information from the given + // page. Later we update this information in case the anchor is followed. 
+ $author = $this->findAuthor($html); + $timestamp = $this->findTimestamp($html); + + $sections = $this->splitSections($html); + + foreach($sections as $section) { + $item = array(); + + $item['uri'] = $this->findSectionAnchor($section[0]); + + switch($this->getInput('content')) { + case 'none': // Do not return any content + break; + case 'follow': // Follow the anchor + // We can only follow anchors (use default otherwise) + if($this->getInput('separator') === 'a') { + $content = $this->followAnchor($item['uri']); + + // Return only actual content + $item['content'] = $content->find('div#page', 0)->innertext; + + // Each page could have its own author and timestamp + $author = $this->findAuthor($content); + $timestamp = $this->findTimestamp($content); + + break; + } + case 'separator': + default: // Use contents from the current page + $item['content'] = $this->cleanArticle($section[2]); + } + + if(!is_null($author)) $item['author'] = $author; + if(!is_null($timestamp)) $item['timestamp'] = $timestamp; + $item['title'] = strip_tags($section[1]); + + // Skip items with empty title + if(empty(trim($item['title']))) { + continue; + } + + $this->items[] = $item; + + if($this->getInput('limit') > 0 + && count($this->items) >= $this->getInput('limit')) { + break; + } + } + } + + public function getName(){ + return $this->title ?: parent::getName(); + } + + public function getURI(){ + return $this->getInput('source') ?: parent::getURI(); + } + + /** + * Splits the html into sections. + * + * Returns an array with one element per section. 
Each element consists of: + * [0] The entire section + * [1] The section title + * [2] The section content + */ + private function splitSections($html){ + $content = $html->find('div#page', 0)->innertext + or returnServerError('Unable to find <div id="page"/>!'); + + $sections = array(); + + $regex = implode( + '', + array( + "\<{$this->getInput('separator')}.+?(?=\>)\>", + "(.+?)(?=\<\/{$this->getInput('separator')}\>)", + "\<\/{$this->getInput('separator')}\>", + "(.+?)((?=\<{$this->getInput('separator')})|(?=\<div\sid=\"pagebottom\")){1}" + ) + ); + + preg_match_all( + '/' . $regex . '/m', + $content, + $sections, + PREG_SET_ORDER + ); + + // Some pages don't use headers, return page as one feed + if(count($sections) === 0) { + return array( + array( + $content, + $html->find('title', 0)->innertext, + $content + ) + ); + } + + return $sections; + } + + /** + * Returns the anchor for a given section + */ + private function findSectionAnchor($section){ + $html = str_get_html($section); + + // For IDs + $anchor = $html->find($this->getInput('separator') . '[id=]', 0); + if(!is_null($anchor)) { + return $this->getInput('source') . '#' . $anchor->id; + } + + // For actual anchors + $anchor = $html->find($this->getInput('separator') . '[href=]', 0); + if(!is_null($anchor)) { + return $anchor->href; + } + + // Nothing found + return $this->getInput('source'); + } + + /** + * Returns the author + * + * Notice: Some pages don't provide author information + */ + private function findAuthor($html){ + /* Example: + * <p id="pageinfo" class="info" dir="ltr" lang="en">MoinMoin: LocalSpellingWords + * (last edited 2017-02-16 15:36:31 by <span title="??? 
@ hosted-by.leaseweb.com + * [178.162.199.143]">hosted-by</span>)</p> + */ + $pageinfo = $html->find('[id="pageinfo"]', 0); + + if(is_null($pageinfo)) { + return null; + } else { + $author = $pageinfo->find('[title=]', 0); + if(is_null($author)) { + return null; + } else { + return trim(explode('@', $author->title)[0]); + } + } + } + + /** + * Returns the time of last edit + * + * Notice: Some pages don't provide this information + */ + private function findTimestamp($html){ + // See example of findAuthor() + $pageinfo = $html->find('[id="pageinfo"]', 0); + + if(is_null($pageinfo)) { + return null; + } else { + $timestamp = $pageinfo->innertext; + $matches = array(); + preg_match('/.+?(?=\().+?(?=\d)([0-9\-\s\:]+)/m', $pageinfo, $matches); + return strtotime($matches[1]); + } + } + + /** + * Returns the original HTML with all anchors fixed (makes relative anchors + * absolute) + */ + private function fixAnchors($html, $source = null){ + + $source = $source ?: $this->getURI(); + + foreach($html->find('a') as $anchor) { + switch(substr($anchor->href, 0, 1)) { + case 'h': // http or https, no actions required + break; + case '/': // some relative path + $anchor->href = $this->findDomain($source) . $anchor->href; + break; + case '#': // it's an ID + default: // probably something like ? or &, skip empty ones + if(!isset($anchor->href)) + break; + $anchor->href = $source . 
$anchor->href; + } + } + + return $html; + } + + /** + * Loads the full article of a given anchor (if the anchor is from the same + * wiki domain) + */ + private function followAnchor($anchor){ + if(strrpos($anchor, $this->findDomain($this->getInput('source')) === false)) { + return null; + } + + $html = getSimpleHTMLDOMCached($anchor); + if(!$html) { // Cannot load article + return null; + } + + return $this->fixAnchors($html, $anchor); + } + + /** + * Finds the domain for a given URI + */ + private function findDomain($uri){ + $matches = array(); + preg_match('/(http[s]{0,1}:\/\/.+?(?=\/))/', $uri, $matches); + return $matches[1]; + } + + /* This function is a copy from CNETBridge */ + private function stripWithDelimiters($string, $start, $end){ + while(strpos($string, $start) !== false) { + $section_to_remove = substr($string, strpos($string, $start)); + $section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end)); + $string = str_replace($section_to_remove, '', $string); + } + + return $string; + } + + /* This function is based on CNETBridge */ + private function cleanArticle($article_html){ + $article_html = $this->stripWithDelimiters($article_html, '<script', '</script>'); + return $article_html; + } +} diff --git a/bridges/MondeDiploBridge.php b/bridges/MondeDiploBridge.php new file mode 100644 index 0000000..85f771e --- /dev/null +++ b/bridges/MondeDiploBridge.php @@ -0,0 +1,26 @@ +<?php +class MondeDiploBridge extends BridgeAbstract { + + const MAINTAINER = 'Pitchoule'; + const NAME = 'Monde Diplomatique'; + const URI = 'http://www.monde-diplomatique.fr/'; + const CACHE_TIMEOUT = 21600; //6h + const DESCRIPTION = 'Returns most recent results from MondeDiplo.'; + + public function collectData(){ + $html = getSimpleHTMLDOM(self::URI) + or returnServerError('Could not request MondeDiplo. for : ' . 
self::URI); + + foreach($html->find('div.unarticle') as $article) { + $element = $article->parent(); + $item = array(); + $item['uri'] = self::URI . $element->href; + $item['title'] = $element->find('h3', 0)->plaintext; + $item['content'] = $element->find('div.dates_auteurs', 0)->plaintext + . '<br>' + . strstr($element->find('div', 0)->plaintext, $element->find('div.dates_auteurs', 0)->plaintext, true); + + $this->items[] = $item; + } + } +} diff --git a/bridges/MsnMondeBridge.php b/bridges/MsnMondeBridge.php new file mode 100644 index 0000000..12d3d2f --- /dev/null +++ b/bridges/MsnMondeBridge.php @@ -0,0 +1,35 @@ +<?php +class MsnMondeBridge extends BridgeAbstract { + + const MAINTAINER = 'kranack'; + const NAME = 'MSN Actu Monde'; + const URI = 'http://www.msn.com/'; + const DESCRIPTION = 'Returns the 10 newest posts from MSN Actualités (full text)'; + + public function getURI(){ + return self::URI . 'fr-fr/actualite/monde'; + } + + private function msnMondeExtractContent($url, &$item){ + $html2 = getSimpleHTMLDOM($url); + $item['content'] = $html2->find('#content', 0)->find('article', 0)->find('section', 0)->plaintext; + $item['timestamp'] = strtotime($html2->find('.authorinfo-txt', 0)->find('time', 0)->datetime); + } + + public function collectData(){ + $html = getSimpleHTMLDOM($this->getURI()) + or returnServerError('Could not request MsnMonde.'); + + $limit = 0; + foreach($html->find('.smalla') as $article) { + if($limit < 10) { + $item = array(); + $item['title'] = utf8_decode($article->find('h4', 0)->innertext); + $item['uri'] = self::URI . 
utf8_decode($article->find('a', 0)->href); + $this->msnMondeExtractContent($item['uri'], $item); + $this->items[] = $item; + $limit++; + } + } + } +} diff --git a/bridges/MspabooruBridge.php b/bridges/MspabooruBridge.php new file mode 100644 index 0000000..00a7bd7 --- /dev/null +++ b/bridges/MspabooruBridge.php @@ -0,0 +1,12 @@ +<?php +require_once('GelbooruBridge.php'); + +class MspabooruBridge extends GelbooruBridge { + + const MAINTAINER = 'mitsukarenai'; + const NAME = 'Mspabooru'; + const URI = 'http://mspabooru.com/'; + const DESCRIPTION = 'Returns images from given page'; + const PIDBYPAGE = 50; + +} diff --git a/bridges/NasaApodBridge.php b/bridges/NasaApodBridge.php new file mode 100644 index 0000000..74fd219 --- /dev/null +++ b/bridges/NasaApodBridge.php @@ -0,0 +1,44 @@ +<?php +class NasaApodBridge extends BridgeAbstract { + + const MAINTAINER = 'corenting'; + const NAME = 'NASA APOD Bridge'; + const URI = 'https://apod.nasa.gov/apod/'; + const CACHE_TIMEOUT = 43200; // 12h + const DESCRIPTION = 'Returns the 3 latest NASA APOD pictures and explanations'; + + public function collectData(){ + + $html = getSimpleHTMLDOM(self::URI . 'archivepix.html') + or returnServerError('Error while downloading the website content'); + + $list = explode("<br>", $html->find('b', 0)->innertext); + + for($i = 0; $i < 3; $i++) { + $line = $list[$i]; + $item = array(); + + $uri_page = $html->find('a', $i + 3)->href; + $uri = self::URI . $uri_page; + $item['uri'] = $uri; + + $picture_html = getSimpleHTMLDOM($uri); + $picture_html_string = $picture_html->innertext; + + //Extract image and explanation + $media = $picture_html->find('p', 1)->innertext; + $media = strstr($media, '<br>'); + $media = preg_replace('/<br>/', '', $media, 1); + $explanation = $picture_html->find('p', 2)->innertext; + + //Extract date from the picture page + $date = explode(" ", $picture_html->find('p', 1)->innertext); + $item['timestamp'] = strtotime($date[4] . $date[3] . 
$date[2]); + + //Other informations + $item['content'] = $media . '<br />' . $explanation; + $item['title'] = $picture_html->find('b', 0)->innertext; + $this->items[] = $item; + } + } +} diff --git a/bridges/NeuviemeArtBridge.php b/bridges/NeuviemeArtBridge.php new file mode 100644 index 0000000..d0954fc --- /dev/null +++ b/bridges/NeuviemeArtBridge.php @@ -0,0 +1,57 @@ +<?php +class NeuviemeArtBridge extends FeedExpander { + + const MAINTAINER = 'ORelio'; + const NAME = '9ème Art Bridge'; + const URI = 'http://www.9emeart.fr/'; + const DESCRIPTION = 'Returns the newest articles.'; + + private function stripWithDelimiters($string, $start, $end){ + while(strpos($string, $start) !== false) { + $section_to_remove = substr($string, strpos($string, $start)); + $section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end)); + $string = str_replace($section_to_remove, '', $string); + } + + return $string; + } + + protected function parseItem($item){ + $item = parent::parseItem($item); + + $article_html = getSimpleHTMLDOMCached($item['uri']); + if(!$article_html) { + $item['content'] = 'Could not request 9eme Art: ' . $item['uri']; + return $item; + } + + $article_image = ''; + foreach ($article_html->find('img.img_full') as $img) { + if ($img->alt == $item['title']) { + $article_image = self::URI . $img->src; + break; + } + } + + $article_content = ''; + if($article_image) { + $article_content = '<p><img src="' . $article_image . '" /></p>'; + } + $article_content .= str_replace( + 'src="/', 'src="' . 
self::URI, + $article_html->find('div.newsGenerique_con', 0)->innertext + ); + $article_content = $this->stripWithDelimiters($article_content, '<script', '</script>'); + $article_content = $this->stripWithDelimiters($article_content, '<style', '</style>'); + $article_content = $this->stripWithDelimiters($article_content, '<link', '>'); + + $item['content'] = $article_content; + + return $item; + } + + public function collectData(){ + $feedUrl = self::URI . '9emeart.rss'; + $this->collectExpandableDatas($feedUrl); + } +} diff --git a/bridges/NextInpactBridge.php b/bridges/NextInpactBridge.php new file mode 100644 index 0000000..5de5c8b --- /dev/null +++ b/bridges/NextInpactBridge.php @@ -0,0 +1,34 @@ +<?php +class NextInpactBridge extends FeedExpander { + + const MAINTAINER = 'qwertygc'; + const NAME = 'NextInpact Bridge'; + const URI = 'https://www.nextinpact.com/'; + const DESCRIPTION = 'Returns the newest articles.'; + + public function collectData(){ + $this->collectExpandableDatas(self::URI . 'rss/news.xml', 10); + } + + protected function parseItem($newsItem){ + $item = parent::parseItem($newsItem); + $item['content'] = $this->extractContent($item['uri']); + return $item; + } + + private function extractContent($url){ + $html2 = getSimpleHTMLDOMCached($url); + $text = '<p><em>' + . $html2->find('span.sub_title', 0)->innertext + . '</em></p><p><img src="' + . $html2->find('div.container_main_image_article', 0)->find('img.dedicated', 0)->src + . '" alt="-" /></p><div>' + . $html2->find('div[itemprop=articleBody]', 0)->innertext + . '</div>'; + + $premium_article = $html2->find('h2.title_reserve_article', 0); + if (is_object($premium_article)) + $text = $text . '<p><em>' . $premium_article->innertext . 
'</em></p>'; + return $text; + } +} diff --git a/bridges/NextgovBridge.php b/bridges/NextgovBridge.php new file mode 100644 index 0000000..370b0bf --- /dev/null +++ b/bridges/NextgovBridge.php @@ -0,0 +1,74 @@ +<?php +class NextgovBridge extends FeedExpander { + + const MAINTAINER = 'ORelio'; + const NAME = 'Nextgov Bridge'; + const URI = 'https://www.nextgov.com/'; + const DESCRIPTION = 'USA Federal technology news, best practices, and web 2.0 tools.'; + + const PARAMETERS = array( array( + 'category' => array( + 'name' => 'Category', + 'type' => 'list', + 'values' => array( + 'All' => 'all', + 'Technology News' => 'technology-news', + 'CIO Briefing' => 'cio-briefing', + 'Emerging Tech' => 'emerging-tech', + 'Cloud' => 'cloud-computing', + 'Cybersecurity' => 'cybersecurity', + 'Mobile' => 'mobile', + 'Health' => 'health', + 'Defense' => 'defense', + 'Big Data' => 'big-data' + ) + ) + )); + + public function collectData(){ + $this->collectExpandableDatas(self::URI . 'rss/' . $this->getInput('category') . '/', 10); + } + + protected function parseItem($newsItem){ + $item = parent::parseItem($newsItem); + + $item['content'] = ''; + + $namespaces = $newsItem->getNamespaces(true); + if(isset($namespaces['media'])) { + $media = $newsItem->children($namespaces['media']); + if(isset($media->content)) { + $attributes = $media->content->attributes(); + $item['content'] = '<img src="' . $attributes['url'] . 
'">'; + } + } + + $item['content'] .= $this->extractContent($item['uri']); + return $item; + } + + private function stripWithDelimiters($string, $start, $end){ + while (strpos($string, $start) !== false) { + $section_to_remove = substr($string, strpos($string, $start)); + $section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end)); + $string = str_replace($section_to_remove, '', $string); + } + + return $string; + } + + private function extractContent($url){ + $article = getSimpleHTMLDOMCached($url) + or returnServerError('Could not request Nextgov: ' . $url); + + $contents = $article->find('div.wysiwyg', 0)->innertext; + $contents = $this->stripWithDelimiters($contents, '<div class="ad-container">', '</div>'); + $contents = $this->stripWithDelimiters($contents, '<div', '</div>'); //ad outer div + return $this->stripWithDelimiters($contents, '<script', '</script>'); + $contents = ($article_thumbnail == '' ? '' : '<p><img src="' . $article_thumbnail . '" /></p>') + . '<p><b>' + . $article_subtitle + . '</b></p>' + . trim($contents); + } +} diff --git a/bridges/NiceMatinBridge.php b/bridges/NiceMatinBridge.php new file mode 100644 index 0000000..117c779 --- /dev/null +++ b/bridges/NiceMatinBridge.php @@ -0,0 +1,32 @@ +<?php +class NiceMatinBridge extends FeedExpander { + + const MAINTAINER = 'pit-fgfjiudghdf'; + const NAME = 'NiceMatin'; + const URI = 'http://www.nicematin.com/'; + const DESCRIPTION = 'Returns the 10 newest posts from NiceMatin (full text)'; + + public function collectData(){ + $this->collectExpandableDatas(self::URI . 'derniere-minute/rss', 10); + } + + protected function parseItem($newsItem){ + $item = parent::parseItem($newsItem); + $item['content'] = $this->extractContent($item['uri']); + return $item; + } + + private function extractContent($url){ + $html = getSimpleHTMLDOMCached($url); + if(!$html) + return 'Could not acquire content from url: ' . $url . 
'!'; + + $content = $html->find('article', 0); + if(!$content) + return 'Could not find \'section\'!'; + + $text = preg_replace('#<script(.*?)>(.*?)</script>#is', '', $content->innertext); + $text = strip_tags($text, '<p><a><img>'); + return $text; + } +} diff --git a/bridges/NovelUpdatesBridge.php b/bridges/NovelUpdatesBridge.php new file mode 100644 index 0000000..729eb48 --- /dev/null +++ b/bridges/NovelUpdatesBridge.php @@ -0,0 +1,69 @@ +<?php +class NovelUpdatesBridge extends BridgeAbstract { + + const MAINTAINER = 'albirew'; + const NAME = 'Novel Updates'; + const URI = 'http://www.novelupdates.com/'; + const CACHE_TIMEOUT = 21600; // 6h + const DESCRIPTION = 'Returns releases from Novel Updates'; + const PARAMETERS = array( array( + 'n' => array( + 'name' => 'Novel name as found in the url', + 'exampleValue' => 'spirit-realm', + 'required' => true + ) + )); + + private $seriesTitle = ''; + + public function getURI(){ + if(!is_null($this->getInput('n'))) { + return static::URI . '/series/' . $this->getInput('n') . '/'; + } + + return parent::getURI(); + } + + public function collectData(){ + $fullhtml = getSimpleHTMLDOM($this->getURI()) + or returnServerError('Could not request NovelUpdates, novel "' . $this->getInput('n') . 
'" not found'); + + $this->seriesTitle = $fullhtml->find('h4.seriestitle', 0)->plaintext; + // dirty fix for nasty simpledom bug: https://github.com/sebsauvage/rss-bridge/issues/259 + // forcefully removes tbody + $html = $fullhtml->find('table#myTable', 0)->innertext; + $html = stristr($html, '<tbody>'); //strip thead + $html = stristr($html, '<tr>'); //remove tbody + $html = str_get_html(stristr($html, '</tbody>', true)); //remove last tbody and get back as an array + foreach($html->find('tr') as $element) { + $item = array(); + $item['uri'] = $element->find('td', 2)->find('a', 0)->href; + $item['title'] = $element->find('td', 2)->find('a', 0)->plaintext; + $item['team'] = $element->find('td', 1)->innertext; + $item['timestamp'] = strtotime($element->find('td', 0)->plaintext); + $item['content'] = '<a href="' + . $item['uri'] + . '">' + . $this->seriesTitle + . ' - ' + . $item['title'] + . '</a> by ' + . $item['team'] + . '<br><a href="' + . $item['uri'] + . '">' + . $fullhtml->find('div.seriesimg', 0)->innertext + . '</a>'; + + $this->items[] = $item; + } + } + + public function getName(){ + if(!empty($this->seriesTitle)) { + return $this->seriesTitle . ' - ' . 
static::NAME; + } + + return parent::getName(); + } +} diff --git a/bridges/OpenClassroomsBridge.php b/bridges/OpenClassroomsBridge.php new file mode 100644 index 0000000..5f0daca --- /dev/null +++ b/bridges/OpenClassroomsBridge.php @@ -0,0 +1,49 @@ +<?php +class OpenClassroomsBridge extends BridgeAbstract { + + const MAINTAINER = 'sebsauvage'; + const NAME = 'OpenClassrooms Bridge'; + const URI = 'https://openclassrooms.com/'; + const CACHE_TIMEOUT = 21600; // 6h + const DESCRIPTION = 'Returns latest tutorials from OpenClassrooms.'; + + const PARAMETERS = array( array( + 'u' => array( + 'name' => 'Catégorie', + 'type' => 'list', + 'required' => true, + 'values' => array( + 'Arts & Culture' => 'arts', + 'Code' => 'code', + 'Design' => 'design', + 'Entreprise' => 'business', + 'Numérique' => 'digital', + 'Sciences' => 'sciences', + 'Sciences Humaines' => 'humainities', + 'Systèmes d\'information' => 'it', + 'Autres' => 'others' + ) + ) + )); + + public function getURI(){ + if(!is_null($this->getInput('u'))) { + return self::URI . '/courses?categories=' . $this->getInput('u') . '&title=&sort=updatedAt+desc'; + } + + return parent::getURI(); + } + + public function collectData(){ + $html = getSimpleHTMLDOM($this->getURI()) + or returnServerError('Could not request OpenClassrooms.'); + + foreach($html->find('.courseListItem') as $element) { + $item = array(); + $item['uri'] = self::URI . 
$element->find('a', 0)->href; + $item['title'] = $element->find('h3', 0)->plaintext; + $item['content'] = $element->find('slidingItem__descriptionContent', 0)->plaintext; + $this->items[] = $item; + } + } +} diff --git a/bridges/ParuVenduImmoBridge.php b/bridges/ParuVenduImmoBridge.php new file mode 100644 index 0000000..a2e2b33 --- /dev/null +++ b/bridges/ParuVenduImmoBridge.php @@ -0,0 +1,102 @@ +<?php +class ParuVenduImmoBridge extends BridgeAbstract { + + const MAINTAINER = 'polo2ro'; + const NAME = 'Paru Vendu Immobilier'; + const URI = 'http://www.paruvendu.fr'; + const CACHE_TIMEOUT = 10800; // 3h + const DESCRIPTION = 'Returns the ads from the first page of search result.'; + + const PARAMETERS = array( array( + 'minarea' => array( + 'name' => 'Minimal surface m²', + 'type' => 'number' + ), + 'maxprice' => array( + 'name' => 'Max price', + 'type' => 'number' + ), + 'pa' => array( + 'name' => 'Country code', + 'exampleValue' => 'FR' + ), + 'lo' => array( + 'name' => 'department numbers or postal codes, comma-separated' + ) + )); + + public function collectData(){ + $html = getSimpleHTMLDOM($this->getURI()) + or returnServerError('Could not request paruvendu.'); + + foreach($html->find('div.annonce a') as $element) { + + if(!$element->title) { + continue; + } + + $img = ''; + foreach($element->find('span.img img') as $img) { + if($img->original) { + $img = '<img src="' . $img->original . '" />'; + } + } + + $desc = $element->find('span.desc')[0]->innertext; + $desc = str_replace("voir l'annonce", '', $desc); + + $price = $element->find('span.price')[0]->innertext; + + list($href) = explode('#', $element->href); + + $item = array(); + $item['uri'] = self::URI . $href; + $item['title'] = $element->title; + $item['content'] = $img . $desc . 
$price; + $this->items[] = $item; + } + } + + public function getURI(){ + $appartment = '&tbApp=1&tbDup=1&tbChb=1&tbLof=1&tbAtl=1&tbPla=1'; + $maison = '&tbMai=1&tbVil=1&tbCha=1&tbPro=1&tbHot=1&tbMou=1&tbFer=1'; + $link = self::URI + . '/immobilier/annonceimmofo/liste/listeAnnonces?tt=1' + . $appartment + . $maison; + + if($this->getInput('minarea')) { + $link .= '&sur0=' . urlencode($this->getInput('minarea')); + } + + if($this->getInput('maxprice')) { + $link .= '&px1=' . urlencode($this->getInput('maxprice')); + } + + if($this->getInput('pa')) { + $link .= '&pa=' . urlencode($this->getInput('pa')); + } + + if($this->getInput('lo')) { + $link .= '&lo=' . urlencode($this->getInput('lo')); + } + return $link; + } + + public function getName(){ + if(!is_null($this->getInput('minarea'))) { + $request = ''; + $minarea = $this->getInput('minarea'); + if(!empty($minarea)) { + $request .= ' ' . $minarea . ' m2'; + } + $location = $this->getInput('lo'); + if(!empty($location)) { + $request .= ' In: ' . $location; + } + return 'Paru Vendu Immobilier' . 
$request; + } + + return parent::getName(); + } +} diff --git a/bridges/PickyWallpapersBridge.php b/bridges/PickyWallpapersBridge.php new file mode 100644 index 0000000..6c26df7 --- /dev/null +++ b/bridges/PickyWallpapersBridge.php @@ -0,0 +1,101 @@ +<?php +class PickyWallpapersBridge extends BridgeAbstract { + + const MAINTAINER = 'nel50n'; + const NAME = 'PickyWallpapers Bridge'; + const URI = 'http://www.pickywallpapers.com/'; + const CACHE_TIMEOUT = 43200; // 12h + const DESCRIPTION = 'Returns the latests wallpapers from PickyWallpapers'; + + const PARAMETERS = array( array( + 'c' => array( + 'name' => 'category', + 'required' => true + ), + 's' => array( + 'name' => 'subcategory' + ), + 'm' => array( + 'name' => 'Max number of wallpapers', + 'defaultValue' => 12, + 'type' => 'number' + ), + 'r' => array( + 'name' => 'resolution', + 'exampleValue' => '1920x1200, 1680x1050,…', + 'defaultValue' => '1920x1200', + 'pattern' => '[0-9]{3,4}x[0-9]{3,4}' + ) + )); + + public function collectData(){ + $lastpage = 1; + $num = 0; + $max = $this->getInput('m'); + $resolution = $this->getInput('r'); // Wide wallpaper default + + for($page = 1; $page <= $lastpage; $page++) { + $html = getSimpleHTMLDOM($this->getURI() . '/page-' . $page . '/') + or returnServerError('No results for this query.'); + + if($page === 1) { + preg_match('/page-(\d+)\/$/', $html->find('.pages li a', -2)->href, $matches); + $lastpage = min($matches[1], ceil($max / 12)); + } + + foreach($html->find('.items li img') as $element) { + $item = array(); + $item['uri'] = str_replace('www', 'wallpaper', self::URI) + . '/' + . $resolution + . '/' + . basename($element->src); + + $item['timestamp'] = time(); + $item['title'] = $element->alt; + $item['content'] = $item['title'] + . '<br><a href="' + . $item['uri'] + . '">' + . $element + . 
'</a>'; + + $this->items[] = $item; + + $num++; + if ($num >= $max) + break 2; + } + } + } + + public function getURI(){ + if(!is_null($this->getInput('s')) && !is_null($this->getInput('r')) && !is_null($this->getInput('c'))) { + $subcategory = $this->getInput('s'); + $link = self::URI + . $this->getInput('r') + . '/' + . $this->getInput('c') + . '/' + . $subcategory; + + return $link; + } + + return parent::getURI(); + } + + public function getName(){ + if(!is_null($this->getInput('s'))) { + $subcategory = $this->getInput('s'); + return 'PickyWallpapers - ' + . $this->getInput('c') + . ($subcategory ? ' > ' . $subcategory : '') + . ' [' + . $this->getInput('r') + . ']'; + } + + return parent::getName(); + } +} diff --git a/bridges/PinterestBridge.php b/bridges/PinterestBridge.php new file mode 100644 index 0000000..c5282ff --- /dev/null +++ b/bridges/PinterestBridge.php @@ -0,0 +1,163 @@ +<?php +class PinterestBridge extends FeedExpander { + + const MAINTAINER = 'pauder'; + const NAME = 'Pinterest Bridge'; + const URI = 'https://www.pinterest.com'; + const DESCRIPTION = 'Returns the newest images on a board'; + + const PARAMETERS = array( + 'By username and board' => array( + 'u' => array( + 'name' => 'username', + 'required' => true + ), + 'b' => array( + 'name' => 'board', + 'required' => true + ), + 'r' => array( + 'name' => 'Use custom RSS', + 'type' => 'checkbox', + 'required' => false, + 'title' => 'Uncheck to return data via custom filters (more data)' + ) + ), + 'From search' => array( + 'q' => array( + 'name' => 'Keyword', + 'required' => true + ) + ) + ); + + public function collectData(){ + switch($this->queriedContext) { + case 'By username and board': + if($this->getInput('r')) { + $html = getSimpleHTMLDOMCached($this->getURI()); + $this->getUserResults($html); + } else { + $this->collectExpandableDatas($this->getURI() . 
'.rss'); + } + break; + case 'From search': + default: + $html = getSimpleHTMLDOMCached($this->getURI()); + $this->getSearchResults($html); + } + } + + private function getUserResults($html){ + $json = json_decode($html->find('#jsInit1', 0)->innertext, true); + $results = $json['tree']['children'][0]['children'][0]['children'][0]['options']['props']['data']['board_feed']; + $username = $json['resourceDataCache'][0]['data']['owner']['username']; + $fullname = $json['resourceDataCache'][0]['data']['owner']['full_name']; + $avatar = $json['resourceDataCache'][0]['data']['owner']['image_small_url']; + + foreach($results as $result) { + $item = array(); + + $item['uri'] = $result['link']; + + // Some use regular titles, others provide 'advanced' infos, a few + // provide even less info. Thus we attempt multiple options. + $item['title'] = trim($result['title']); + + if($item['title'] === "") + $item['title'] = trim($result['rich_summary']['display_name']); + + if($item['title'] === "") + $item['title'] = trim($result['description']); + + $item['timestamp'] = strtotime($result['created_at']); + $item['username'] = $username; + $item['fullname'] = $fullname; + $item['avatar'] = $avatar; + $item['author'] = $item['username'] . ' (' . $item['fullname'] . ')'; + $item['content'] = '<img align="left" style="margin: 2px 4px;" src="' + . htmlentities($item['avatar']) + . '" /><p><strong>' + . $item['username'] + . '</strong><br>' + . $item['fullname'] + . '</p><br><img src="' + . $result['images']['736x']['url'] + . '" alt="" /><br><p>' + . $result['description'] + . '</p>'; + + $item['enclosures'] = array($result['images']['orig']['url']); + + $this->items[] = $item; + } + } + + private function getSearchResults($html){ + $json = json_decode($html->find('#jsInit1', 0)->innertext, true); + $results = $json['resourceDataCache'][0]['data']['results']; + + foreach($results as $result) { + $item = array(); + + $item['uri'] = self::URI . 
$result['board']['url']; + + // Some use regular titles, others provide 'advanced' infos, a few + // provide even less info. Thus we attempt multiple options. + $item['title'] = trim($result['title']); + + if($item['title'] === "") + $item['title'] = trim($result['rich_summary']['display_name']); + + if($item['title'] === "") + $item['title'] = trim($result['grid_description']); + + $item['timestamp'] = strtotime($result['created_at']); + $item['username'] = $result['pinner']['username']; + $item['fullname'] = $result['pinner']['full_name']; + $item['avatar'] = $result['pinner']['image_small_url']; + $item['author'] = $item['username'] . ' (' . $item['fullname'] . ')'; + $item['content'] = '<img align="left" style="margin: 2px 4px;" src="' + . htmlentities($item['avatar']) + . '" /><p><strong>' + . $item['username'] + . '</strong><br>' + . $item['fullname'] + . '</p><br><img src="' + . $result['images']['736x']['url'] + . '" alt="" /><br><p>' + . $result['description'] + . '</p>'; + + $item['enclosures'] = array($result['images']['orig']['url']); + + $this->items[] = $item; + } + } + + public function getURI(){ + switch($this->queriedContext) { + case 'By username and board': + $uri = self::URI . '/' . urlencode($this->getInput('u')) . '/' . urlencode($this->getInput('b'));// . '.rss'; + break; + case 'From search': + $uri = self::URI . '/search/?q=' . urlencode($this->getInput('q')); + break; + default: return parent::getURI(); + } + return $uri; + } + + public function getName(){ + switch($this->queriedContext) { + case 'By username and board': + $specific = $this->getInput('u') . ' - ' . $this->getInput('b'); + break; + case 'From search': + $specific = $this->getInput('q'); + break; + default: return parent::getName(); + } + return $specific . ' - ' . 
self::NAME; + } +} diff --git a/bridges/PlanetLibreBridge.php b/bridges/PlanetLibreBridge.php new file mode 100644 index 0000000..03a6024 --- /dev/null +++ b/bridges/PlanetLibreBridge.php @@ -0,0 +1,38 @@ +<?php +class PlanetLibreBridge extends BridgeAbstract { + + const MAINTAINER = 'pit-fgfjiudghdf'; + const NAME = 'PlanetLibre'; + const URI = 'http://www.planet-libre.org'; + const DESCRIPTION = 'Returns the 5 newest posts from PlanetLibre (full text)'; + + private function extractContent($url){ + $html2 = getSimpleHTMLDOM($url); + $text = $html2->find('div[class="post-text"]', 0)->innertext; + return $text; + } + + public function collectData(){ + $html = getSimpleHTMLDOM(self::URI) + or returnServerError('Could not request PlanetLibre.'); + $limit = 0; + foreach($html->find('div.post') as $element) { + if($limit < 5) { + $item = array(); + $item['title'] = $element->find('h1', 0)->plaintext; + $item['uri'] = $element->find('a', 0)->href; + $item['timestamp'] = strtotime( + str_replace( + '/', + '-', + $element->find('div[class="post-date"]', 0)->plaintext + ) + ); + + $item['content'] = $this->extractContent($item['uri']); + $this->items[] = $item; + $limit++; + } + } + } +} diff --git a/bridges/RTBFBridge.php b/bridges/RTBFBridge.php new file mode 100644 index 0000000..22cdaf4 --- /dev/null +++ b/bridges/RTBFBridge.php @@ -0,0 +1,66 @@ +<?php +class RTBFBridge extends BridgeAbstract { + const NAME = 'RTBF Bridge'; + const URI = 'http://www.rtbf.be/auvio/'; + const CACHE_TIMEOUT = 21600; //6h + const DESCRIPTION = 'Returns the newest RTBF videos by series ID'; + const MAINTAINER = 'Frenzie'; + + const PARAMETERS = array( array( + 'c' => array( + 'name' => 'series id', + 'exampleValue' => 9500, + 'required' => true + ) + )); + + public function collectData(){ + $html = ''; + $limit = 10; + $count = 0; + + $html = getSimpleHTMLDOM($this->getURI()) + or returnServerError('Could not request RTBF.'); + + foreach($html->find('section[id!=widget-ml-avoiraussi-] 
.rtbf-media-grid article') as $element) { + if($count >= $limit) { + break; + } + + $item = array(); + $item['id'] = $element->getAttribute('data-id'); + $item['uri'] = self::URI . 'detail?id=' . $item['id']; + $thumbnailUriSrcSet = explode( + ',', + $element->find('figure .www-img-16by9 img', 0)->getAttribute('data-srcset') + ); + + $thumbnailUriLastSrc = end($thumbnailUriSrcSet); + $thumbnailUri = explode(' ', $thumbnailUriLastSrc)[0]; + $item['title'] = trim($element->find('h3', 0)->plaintext) + . ' - ' + . trim($element->find('h4', 0)->plaintext); + + $item['timestamp'] = strtotime($element->find('time', 0)->getAttribute('datetime')); + $item['content'] = '<a href="' . $item['uri'] . '"><img src="' . $thumbnailUri . '" /></a>'; + $this->items[] = $item; + $count++; + } + } + + public function getURI(){ + if(!is_null($this->getInput('c'))) { + return self::URI . 'emissions/detail?id=' . $this->getInput('c'); + } + + return parent::getURI() . 'emissions/'; + } + + public function getName(){ + if(!is_null($this->getInput('c'))) { + return $this->getInput('c') .' 
- RTBF Bridge'; + } + + return parent::getName(); + } +} diff --git a/bridges/RainbowSixSiegeBridge.php b/bridges/RainbowSixSiegeBridge.php new file mode 100644 index 0000000..302bb89 --- /dev/null +++ b/bridges/RainbowSixSiegeBridge.php @@ -0,0 +1,36 @@ +<?php +class RainbowSixSiegeBridge extends BridgeAbstract { + + const MAINTAINER = 'corenting'; + const NAME = 'Rainbow Six Siege Blog'; + const URI = 'https://rainbow6.ubisoft.com/siege/en-us/news/'; + const CACHE_TIMEOUT = 7200; // 2h + const DESCRIPTION = 'Latest articles from the Rainbow Six Siege blog'; + + public function collectData(){ + $dlUrl = "https://prod-tridionservice.ubisoft.com/live/v1/News/Latest?templateId=tcm%3A152-7677"; + $dlUrl .= "8-32&pageIndex=0&pageSize=10&language=en-US&detailPageId=tcm%3A152-194572-64"; + $dlUrl .= "&keywordList=175426&siteId=undefined&useSeoFriendlyUrl=true"; + $jsonString = getContents($dlUrl) or returnServerError('Error while downloading the website content'); + + $json = json_decode($jsonString, true); + $json = $json['items']; + + // Start at index 2 to remove highlighted articles + for($i = 0; $i < count($json); $i++) { + $jsonItem = $json[$i]['Content']; + $article = str_get_html($jsonItem); + + $item = array(); + + $uri = $article->find('h3 a', 0)->href; + $uri = 'https://rainbow6.ubisoft.com' . $uri; + $item['uri'] = $uri; + $item['title'] = $article->find('h3', 0)->plaintext; + $item['content'] = $article->find('img', 0)->outertext . '<br />' . 
$article->find('strong', 0)->plaintext; + $item['timestamp'] = strtotime($article->find('p.news_date', 0)->plaintext); + + $this->items[] = $item; + } + } +} diff --git a/bridges/ReadComicsBridge.php b/bridges/ReadComicsBridge.php new file mode 100644 index 0000000..33c8ed9 --- /dev/null +++ b/bridges/ReadComicsBridge.php @@ -0,0 +1,44 @@ +<?php +class ReadComicsBridge extends BridgeAbstract { + + const MAINTAINER = 'niawag'; + const NAME = 'Read Comics'; + const URI = 'http://www.readcomics.tv/'; + const DESCRIPTION = 'Enter the comics as they appear in the website uri, + separated by semicolons, ex: good-comic-1;good-comic-2; ...'; + + const PARAMETERS = array( array( + 'q' => array( + 'name' => 'keywords, separated by semicolons', + 'exampleValue' => 'first list;second list;...', + 'required' => true + ), + )); + + public function collectData(){ + + function parseDateTimestamp($element){ + $guessedDate = $element->find('span', 0)->plaintext; + $guessedDate = strptime($guessedDate, '%m/%d/%Y'); + $timestamp = mktime(0, 0, 0, $guessedDate['tm_mon'] + 1, $guessedDate['tm_mday'], date('Y')); + + return $timestamp; + } + + $keywordsList = explode(";", $this->getInput('q')); + foreach($keywordsList as $keywords) { + $html = $this->getSimpleHTMLDOM(self::URI . 'comic/' . 
rawurlencode($keywords)) + or $this->returnServerError('Could not request readcomics.tv.'); + + foreach($html->find('li') as $element) { + $item = array(); + $item['uri'] = $element->find('a.ch-name', 0)->href; + $item['id'] = $item['uri']; + $item['timestamp'] = parseDateTimestamp($element); + $item['title'] = $element->find('a.ch-name', 0)->plaintext; + if(isset($item['title'])) + $this->items[] = $item; + } + } + } +} diff --git a/bridges/Releases3DSBridge.php b/bridges/Releases3DSBridge.php new file mode 100644 index 0000000..a7e1778 --- /dev/null +++ b/bridges/Releases3DSBridge.php @@ -0,0 +1,136 @@ +<?php +class Releases3DSBridge extends BridgeAbstract { + + const MAINTAINER = 'ORelio'; + const NAME = '3DS Scene Releases'; + const URI = 'http://www.3dsdb.com/'; + const CACHE_TIMEOUT = 10800; // 3h + const DESCRIPTION = 'Returns the newest scene releases.'; + + public function collectData(){ + + function extractFromDelimiters($string, $start, $end){ + if(strpos($string, $start) !== false) { + $section_retrieved = substr($string, strpos($string, $start) + strlen($start)); + $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end)); + return $section_retrieved; + } + + return false; + } + + function typeToString($type){ + switch($type) { + case 1: return '3DS Game'; + case 4: return 'eShop'; + default: return '??? (' . $type . ')'; + } + } + + function cardToString($card){ + switch($card) { + case 1: return 'Regular (CARD1)'; + case 2: return 'NAND (CARD2)'; + default: return '??? (' . $card . ')'; + } + } + + $dataUrl = self::URI . 'xml.php'; + $xml = getContents($dataUrl) + or returnServerError('Could not request 3dsdb: ' . 
$dataUrl); + $limit = 0; + + foreach(array_reverse(explode('<release>', $xml)) as $element) { + if($limit >= 5) { + break; + } + + if(strpos($element, '</release>') === false) { + continue; + } + + $releasename = extractFromDelimiters($element, '<releasename>', '</releasename>'); + if(empty($releasename)) { + continue; + } + + $id = extractFromDelimiters($element, '<id>', '</id>'); + $name = extractFromDelimiters($element, '<name>', '</name>'); + $publisher = extractFromDelimiters($element, '<publisher>', '</publisher>'); + $region = extractFromDelimiters($element, '<region>', '</region>'); + $group = extractFromDelimiters($element, '<group>', '</group>'); + $imagesize = extractFromDelimiters($element, '<imagesize>', '</imagesize>'); + $serial = extractFromDelimiters($element, '<serial>', '</serial>'); + $titleid = extractFromDelimiters($element, '<titleid>', '</titleid>'); + $imgcrc = extractFromDelimiters($element, '<imgcrc>', '</imgcrc>'); + $filename = extractFromDelimiters($element, '<filename>', '</filename>'); + $trimmedsize = extractFromDelimiters($element, '<trimmedsize>', '</trimmedsize>'); + $firmware = extractFromDelimiters($element, '<firmware>', '</firmware>'); + $type = extractFromDelimiters($element, '<type>', '</type>'); + $card = extractFromDelimiters($element, '<card>', '</card>'); + + //Retrieve cover art and short desc from IGN? + $ignResult = false; + $ignDescription = ''; + $ignLink = ''; + $ignDate = time(); + $ignCoverArt = ''; + + $ignSearchUrl = 'http://www.ign.com/search?q=' . urlencode($name); + if($ignResult = getSimpleHTMLDOM($ignSearchUrl)) { + $ignCoverArt = $ignResult->find('div.search-item-media', 0)->find('img', 0)->src; + $ignDesc = $ignResult->find('div.search-item-description', 0)->plaintext; + $ignLink = $ignResult->find('div.search-item-sub-title', 0)->find('a', 1)->href; + $ignDate = strtotime(trim($ignResult->find('span.publish-date', 0)->plaintext)); + $ignDescription = '<div><img src="' + . $ignCoverArt + . 
'" /></div><div>' + . $ignDesc + . ' <a href="' + . $ignLink + . '">More at IGN</a></div>'; + } + + //Main section : Release description from 3DS database + $releaseDescription = '<h3>Release Details</h3><b>Release ID: </b>' . $id + . '<br /><b>Game Name: </b>' . $name + . '<br /><b>Publisher: </b>' . $publisher + . '<br /><b>Region: </b>' . $region + . '<br /><b>Group: </b>' . $group + . '<br /><b>Image size: </b>' . (intval($imagesize) / 8) + . 'MB<br /><b>Serial: </b>' . $serial + . '<br /><b>Title ID: </b>' . $titleid + . '<br /><b>Image CRC: </b>' . $imgcrc + . '<br /><b>File Name: </b>' . $filename + . '<br /><b>Release Name: </b>' . $releasename + . '<br /><b>Trimmed size: </b>' . intval(intval($trimmedsize) / 1048576) + . 'MB<br /><b>Firmware: </b>' . $firmware + . '<br /><b>Type: </b>' . typeToString($type) + . '<br /><b>Card: </b>' . cardToString($card) + . '<br />'; + + //Build search links section to facilitate release search using search engines + $releaseNameEncoded = urlencode(str_replace(' ', '+', $releasename)); + $searchLinkGoogle = 'https://google.com/?q=' . $releaseNameEncoded; + $searchLinkDuckDuckGo = 'https://duckduckgo.com/?q=' . $releaseNameEncoded; + $searchLinkQwant = 'https://lite.qwant.com/?q=' . $releaseNameEncoded . '&t=web'; + $releaseSearchLinks = '<h3>Search this release</h3><ul><li><a href="' + . $searchLinkGoogle + . '">Search using Google</a></li><li><a href="' + . $searchLinkDuckDuckGo + . '">Search using DuckDuckGo</a></li><li><a href="' + . $searchLinkQwant + . '">Search using Qwant</a></li></ul>'; + + //Build and add final item with the above three sections + $item = array(); + $item['title'] = $name; + $item['author'] = $publisher; + $item['timestamp'] = $ignDate; + $item['uri'] = empty($ignLink) ? $searchLinkDuckDuckGo : $ignLink; + $item['content'] = $ignDescription . $releaseDescription . 
$releaseSearchLinks; + $this->items[] = $item; + $limit++; + } + } +} diff --git a/bridges/ReporterreBridge.php b/bridges/ReporterreBridge.php new file mode 100644 index 0000000..db1104c --- /dev/null +++ b/bridges/ReporterreBridge.php @@ -0,0 +1,47 @@ +<?php +class ReporterreBridge extends BridgeAbstract { + + const MAINTAINER = 'nyutag'; + const NAME = 'Reporterre Bridge'; + const URI = 'http://www.reporterre.net/'; + const DESCRIPTION = 'Returns the newest articles.'; + + private function extractContent($url){ + $html2 = getSimpleHTMLDOM($url); + + foreach($html2->find('div[style=text-align:justify]') as $e) { + $text = $e->outertext; + } + + $html2->clear(); + unset($html2); + + // Replace all relative urls with absolute ones + $text = preg_replace( + '/(href|src)(\=[\"\'])(?!http)([^"\']+)/ims', + "$1$2" . self::URI . "$3", + $text + ); + + $text = strip_tags($text, '<p><br><a><img>'); + return $text; + } + + public function collectData(){ + $html = getSimpleHTMLDOM(self::URI . 'spip.php?page=backend') + or returnServerError('Could not request Reporterre.'); + $limit = 0; + + foreach($html->find('item') as $element) { + if($limit < 5) { + $item = array(); + $item['title'] = html_entity_decode($element->find('title', 0)->plaintext); + $item['timestamp'] = strtotime($element->find('dc:date', 0)->plaintext); + $item['uri'] = $element->find('guid', 0)->innertext; + $item['content'] = html_entity_decode($this->extractContent($item['uri'])); + $this->items[] = $item; + $limit++; + } + } + } +} diff --git a/bridges/Rue89Bridge.php b/bridges/Rue89Bridge.php new file mode 100644 index 0000000..6599122 --- /dev/null +++ b/bridges/Rue89Bridge.php @@ -0,0 +1,25 @@ +<?php +class Rue89Bridge extends FeedExpander { + + const MAINTAINER = 'pit-fgfjiudghdf'; + const NAME = 'Rue89'; + const URI = 'http://rue89.nouvelobs.com/'; + const DESCRIPTION = 'Returns the 5 newest posts from Rue89 (full text)'; + + protected function parseItem($item){ + $item = parent::parseItem($item); + 
+ $url = "http://api.rue89.nouvelobs.com/export/mobile2/node/" + . str_replace(" ", "", substr($item['uri'], -8)) + . "/full"; + + $datas = json_decode(getContents($url), true); + $item['content'] = $datas['node']['body']; + + return $item; + } + + public function collectData(){ + $this->collectExpandableDatas('http://api.rue89.nouvelobs.com/feed'); + } +} diff --git a/bridges/Rule34Bridge.php b/bridges/Rule34Bridge.php new file mode 100644 index 0000000..b46ec00 --- /dev/null +++ b/bridges/Rule34Bridge.php @@ -0,0 +1,12 @@ +<?php +require_once('GelbooruBridge.php'); + +class Rule34Bridge extends GelbooruBridge { + + const MAINTAINER = 'mitsukarenai'; + const NAME = 'Rule34'; + const URI = 'http://rule34.xxx/'; + const DESCRIPTION = 'Returns images from given page'; + + const PIDBYPAGE = 50; +} diff --git a/bridges/Rule34pahealBridge.php b/bridges/Rule34pahealBridge.php new file mode 100644 index 0000000..1a74616 --- /dev/null +++ b/bridges/Rule34pahealBridge.php @@ -0,0 +1,10 @@ +<?php +require_once('Shimmie2Bridge.php'); + +class Rule34pahealBridge extends Shimmie2Bridge { + + const MAINTAINER = 'mitsukarenai'; + const NAME = 'Rule34paheal'; + const URI = 'http://rule34.paheal.net/'; + const DESCRIPTION = 'Returns images from given page'; +} diff --git a/bridges/SafebooruBridge.php b/bridges/SafebooruBridge.php new file mode 100644 index 0000000..d95e557 --- /dev/null +++ b/bridges/SafebooruBridge.php @@ -0,0 +1,12 @@ +<?php +require_once('GelbooruBridge.php'); + +class SafebooruBridge extends GelbooruBridge { + + const MAINTAINER = 'mitsukarenai'; + const NAME = 'Safebooru'; + const URI = 'http://safebooru.org/'; + const DESCRIPTION = 'Returns images from given page'; + + const PIDBYPAGE = 40; +} diff --git a/bridges/SakugabooruBridge.php b/bridges/SakugabooruBridge.php new file mode 100644 index 0000000..1d6cee0 --- /dev/null +++ b/bridges/SakugabooruBridge.php @@ -0,0 +1,11 @@ +<?php +require_once('MoebooruBridge.php'); + +class SakugabooruBridge extends 
MoebooruBridge { + + const MAINTAINER = 'mitsukarenai'; + const NAME = 'Sakugabooru'; + const URI = 'http://sakuga.yshi.org/'; + const DESCRIPTION = 'Returns images from given page'; + +} diff --git a/bridges/ScmbBridge.php b/bridges/ScmbBridge.php new file mode 100644 index 0000000..2107aa3 --- /dev/null +++ b/bridges/ScmbBridge.php @@ -0,0 +1,39 @@ +<?php +class ScmbBridge extends BridgeAbstract { + + const MAINTAINER = 'Astalaseven'; + const NAME = 'Se Coucher Moins Bête Bridge'; + const URI = 'http://secouchermoinsbete.fr'; + const CACHE_TIMEOUT = 21600; // 6h + const DESCRIPTION = 'Returns the newest anecdotes.'; + + public function collectData(){ + $html = ''; + $html = getSimpleHTMLDOM(self::URI) + or returnServerError('Could not request Se Coucher Moins Bete.'); + + foreach($html->find('article') as $article) { + $item = array(); + $item['uri'] = self::URI . $article->find('p.summary a', 0)->href; + $item['title'] = $article->find('header h1 a', 0)->innertext; + + // remove text "En savoir plus" from anecdote content + $article->find('span.read-more', 0)->outertext = ''; + $content = $article->find('p.summary a', 0)->innertext; + + // remove superfluous spaces at the end + $content = substr($content, 0, strlen($content) - 17); + + // get publication date + $str_date = $article->find('time', 0)->datetime; + list($date, $time) = explode(' ', $str_date); + list($y, $m, $d) = explode('-', $date); + list($h, $i) = explode(':', $time); + $timestamp = mktime($h, $i, 0, $m, $d, $y); + $item['timestamp'] = $timestamp; + + $item['content'] = $content; + $this->items[] = $item; + } + } +} diff --git a/bridges/ScoopItBridge.php b/bridges/ScoopItBridge.php new file mode 100644 index 0000000..997837d --- /dev/null +++ b/bridges/ScoopItBridge.php @@ -0,0 +1,42 @@ +<?php +class ScoopItBridge extends BridgeAbstract { + + const MAINTAINER = 'Pitchoule'; + const NAME = 'ScoopIt'; + const URI = 'http://www.scoop.it/'; + const CACHE_TIMEOUT = 21600; // 6h + const DESCRIPTION = 
'Returns most recent results from ScoopIt.'; + + const PARAMETERS = array( array( + 'u' => array( + 'name' => 'keyword', + 'required' => true + ) + )); + + public function collectData(){ + $this->request = $this->getInput('u'); + $link = self::URI . 'search?q=' . urlencode($this->getInput('u')); + + $html = getSimpleHTMLDOM($link) + or returnServerError('Could not request ScoopIt. for : ' . $link); + + foreach($html->find('div.post-view') as $element) { + $item = array(); + $item['uri'] = $element->find('a', 0)->href; + $item['title'] = preg_replace( + '~[[:cntrl:]]~', + '', + $element->find('div.tCustomization_post_title', 0)->plaintext + ); + + $item['content'] = preg_replace( + '~[[:cntrl:]]~', + '', + $element->find('div.tCustomization_post_description', 0)->plaintext + ); + + $this->items[] = $item; + } + } +} diff --git a/bridges/SensCritiqueBridge.php b/bridges/SensCritiqueBridge.php new file mode 100644 index 0000000..7ac35f2 --- /dev/null +++ b/bridges/SensCritiqueBridge.php @@ -0,0 +1,97 @@ +<?php +class SensCritiqueBridge extends BridgeAbstract { + + const MAINTAINER = 'kranack'; + const NAME = 'Sens Critique'; + const URI = 'http://www.senscritique.com/'; + const CACHE_TIMEOUT = 21600; // 6h + const DESCRIPTION = 'Sens Critique news'; + + const PARAMETERS = array( array( + 'm' => array( + 'name' => 'Movies', + 'type' => 'checkbox' + ), + 's' => array( + 'name' => 'Series', + 'type' => 'checkbox' + ), + 'g' => array( + 'name' => 'Video Games', + 'type' => 'checkbox' + ), + 'b' => array( + 'name' => 'Books', + 'type' => 'checkbox' + ), + 'bd' => array( + 'name' => 'BD', + 'type' => 'checkbox' + ), + 'mu' => array( + 'name' => 'Music', + 'type' => 'checkbox' + ) + )); + + public function collectData(){ + $categories = array(); + foreach(self::PARAMETERS[$this->queriedContext] as $category => $properties) { + if($this->getInput($category)) { + $uri = self::URI; + switch($category) { + case 'm': $uri .= 'films/cette-semaine'; + break; + case 's': $uri .= 
'series/actualite'; + break; + case 'g': $uri .= 'jeuxvideo/actualite'; + break; + case 'b': $uri .= 'livres/actualite'; + break; + case 'bd': $uri .= 'bd/actualite'; + break; + case 'mu': $uri .= 'musique/actualite'; + break; + } + $html = getSimpleHTMLDOM($uri) + or returnServerError('No results for this query.'); + $list = $html->find('ul.elpr-list', 0); + + $this->extractDataFromList($list); + } + } + } + + private function extractDataFromList($list){ + if($list === null) { + returnClientError('Cannot extract data from list'); + } + + foreach($list->find('li') as $movie) { + $item = array(); + $item['author'] = htmlspecialchars_decode($movie->find('.elco-title a', 0)->plaintext, ENT_QUOTES) + . ' ' + . $movie->find('.elco-date', 0)->plaintext; + + $item['title'] = $movie->find('.elco-title a', 0)->plaintext + . ' ' + . $movie->find('.elco-date', 0)->plaintext; + + $item['content'] = '<em>' + . $movie->find('.elco-original-title', 0)->plaintext + . '</em><br><br>' + . $movie->find('.elco-baseline', 0)->plaintext + . '<br>' + . $movie->find('.elco-baseline', 1)->plaintext + . '<br><br>' + . $movie->find('.elco-description', 0)->plaintext + . '<br><br>' + . trim($movie->find('.erra-ratings .erra-global', 0)->plaintext) + . ' / 10'; + + $item['id'] = $this->getURI() . $movie->find('.elco-title a', 0)->href; + $item['uri'] = $this->getURI() . 
$movie->find('.elco-title a', 0)->href; + $this->items[] = $item; + } + } +} diff --git a/bridges/SexactuBridge.php b/bridges/SexactuBridge.php new file mode 100644 index 0000000..5bc552a --- /dev/null +++ b/bridges/SexactuBridge.php @@ -0,0 +1,88 @@ +<?php +class SexactuBridge extends BridgeAbstract { + + const MAINTAINER = 'Riduidel'; + const NAME = 'Sexactu'; + const AUTHOR = 'Maïa Mazaurette'; + const URI = 'http://www.gqmagazine.fr'; + const CACHE_TIMEOUT = 7200; // 2h + const DESCRIPTION = 'Sexactu via rss-bridge'; + + const REPLACED_ATTRIBUTES = array( + 'href' => 'href', + 'src' => 'src', + 'data-original' => 'src' + ); + + public function getURI(){ + return self::URI . '/sexactu'; + } + + public function collectData(){ + $html = getSimpleHTMLDOM($this->getURI()) + or returnServerError('Could not request ' . $this->getURI()); + + $sexactu = $html->find('.container_sexactu', 0); + $rowList = $sexactu->find('.row'); + foreach($rowList as $row) { + // only use first list as second one only contains pages numbers + + $title = $row->find('.title', 0); + if($title) { + $item = array(); + $item['author'] = self::AUTHOR; + $item['title'] = $title->plaintext; + $urlAttribute = "data-href"; + $uri = $title->$urlAttribute; + if($uri === false) + continue; + if(substr($uri, 0, 1) === 'h') { // absolute uri + $item['uri'] = $uri; + } else if(substr($uri, 0, 1) === '/') { // domain relative url + $item['uri'] = self::URI . $uri; + } else { + $item['uri'] = $this->getURI() . $uri; + } + $article = $this->loadFullArticle($item['uri']); + $item['content'] = $this->replaceUriInHtmlElement($article->find('.article_content', 0)); + + $publicationDate = $article->find('time[itemprop=datePublished]', 0); + $short_date = $publicationDate->datetime; + $item['timestamp'] = strtotime($short_date); + } else { + // Sometimes we get rubbish, ignore. 
+ continue; + } + $this->items[] = $item; + } + } + + /** + * Loads the full article and returns the contents + * @param $uri The article URI + * @return The article content + */ + private function loadFullArticle($uri){ + $html = getSimpleHTMLDOMCached($uri); + + $content = $html->find('#article', 0); + if($content) { + return $content; + } + + return null; + } + + /** + * Replaces all relative URIs with absolute ones + * @param $element A simplehtmldom element + * @return The $element->innertext with all URIs replaced + */ + private function replaceUriInHtmlElement($element){ + $returned = $element->innertext; + foreach (self::REPLACED_ATTRIBUTES as $initial => $final) { + $returned = str_replace($initial . '="/', $final . '="' . self::URI . '/', $returned); + } + return $returned; + } +} diff --git a/bridges/ShanaprojectBridge.php b/bridges/ShanaprojectBridge.php new file mode 100644 index 0000000..e86f772 --- /dev/null +++ b/bridges/ShanaprojectBridge.php @@ -0,0 +1,123 @@ +<?php +class ShanaprojectBridge extends BridgeAbstract { + const MAINTAINER = 'logmanoriginal'; + const NAME = 'Shanaproject Bridge'; + const URI = 'http://www.shanaproject.com'; + const DESCRIPTION = 'Returns a list of anime from the current Season Anime List'; + + // Returns an html object for the Season Anime List (latest season) + private function loadSeasonAnimeList(){ + // First we need to find the URI to the latest season from the + // 'seasons' page searching for 'Season Anime List' + $html = getSimpleHTMLDOM($this->getURI() . '/seasons'); + if(!$html) + returnServerError('Could not load \'seasons\' page!'); + + $season = $html->find('div.follows_menu/a', 1); + if(!$season) + returnServerError('Could not find \'Season Anime List\'!'); + + $html = getSimpleHTMLDOM($this->getURI() . $season->href); + if(!$html) + returnServerError( + 'Could not load \'Season Anime List\' from \'' + . $season->innertext + . '\'!' 
+ ); + + return $html; + } + + // Extracts the anime title + private function extractAnimeTitle($anime){ + $title = $anime->find('a', 0); + if(!$title) + returnServerError('Could not find anime title!'); + return trim($title->innertext); + } + + // Extracts the anime URI + private function extractAnimeUri($anime){ + $uri = $anime->find('a', 0); + if(!$uri) + returnServerError('Could not find anime URI!'); + return $this->getURI() . $uri->href; + } + + // Extracts the anime release date (timestamp) + private function extractAnimeTimestamp($anime){ + $timestamp = $anime->find('span.header_info_block', 1); + if(!$timestamp) + return null; + return strtotime($timestamp->innertext); + } + + // Extracts the anime studio name (author) + private function extractAnimeAuthor($anime){ + $author = $anime->find('span.header_info_block', 2); + if(!$author) + return; // Sometimes the studio is unknown, so leave empty + return trim($author->innertext); + } + + // Extracts the episode information (x of y released) + private function extractAnimeEpisodeInformation($anime){ + $episode = $anime->find('div.header_info_episode', 0); + if(!$episode) + returnServerError('Could not find anime episode information!'); + return preg_replace('/\r|\n/', ' ', $episode->plaintext); + } + + // Extracts the background image + private function extractAnimeBackgroundImage($anime){ + // Getting the picture is a little bit tricky as it is part of the style. + // Luckily the style is part of the parent div :) + + if(preg_match("/url\(\/\/([^\)]+)\)/i", $anime->parent->style, $matches)) + return $matches[1]; + + returnServerError('Could not extract background image!'); + } + + // Builds an URI to search for a specific anime (subber is left empty) + private function buildAnimeSearchUri($anime){ + return $this->getURI() + . '/search/?title=' + . urlencode($this->extractAnimeTitle($anime)) + . 
'&subber='; + } + + // Builds the content string for a given anime + private function buildAnimeContent($anime){ + // We'll use a template string to place our contents + return '<a href="' + . $this->extractAnimeUri($anime) + . '"><img src="http://' + . $this->extractAnimeBackgroundImage($anime) + . '" alt="' + . htmlspecialchars($this->extractAnimeTitle($anime)) + . '" style="border: 1px solid black"></a><br><p>' + . $this->extractAnimeEpisodeInformation($anime) + . '</p><br><p><a href="' + . $this->buildAnimeSearchUri($anime) + . '">Search episodes</a></p>'; + } + + public function collectData(){ + $html = $this->loadSeasonAnimeList(); + + $animes = $html->find('div.header_display_box_info'); + if(!$animes) + returnServerError('Could not find anime headers!'); + + foreach($animes as $anime) { + $item = array(); + $item['title'] = $this->extractAnimeTitle($anime); + $item['author'] = $this->extractAnimeAuthor($anime); + $item['uri'] = $this->extractAnimeUri($anime); + $item['timestamp'] = $this->extractAnimeTimestamp($anime); + $item['content'] = $this->buildAnimeContent($anime); + $this->items[] = $item; + } + } +} diff --git a/bridges/Shimmie2Bridge.php b/bridges/Shimmie2Bridge.php new file mode 100644 index 0000000..efbcd9b --- /dev/null +++ b/bridges/Shimmie2Bridge.php @@ -0,0 +1,39 @@ +<?php +require_once('DanbooruBridge.php'); + +class Shimmie2Bridge extends DanbooruBridge { + + const NAME = 'Shimmie v2'; + const URI = 'http://shimmie.shishnet.org/v2/'; + const DESCRIPTION = 'Returns images from given page'; + + const PATHTODATA = '.shm-thumb-link'; + const IDATTRIBUTE = 'data-post-id'; + + protected function getFullURI(){ + return $this->getURI() + . 'post/list/' + . $this->getInput('t') + . '/' + . $this->getInput('p'); + } + + protected function getItemFromElement($element){ + $item = array(); + $item['uri'] = $this->getURI() . 
/**
 * Bridge exposing the newest tracks of a SoundCloud user as feed items.
 *
 * The user name comes from the "u" input. The profile is first resolved
 * through the SoundCloud "resolve" endpoint to obtain the numeric user id,
 * then that user's track list is requested.
 */
class SoundCloudBridge extends BridgeAbstract {

	const MAINTAINER = 'kranack';
	const NAME = 'Soundcloud Bridge';
	const URI = 'https://soundcloud.com/';
	const CACHE_TIMEOUT = 600; // 10min
	const DESCRIPTION = 'Returns 10 newest music from user profile';

	const PARAMETERS = array( array(
		'u' => array(
			'name' => 'username',
			'required' => true
		)
	));

	const CLIENT_ID = '0aca19eae3843844e4053c6d8fdb7875';

	/**
	 * Fills $this->items with at most 10 tracks of the requested user.
	 *
	 * Reports a server error (via returnServerError) when either API
	 * response is empty or cannot be decoded.
	 */
	public function collectData(){
		$username = $this->getInput('u');

		$res = json_decode(getContents(
			'https://api.soundcloud.com/resolve?url=http://www.soundcloud.com/'
			. urlencode($username)
			. '&client_id='
			. self::CLIENT_ID
		)) or returnServerError('No results for this query');

		$tracks = json_decode(getContents(
			'https://api.soundcloud.com/users/'
			. urlencode($res->id)
			. '/tracks?client_id='
			. self::CLIENT_ID
		)) or returnServerError('No results for this user');

		// The loop below expects a JSON list; anything else is unusable
		if(!is_array($tracks)) {
			returnServerError('No results for this user');
		}

		// Users may have fewer than 10 tracks: only iterate over the tracks
		// that actually exist instead of indexing 0..9 unconditionally.
		foreach(array_slice($tracks, 0, 10) as $track) {
			$label = $track->user->username . ' - ' . $track->title;

			// Public page of the track, used both as identifier and as link
			$trackPage = self::URI
				. urlencode($username)
				. '/'
				. urlencode($track->permalink);

			$item = array();
			$item['author'] = $label;
			$item['title'] = $label;
			$item['content'] = '<audio src="'
				. $track->uri
				. '/stream?client_id='
				. self::CLIENT_ID
				. '">';
			$item['id'] = $trackPage;
			$item['uri'] = $trackPage;
			$this->items[] = $item;
		}
	}

	/**
	 * Returns the bridge name, suffixed with the user name when one is set.
	 */
	public function getName(){
		if(!is_null($this->getInput('u'))) {
			return self::NAME . ' - ' . $this->getInput('u');
		}

		return parent::getName();
	}
}
$username . '/wishlist?' . http_build_query($params); + + $html = ''; + $html = getSimpleHTMLDOM($url) + or returnServerError("Could not request Steam Wishlist. Tried:\n - $url"); + + foreach($html->find('#wishlist_items .wishlistRow') as $element) { + + $gameTitle = $element->find('h4', 0)->plaintext; + $gameUri = $element->find('.storepage_btn_ctn a', 0)->href; + $gameImg = $element->find('.gameListRowLogo img', 0)->src; + + $discountBlock = $element->find('.discount_block', 0); + + if($element->find('.discount_block', 0)) { + $gameHasPromo = 1; + } else { + + if($this->getInput('only_discount')) { + continue; + } + + $gameHasPromo = 0; + + } + + if($gameHasPromo) { + + $gamePromoValue = $discountBlock->find('.discount_pct', 0)->plaintext; + $gameOldPrice = $discountBlock->find('.discount_original_price', 0)->plaintext; + $gameNewPrice = $discountBlock->find('.discount_final_price', 0)->plaintext; + $gamePrice = $gameNewPrice; + + } else { + $gamePrice = $element->find('.gameListPriceData .price', 0)->plaintext; + } + + $item = array(); + $item['uri'] = $gameUri; + $item['title'] = $gameTitle; + $item['price'] = $gamePrice; + $item['hasPromo'] = $gameHasPromo; + + if($gameHasPromo) { + + $item['promoValue'] = $gamePromoValue; + $item['oldPrice'] = $gameOldPrice; + $item['newPrice'] = $gameNewPrice; + + } + + $this->items[] = $item; + } + } +} diff --git a/bridges/StripeAPIChangeLogBridge.php b/bridges/StripeAPIChangeLogBridge.php new file mode 100644 index 0000000..22ef381 --- /dev/null +++ b/bridges/StripeAPIChangeLogBridge.php @@ -0,0 +1,23 @@ +<?php +class StripeAPIChangeLogBridge extends BridgeAbstract { + const MAINTAINER = 'Pierre Mazière'; + const NAME = 'Stripe API Changelog'; + const URI = 'https://stripe.com/docs/upgrades'; + const CACHE_TIMEOUT = 86400; // 24h + const DESCRIPTION = 'Returns the changes made to the stripe.com API'; + + public function collectData(){ + $html = getSimpleHTMLDOM(self::URI) + or returnServerError('No results for Stripe API 
/**
 * Bridge returning the latest wallpapers of a SuperbWallpapers category.
 *
 * Inputs: "c" (category, required), "m" (maximum number of wallpapers,
 * 36 when empty) and "r" (target resolution such as "1920x1200").
 */
class SuperbWallpapersBridge extends BridgeAbstract {

	const MAINTAINER = 'nel50n';
	const NAME = 'Superb Wallpapers Bridge';
	const URI = 'http://www.superbwallpapers.com/';
	const CACHE_TIMEOUT = 43200; // 12h
	const DESCRIPTION = 'Returns the latests wallpapers from SuperbWallpapers';

	const PARAMETERS = array( array(
		'c' => array(
			'name' => 'category',
			'required' => true
		),
		'm' => array(
			'name' => 'Max number of wallpapers',
			'type' => 'number'
		),
		'r' => array(
			'name' => 'resolution',
			'exampleValue' => '1920x1200, 1680x1050,…',
			'defaultValue' => '1920x1200'
		)
	));

	/**
	 * Walks the category listing pages and fills $this->items until the
	 * requested maximum is reached.
	 */
	public function collectData(){
		$category = $this->getInput('c');
		$resolution = $this->getInput('r'); // Wide wallpaper default

		$num = 0;
		$max = $this->getInput('m') ?: 36;

		// Get last page number: page 9999 is requested and the page number
		// the site reports as current is read from the pager.
		$link = self::URI . '/' . $category . '/9999.html';
		$html = getSimpleHTMLDOM($link)
			or returnServerError('Could not load ' . $link);

		// Without a pager element, fall back to a single page instead of
		// calling a method on null.
		$currentPage = $html->find('.paging .cpage', 0);
		$available = $currentPage ? $currentPage->innertext() : 1;

		// Pages are counted in steps of 36 wallpapers (same constant as the
		// default maximum); never request more pages than needed for $max.
		$lastpage = min($available, ceil($max / 36));

		for($page = 1; $page <= $lastpage; $page++) {
			$link = self::URI . '/' . $category . '/' . $page . '.html';
			$html = getSimpleHTMLDOM($link)
				or returnServerError('No results for this query.');

			foreach($html->find('.wpl .i a') as $element) {
				$thumbnail = $element->find('img', 0);
				if(!$thumbnail) {
					continue; // entry without a thumbnail cannot be converted
				}

				$item = array();
				// Thumbnails are served as 200x125; swapping that token for
				// the requested resolution gives the wallpaper URL. The
				// local $resolution must be used here: the class has no
				// "resolution" property.
				$item['uri'] = str_replace('200x125', $resolution, $thumbnail->src);
				$item['timestamp'] = time();
				$item['title'] = $element->title;
				$item['content'] = $item['title'] . '<br><a href="' . $item['uri'] . '">' . $thumbnail . '</a>';
				$this->items[] = $item;

				$num++;
				if ($num >= $max)
					break 2;
			}
		}
	}

	/**
	 * Returns the bridge name, extended with category and resolution when
	 * both inputs are available.
	 */
	public function getName(){
		if(!is_null($this->getInput('c')) && !is_null($this->getInput('r'))) {
			return self::NAME . '- ' . $this->getInput('c') . ' [' . $this->getInput('r') . ']';
		}

		return parent::getName();
	}
}
'&order=added&type=desc'; + + $html = getSimpleHTMLDOM($url) + or returnServerError('Could not request t411: ' . $url); + + $results = $html->find('table.results', 0); + if (is_null($results)) + returnServerError('No results from t411: ' . $url); + $limit = 0; + + //Process each item individually + foreach($results->find('tr') as $element) { + + //Limit total amount of requests and ignore table header + if($limit >= 10) { + break; + } + if(is_object($element->find('th', 0))) { + continue; + } + + //Requests are rate-limited + usleep(500000); //So we need to wait (500ms) + + //Retrieve data from RSS entry + $item_uri = self::URI + . 'torrents/details/?id=' + . extractFromDelimiters($element->find('a.nfo', 0)->outertext, '?id=', '"'); + + $item_title = extractFromDelimiters($element->outertext, '" title="', '"'); + $item_date = strtotime($element->find('dd', 0)->plaintext); + + //Retrieve full description from torrent page + $item_html = getSimpleHTMLDOM($item_uri); + if(!$item_html) { + continue; + } + + //Retrieve data from page contents + $item_desc = $item_html->find('div.description', 0); + $item_author = $item_html->find('a.profile', 0)->innertext; + + //Cleanup advertisments + $divs = explode('<div class="align-center">', $item_desc->innertext); + $item_desc = ''; + foreach ($divs as $text) + if (strpos($text, 'adprovider.adlure.net') === false) + $item_desc = $item_desc . '<div class="align-center">' . 
$text; + + $item_desc = preg_replace('/<h2 class="align-center">LIENS DE T..?L..?CHARGEMENT<\/h2>/i', '', $item_desc); + + //Build and add final item + $item = array(); + $item['uri'] = $item_uri; + $item['title'] = $item_title; + $item['author'] = $item_author; + $item['timestamp'] = $item_date; + $item['content'] = $item_desc; + $this->items[] = $item; + $limit++; + } + } +} diff --git a/bridges/TagBoardBridge.php b/bridges/TagBoardBridge.php new file mode 100644 index 0000000..b79847e --- /dev/null +++ b/bridges/TagBoardBridge.php @@ -0,0 +1,49 @@ +<?php +class TagBoardBridge extends BridgeAbstract { + + const MAINTAINER = 'Pitchoule'; + const NAME = 'TagBoard'; + const URI = 'http://www.TagBoard.com/'; + const CACHE_TIMEOUT = 21600; // 6h + const DESCRIPTION = 'Returns most recent results from TagBoard.'; + + const PARAMETERS = array( array( + 'u' => array( + 'name' => 'keyword', + 'required' => true + ) + )); + + public function collectData(){ + $link = 'https://post-cache.tagboard.com/search/' . $this->getInput('u'); + + $html = getSimpleHTMLDOM($link) + or returnServerError('Could not request TagBoard for : ' . $link); + $parsed_json = json_decode($html); + + foreach($parsed_json->{'posts'} as $element) { + $item = array(); + $item['uri'] = $element->{'permalink'}; + $item['title'] = $element->{'text'}; + $thumbnailUri = $element->{'photos'}[0]->{'m'}; + if(isset($thumbnailUri)) { + $item['content'] = '<a href="' + . $item['uri'] + . '"><img src="' + . $thumbnailUri + . '" /></a>'; + } else { + $item['content'] = $element->{'html'}; + } + $this->items[] = $item; + } + } + + public function getName(){ + if(!is_null($this->getInput('u'))) { + return 'tagboard - ' . 
/**
 * Bridge returning the posts shown on the front page of The Coding Love.
 */
class TheCodingLoveBridge extends BridgeAbstract {

	const MAINTAINER = 'superbaillot.net';
	const NAME = 'The Coding Love';
	const URI = 'http://thecodinglove.com/';
	const CACHE_TIMEOUT = 7200; // 2h
	const DESCRIPTION = 'The Coding Love';

	/**
	 * Converts every "div.post" element of the front page into a feed item.
	 *
	 * Posts lacking a title link or a body are skipped.
	 */
	public function collectData(){
		$html = getSimpleHTMLDOM(self::URI)
			or returnServerError('Could not request The Coding Love.');

		foreach($html->find('div.post') as $element) {
			$titleLink = $element->find('h3 a', 0);
			$body = $element->find('div.bodytype', 0);

			// find(..., 0) yields no element when the selector does not
			// match; skip such posts instead of dereferencing null.
			if(!$titleLink || !$body) {
				continue;
			}

			// retrieve .gif instead of static .jpg
			foreach($body->find('p.e img') as $image) {
				$image->src = str_replace('.jpg', '.gif', $image->src);
			}

			$item = array();

			// The author is taken from the text following "by" in the first
			// <i> element; a trailing "*/" is removed.
			$credit = $body->find('i', 0);
			if($credit) {
				$pos = strpos($credit->innertext, 'by');
				if($pos !== false && $pos > 0) {
					$item['author'] = trim(str_replace('*/', '', substr($credit->innertext, ($pos + 2))));
				}
			}

			// Plain assignment: the key does not exist yet, so appending
			// with ".=" would read an undefined index.
			$item['content'] = trim($body->innertext);
			$item['uri'] = $titleLink->href;
			$item['title'] = trim($titleLink->innertext);

			$this->items[] = $item;
		}
	}
}
/**
 * Bridge returning the five most recent articles of The Hacker News,
 * including the article body fetched from each article page.
 */
class TheHackerNewsBridge extends BridgeAbstract {

	const MAINTAINER = 'ORelio';
	const NAME = 'The Hacker News Bridge';
	const URI = 'https://thehackernews.com/';
	const DESCRIPTION = 'Cyber Security, Hacking, Technology News.';

	/**
	 * Reads the front page, then loads each of the first five articles and
	 * stores its cleaned-up content in $this->items.
	 */
	public function collectData(){
		$html = getSimpleHTMLDOM($this->getURI())
			or returnServerError('Could not request TheHackerNews: ' . $this->getURI());
		$limit = 0;

		foreach($html->find('article') as $element) {
			// Stop as soon as the limit is reached rather than looping over
			// the remaining articles without doing anything.
			if($limit >= 5) {
				break;
			}

			$article_url = $element->find('a.entry-title', 0)->href;
			$article_author = trim($element->find('span.vcard', 0)->plaintext);
			$article_title = $element->find('a.entry-title', 0)->plaintext;
			$article_timestamp = strtotime($element->find('span.updated', 0)->plaintext);
			$article = getSimpleHTMLDOM($article_url)
				or returnServerError('Could not request TheHackerNews: ' . $article_url);

			$contents = $article->find('div.articlebodyonly', 0)->innertext;
			$contents = $this->stripRecursiveHtmlSection($contents, 'div', '<div class=\'clear\'');
			$contents = $this->stripWithDelimiters($contents, '<script', '</script>');

			$item = array();
			$item['uri'] = $article_url;
			$item['title'] = $article_title;
			$item['author'] = $article_author;
			$item['timestamp'] = $article_timestamp;
			$item['content'] = trim($contents);
			$this->items[] = $item;
			$limit++;
		}
	}

	/**
	 * Removes every section of $string that begins with $start and runs up
	 * to and including the next $end.
	 *
	 * Implemented as a private method: a named function declared inside
	 * collectData() would be declared again (a fatal error) if collectData()
	 * ran twice in the same process.
	 *
	 * @param string $string Text to clean
	 * @param string $start Opening delimiter
	 * @param string $end Closing delimiter
	 * @return string Text without the delimited sections
	 */
	private function stripWithDelimiters($string, $start, $end){
		while(strpos($string, $start) !== false) {
			$section_to_remove = substr($string, strpos($string, $start));
			$section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end));
			$string = str_replace($section_to_remove, '', $string);
		}

		return $string;
	}

	/**
	 * Removes every element that starts with $tag_start, including nested
	 * elements of the same tag name, from $string.
	 *
	 * The end of a section is found by extending it to the next closing tag
	 * until it contains no more opening tags than closing tags (at most 100
	 * extensions per section).
	 *
	 * @param string $string HTML to clean
	 * @param string $tag_name Tag name, e.g. "div"
	 * @param string $tag_start Beginning of the opening tag to look for; must
	 *  itself start with "<" followed by $tag_name, otherwise nothing is done
	 * @return string HTML without the matching sections
	 */
	private function stripRecursiveHtmlSection($string, $tag_name, $tag_start){
		$open_tag = '<' . $tag_name;
		$close_tag = '</' . $tag_name . '>';
		$close_tag_length = strlen($close_tag);
		if(strpos($tag_start, $open_tag) === 0) {
			while(strpos($string, $tag_start) !== false) {
				$max_recursion = 100;
				$section_to_remove = null;
				$section_start = strpos($string, $tag_start);
				$search_offset = $section_start;
				do {
					$max_recursion--;
					$section_end = strpos($string, $close_tag, $search_offset);
					if($section_end === false) {
						// Unbalanced markup: no closing tag is left, so the
						// section cannot be delimited. Return what we have
						// rather than cutting arbitrary text or looping.
						return $string;
					}
					$search_offset = $section_end + $close_tag_length;
					$section_to_remove = substr(
						$string,
						$section_start,
						$section_end - $section_start + $close_tag_length
					);

					$open_tag_count = substr_count($section_to_remove, $open_tag);
					$close_tag_count = substr_count($section_to_remove, $close_tag);
				} while($open_tag_count > $close_tag_count && $max_recursion > 0);
				$string = str_replace($section_to_remove, '', $string);
			}
		}
		return $string;
	}
}
Search can be done in a specified category'; + + const PARAMETERS = array( array( + 'q' => array( + 'name' => 'keywords, separated by semicolons', + 'exampleValue' => 'first list;second list;…', + 'required' => true + ), + 'crit' => array( + 'type' => 'list', + 'name' => 'Search type', + 'values' => array( + 'search' => 'search', + 'category' => 'cat', + 'user' => 'usr' + ) + ), + 'cat_check' => array( + 'type' => 'checkbox', + 'name' => 'Specify category for normal search ?', + ), + 'cat' => array( + 'name' => 'Category number', + 'exampleValue' => '100, 200… See TPB for category number' + ), + 'trusted' => array( + 'type' => 'checkbox', + 'name' => 'Only get results from Trusted or VIP users ?', + ), + )); + + public function collectData(){ + + function parseDateTimestamp($element){ + $guessedDate = $element->find('font', 0)->plaintext; + $guessedDate = explode('Uploaded ', $guessedDate)[1]; + $guessedDate = explode(',', $guessedDate)[0]; + + if(count(explode(':', $guessedDate)) == 1) { + $guessedDate = strptime($guessedDate, '%m-%d %Y'); + $timestamp = mktime( + 0, + 0, + 0, + $guessedDate['tm_mon'] + 1, + $guessedDate['tm_mday'], + 1900 + $guessedDate['tm_year'] + ); + } elseif(explode(' ', $guessedDate)[0] == 'Today') { + $guessedDate = strptime( + explode(' ', $guessedDate)[1], '%H:%M' + ); + + $timestamp = mktime( + $guessedDate['tm_hour'], + $guessedDate['tm_min'], + 0, + date('m'), + date('d'), + date('Y') + ); + } elseif(explode(' ', $guessedDate)[0] == 'Y-day') { + $guessedDate = strptime( + explode(' ', $guessedDate)[1], '%H:%M' + ); + + $timestamp = mktime( + $guessedDate['tm_hour'], + $guessedDate['tm_min'], + 0, + date('m', time() - 24 * 60 * 60), + date('d', time() - 24 * 60 * 60), + date('Y', time() - 24 * 60 * 60) + ); + } else { + $guessedDate = strptime($guessedDate, '%m-%d %H:%M'); + $timestamp = mktime( + $guessedDate['tm_hour'], + $guessedDate['tm_min'], + 0, + $guessedDate['tm_mon'] + 1, + $guessedDate['tm_mday'], + date('Y')); + } + return 
$timestamp; + } + + $catBool = $this->getInput('cat_check'); + if($catBool) { + $catNum = $this->getInput('cat'); + } + $critList = $this->getInput('crit'); + + $trustedBool = $this->getInput('trusted'); + $keywordsList = explode(';', $this->getInput('q')); + foreach($keywordsList as $keywords) { + switch($critList) { + case 'search': + if($catBool == false) { + $html = getSimpleHTMLDOM( + self::URI . + 'search/' . + rawurlencode($keywords) . + '/0/3/0' + ) or returnServerError('Could not request TPB.'); + } else { + $html = getSimpleHTMLDOM( + self::URI . + 'search/' . + rawurlencode($keywords) . + '/0/3/' . + rawurlencode($catNum) + ) or returnServerError('Could not request TPB.'); + } + break; + case 'cat': + $html = getSimpleHTMLDOM( + self::URI . + 'browse/' . + rawurlencode($keywords) . + '/0/3/0' + ) or returnServerError('Could not request TPB.'); + break; + case 'usr': + $html = getSimpleHTMLDOM( + self::URI . + 'user/' . + rawurlencode($keywords) . + '/0/3/0' + ) or returnServerError('Could not request TPB.'); + break; + } + + if ($html->find('table#searchResult', 0) == false) + returnServerError('No result for query ' . $keywords); + + foreach($html->find('tr') as $element) { + + if(!$trustedBool + || !is_null($element->find('img[alt=VIP]', 0)) + || !is_null($element->find('img[alt=Trusted]', 0))) { + $item = array(); + $item['uri'] = $element->find('a', 3)->href; + $item['id'] = self::URI . $element->find('a.detLink', 0)->href; + $item['timestamp'] = parseDateTimestamp($element); + $item['author'] = $element->find('a.detDesc', 0)->plaintext; + $item['title'] = $element->find('a.detLink', 0)->plaintext; + $item['seeders'] = (int)$element->find('td', 2)->plaintext; + $item['leechers'] = (int)$element->find('td', 3)->plaintext; + $item['content'] = $element->find('font', 0)->plaintext + . '<br>seeders: ' + . $item['seeders'] + . ' | leechers: ' + . $item['leechers'] + . '<br><a href="' + . $item['id'] + . 
/**
 * Bridge returning the latest episodes of a series through the TheTVDB API.
 *
 * Inputs: "serie_id" (numeric series id) and "nb_episode" (number of
 * episodes to return).
 */
class TheTVDBBridge extends BridgeAbstract {

	const MAINTAINER = 'Astyan';
	const NAME = 'TheTVDB';
	const URI = 'http://thetvdb.com/';
	const APIURI = 'https://api.thetvdb.com/';
	const CACHE_TIMEOUT = 43200; // 12h
	const DESCRIPTION = 'Returns latest episodes of a serie with theTVDB api. You can contribute to theTVDB.';
	const PARAMETERS = array(
		array(
			'serie_id' => array(
				'type' => 'number',
				'name' => 'ID',
				'required' => true,
			),
			'nb_episode' => array(
				'type' => 'number',
				'name' => 'Number of episodes',
				'defaultValue' => 10,
				'required' => true,
			),
		)
	);
	const APIACCOUNT = 'RSSBridge';
	const APIKEY = '76DE1887EA401C9A';
	const APIUSERKEY = 'B52869AC6005330F';

	/**
	 * Returns the base URI of the API.
	 */
	private function getApiUri(){
		return self::APIURI;
	}

	/**
	 * Logs in to the API and returns the bearer token.
	 *
	 * @return string Token to pass to curlJob()
	 * @throws Exception When the API reports an error or returns no token
	 */
	private function getToken(){
		//login and get token, don't use curlJob to do less adaptations
		$login_array = array(
			'apikey' => self::APIKEY,
			'username' => self::APIACCOUNT,
			'userkey' => self::APIUSERKEY
		);

		$login_json = json_encode($login_array);
		$ch = curl_init($this->getApiUri() . 'login');
		curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'POST');
		curl_setopt($ch, CURLOPT_POSTFIELDS, $login_json);
		curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($ch, CURLOPT_HTTPHEADER, array(
				'Content-Type: application/json',
				'Accept: application/json'
			)
		);

		curl_setopt($ch, CURLOPT_TIMEOUT, 5);
		curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
		$result = curl_exec($ch);
		curl_close($ch);
		$token_json = (array)json_decode($result);
		if(isset($token_json['Error'])) {
			throw new Exception($token_json['Error']);
		}
		// A failed transfer or an unexpected body leaves no "token" key
		if(!isset($token_json['token'])) {
			throw new Exception('Could not obtain a token from TheTVDB API');
		}
		return $token_json['token'];
	}

	/**
	 * Performs an authenticated GET request and decodes the JSON answer.
	 *
	 * @param string $token Bearer token from getToken()
	 * @param string $url Full URL to request
	 * @return array Decoded top-level object cast to an array (empty when
	 *  the body could not be decoded)
	 * @throws Exception When the answer contains an "Error" entry
	 */
	private function curlJob($token, $url){
		$token_header = 'Authorization: Bearer ' . $token;
		$ch = curl_init($url);
		curl_setopt($ch, CURLOPT_CUSTOMREQUEST, 'GET');
		curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
		curl_setopt($ch, CURLOPT_HTTPHEADER, array(
				'Accept: application/json',
				$token_header
			)
		);
		curl_setopt($ch, CURLOPT_TIMEOUT, 5);
		curl_setopt($ch, CURLOPT_CONNECTTIMEOUT, 5);
		$result = curl_exec($ch);
		curl_close($ch);
		$result_array = (array)json_decode($result);
		if(isset($result_array['Error'])) {
			throw new Exception($result_array['Error']);
		}
		return $result_array;
	}

	/**
	 * Returns the highest aired season number of the series.
	 */
	private function getLatestSeasonNumber($token, $serie_id){
		// get the last season
		$url = $this->getApiUri() . 'series/' . $serie_id . '/episodes/summary';
		$summary = $this->curlJob($token, $url);
		return max($summary['data']->airedSeasons);
	}

	/**
	 * Returns the name of the series.
	 */
	private function getSerieName($token, $serie_id){
		$url = $this->getApiUri() . 'series/' . $serie_id;
		$serie = $this->curlJob($token, $url);
		return $serie['data']->seriesName;
	}

	/**
	 * Appends the last episodes of one season to $episodelist.
	 *
	 * @param string $token Bearer token
	 * @param int $serie_id Series id
	 * @param int $season Aired season number (0 is used for specials)
	 * @param string $seriename Name stored as item author
	 * @param array $episodelist List the episodes are appended to
	 * @param int $nbepisodemin Maximum number of episodes to take from the
	 *  end of the season
	 * @param int $page Result page to request
	 */
	private function getSeasonEpisodes($token,
	$serie_id,
	$season,
	$seriename,
	&$episodelist,
	$nbepisodemin,
	$page = 1){
		// Both values belong to the same query string, so the second
		// parameter has to be introduced with "&", not with another "?".
		$url = $this->getApiUri()
		. 'series/'
		. $serie_id
		. '/episodes/query?airedSeason='
		. $season
		. '&page='
		. $page;

		$response = $this->curlJob($token, $url);
		// we don't check the number of pages because we assume there are
		// fewer than 100 episodes in every season
		$episodes = isset($response['data']) ? (array)$response['data'] : array();
		$episodes = array_slice($episodes, -$nbepisodemin, $nbepisodemin);
		foreach($episodes as $episode) {
			$episodedata = array();
			$episodedata['uri'] = $this->getURI()
			. '?tab=episode&seriesid='
			. $serie_id
			. '&seasonid='
			. $episode->airedSeasonID
			. '&id='
			. $episode->id;

			// check if the absoluteNumber exist
			if(isset($episode->absoluteNumber)) {
				$episodedata['title'] = 'S'
				. $episode->airedSeason
				. 'E'
				. $episode->airedEpisodeNumber
				. '('
				. $episode->absoluteNumber
				. ') : '
				. $episode->episodeName;
			} else {
				$episodedata['title'] = 'S'
				. $episode->airedSeason
				. 'E'
				. $episode->airedEpisodeNumber
				. ' : '
				. $episode->episodeName;
			}
			$episodedata['author'] = $seriename;
			$date = DateTime::createFromFormat(
				'Y-m-d H:i:s',
				$episode->firstAired . ' 00:00:00'
			);

			// createFromFormat() returns false for an unparsable date (for
			// instance an empty firstAired); use 0 so the episode sorts last
			$episodedata['timestamp'] = $date !== false ? $date->getTimestamp() : 0;
			$episodedata['content'] = $episode->overview;
			$episodelist[] = $episodedata;
		}
	}

	/**
	 * Collects episodes season by season, starting with the latest one,
	 * adds the specials and keeps the most recent "nb_episode" entries.
	 */
	public function collectData(){
		$serie_id = $this->getInput('serie_id');
		$nbepisode = $this->getInput('nb_episode');
		$episodelist = array();
		$token = $this->getToken();
		$maxseason = $this->getLatestSeasonNumber($token, $serie_id);
		$seriename = $this->getSerieName($token, $serie_id);
		$season = $maxseason;
		while(sizeof($episodelist) < $nbepisode && $season >= 1) {
			$nbepisodetmp = $nbepisode - sizeof($episodelist);
			$this->getSeasonEpisodes(
				$token,
				$serie_id,
				$season,
				$seriename,
				$episodelist,
				$nbepisodetmp
			);

			$season = $season - 1;
		}
		// add the last special episodes (season 0)
		try { // catch to avoid error if empty
			$this->getSeasonEpisodes(
				$token,
				$serie_id,
				0,
				$seriename,
				$episodelist,
				$nbepisode
			);
		} catch(Exception $e) {
			unset($e);
		}
		// sort newest first and keep the requested number of episodes; works
		// badly with series whose episodes are all released at once
		usort(
			$episodelist,
			function ($a, $b){
				// usort() expects an integer, not a boolean
				if($a['timestamp'] == $b['timestamp']) {
					return 0;
				}
				return $a['timestamp'] < $b['timestamp'] ? 1 : -1;
			}
		);
		$this->items = array_slice($episodelist, 0, $nbepisode);
	}
}
array( + 'page' => array( + 'name' => 'Page', + 'type' => 'list', + 'required' => false, + 'values' => array( + 'Series' => self::PAGE_SERIES, + 'Series VOST' => self::PAGE_SERIES_VOSTFR, + 'Series FR' => self::PAGE_SERIES_FR, + ), + 'defaultValue' => self::PAGE_SERIES + ) + ) + ); + + public function collectData(){ + + if($this->queriedContext === 'From search') { + $request = str_replace(' ', '-', trim($this->getInput('q'))); + $page = self::URI . '/search_torrent/' . urlencode($request) . '.html'; + } else { + $request = $this->getInput('page'); + $page = self::URI . '/' . $request . '.html'; + } + + $html = getSimpleHTMLDOM($page) + or returnServerError('No results for this query.'); + + foreach($html->find('table', 0)->find('tr') as $episode) { + if($episode->parent->tag == 'tbody') { + + $urlepisode = self::URI . $episode->find('a', 0)->getAttribute('href'); + + //30 years = forever + $htmlepisode = getSimpleHTMLDOMCached($urlepisode, 86400 * 366 * 30); + + $item = array(); + $item['author'] = $episode->find('a', 0)->text(); + $item['title'] = $episode->find('a', 0)->text(); + $item['id'] = $episode->find('a', 0)->getAttribute('href'); + $item['pubdate'] = $this->getCachedDate($urlepisode); + + $textefiche = $htmlepisode->find('.movie-information', 0)->find('p', 1); + if(isset($textefiche)) { + $item['content'] = $textefiche->text(); + } else { + $p = $htmlepisode->find('.movie-information', 0)->find('p'); + if(!empty($p)) { + $item['content'] = $htmlepisode->find('.movie-information', 0)->find('p', 0)->text(); + } + } + + $item['id'] = $episode->find('a', 0)->getAttribute('href'); + $item['uri'] = self::URI . $htmlepisode->find('.download', 0)->getAttribute('href'); + + $this->items[] = $item; + } + } + } + + + public function getName(){ + if(!is_null($this->getInput('q'))) { + return $this->getInput('q') . ' : ' . self::NAME; + } + + return parent::getName(); + } + + private function getCachedDate($url){ + debugMessage('getting pubdate from url ' . $url . 
''); + // Initialize cache + $cache = Cache::create('FileCache'); + $cache->setPath(CACHE_DIR . '/pages'); + $params = [$url]; + $cache->setParameters($params); + // Get cachefile timestamp + $time = $cache->getTime(); + return ($time !== false ? $time : time()); + } +} diff --git a/bridges/TwitterBridge.php b/bridges/TwitterBridge.php new file mode 100644 index 0000000..d588f6b --- /dev/null +++ b/bridges/TwitterBridge.php @@ -0,0 +1,287 @@ +<?php +class TwitterBridge extends BridgeAbstract { + const NAME = 'Twitter Bridge'; + const URI = 'https://twitter.com/'; + const CACHE_TIMEOUT = 300; // 5min + const DESCRIPTION = 'returns tweets'; + const MAINTAINER = 'pmaziere'; + const PARAMETERS = array( + 'global' => array( + 'nopic' => array( + 'name' => 'Hide profile pictures', + 'type' => 'checkbox', + 'title' => 'Activate to hide profile pictures in content' + ), + 'noimg' => array( + 'name' => 'Hide images in tweets', + 'type' => 'checkbox', + 'title' => 'Activate to hide images in tweets' + ) + ), + 'By keyword or hashtag' => array( + 'q' => array( + 'name' => 'Keyword or #hashtag', + 'required' => true, + 'exampleValue' => 'rss-bridge, #rss-bridge', + 'title' => 'Insert a keyword or hashtag' + ) + ), + 'By username' => array( + 'u' => array( + 'name' => 'username', + 'required' => true, + 'exampleValue' => 'sebsauvage', + 'title' => 'Insert a user name' + ), + 'norep' => array( + 'name' => 'Without replies', + 'type' => 'checkbox', + 'title' => 'Only return initial tweets' + ), + 'noretweet' => array( + 'name' => 'Without retweets', + 'required' => false, + 'type' => 'checkbox', + 'title' => 'Hide retweets' + ) + ) + ); + + public function getName(){ + switch($this->queriedContext) { + case 'By keyword or hashtag': + $specific = 'search '; + $param = 'q'; + break; + case 'By username': + $specific = '@'; + $param = 'u'; + break; + default: return parent::getName(); + } + return 'Twitter ' . $specific . 
$this->getInput($param); + } + + public function getURI(){ + switch($this->queriedContext) { + case 'By keyword or hashtag': + return self::URI + . 'search?q=' + . urlencode($this->getInput('q')) + . '&f=tweets'; + case 'By username': + return self::URI + . urlencode($this->getInput('u')); + // Always return without replies! + // . ($this->getInput('norep') ? '' : '/with_replies'); + default: return parent::getURI(); + } + } + + public function collectData(){ + $html = ''; + + $html = getSimpleHTMLDOM($this->getURI()); + if(!$html) { + switch($this->queriedContext) { + case 'By keyword or hashtag': + returnServerError('No results for this query.'); + case 'By username': + returnServerError('Requested username can\'t be found.'); + } + } + + $hidePictures = $this->getInput('nopic'); + + foreach($html->find('div.js-stream-tweet') as $tweet) { + + // Skip retweets? + if($this->getInput('noretweet') + && $tweet->getAttribute('data-screen-name') !== $this->getInput('u')) { + continue; + } + + // remove 'invisible' content + foreach($tweet->find('.invisible') as $invisible) { + $invisible->outertext = ''; + } + + // Skip protmoted tweets + $heading = $tweet->previousSibling(); + if(!is_null($heading) && + $heading->getAttribute('class') === 'promoted-tweet-heading' + ) { + continue; + } + + $item = array(); + // extract username and sanitize + $item['username'] = $tweet->getAttribute('data-screen-name'); + // extract fullname (pseudonym) + $item['fullname'] = $tweet->getAttribute('data-name'); + // get author + $item['author'] = $item['fullname'] . ' (@' . $item['username'] . ')'; + // get avatar link + $item['avatar'] = $tweet->find('img', 0)->src; + // get TweetID + $item['id'] = $tweet->getAttribute('data-tweet-id'); + // get tweet link + $item['uri'] = self::URI . 
substr($tweet->find('a.js-permalink', 0)->getAttribute('href'), 1); + // extract tweet timestamp + $item['timestamp'] = $tweet->find('span.js-short-timestamp', 0)->getAttribute('data-time'); + // generate the title + $item['title'] = strip_tags($this->fixAnchorSpacing($tweet->find('p.js-tweet-text', 0), '<a>')); + + $this->processContentLinks($tweet); + $this->processEmojis($tweet); + + // get tweet text + $cleanedTweet = str_replace( + 'href="/', + 'href="' . self::URI, + $tweet->find('p.js-tweet-text', 0)->innertext + ); + + // fix anchors missing spaces in-between + $cleanedTweet = $this->fixAnchorSpacing($cleanedTweet); + + // Add picture to content + $picture_html = ''; + if(!$hidePictures) { + $picture_html = <<<EOD +<a href="https://twitter.com/{$item['username']}"> +<img + style="align:top; width:75px; border:1px solid black;" + alt="{$item['username']}" + src="{$item['avatar']}" + title="{$item['fullname']}" /> +</a> +EOD; + } + + // Add embeded image to content + $image_html = ''; + $image = $this->getImageURI($tweet); + if(!$this->getInput('noimg') && !is_null($image)) { + // add enclosures + $item['enclosures'] = array($image . ':orig'); + + $image_html = <<<EOD +<a href="{$image}:orig"> +<img + style="align:top; max-width:558px; border:1px solid black;" + src="{$image}:thumb" /> +</a> +EOD; + } + + // add content + $item['content'] = <<<EOD +<div style="display: inline-block; vertical-align: top;"> + {$picture_html} +</div> +<div style="display: inline-block; vertical-align: top;"> + <blockquote>{$cleanedTweet}</blockquote> +</div> +<div style="display: block; vertical-align: top;"> + <blockquote>{$image_html}</blockquote> +</div> +EOD; + + // add quoted tweet + $quotedTweet = $tweet->find('div.QuoteTweet', 0); + if($quotedTweet) { + // get tweet text + $cleanedQuotedTweet = str_replace( + 'href="/', + 'href="' . 
self::URI, + $quotedTweet->find('div.tweet-text', 0)->innertext + ); + + $this->processContentLinks($quotedTweet); + $this->processEmojis($quotedTweet); + + // Add embeded image to content + $quotedImage_html = ''; + $quotedImage = $this->getQuotedImageURI($tweet); + if(!$this->getInput('noimg') && !is_null($quotedImage)) { + // add enclosures + $item['enclosures'] = array($quotedImage . ':orig'); + + $quotedImage_html = <<<EOD +<a href="{$quotedImage}:orig"> +<img + style="align:top; max-width:558px; border:1px solid black;" + src="{$quotedImage}:thumb" /> +</a> +EOD; + } + + $item['content'] = <<<EOD +<div style="display: inline-block; vertical-align: top;"> + <blockquote>{$cleanedQuotedTweet}</blockquote> +</div> +<div style="display: block; vertical-align: top;"> + <blockquote>{$quotedImage_html}</blockquote> +</div> +<hr> +{$item['content']} +EOD; + } + + // put out + $this->items[] = $item; + } + } + + private function processEmojis($tweet){ + // process emojis (reduce size) + foreach($tweet->find('img.Emoji') as $img) { + $img->style .= ' height: 1em;'; + } + } + + private function processContentLinks($tweet){ + // processing content links + foreach($tweet->find('a') as $link) { + if($link->hasAttribute('data-expanded-url')) { + $link->href = $link->getAttribute('data-expanded-url'); + } + $link->removeAttribute('data-expanded-url'); + $link->removeAttribute('data-query-source'); + $link->removeAttribute('rel'); + $link->removeAttribute('class'); + $link->removeAttribute('target'); + $link->removeAttribute('title'); + } + } + + private function fixAnchorSpacing($content){ + // fix anchors missing spaces in-between + return str_replace( + '<a', + ' <a', + $content + ); + } + + private function getImageURI($tweet){ + // Find media in tweet + $container = $tweet->find('div.AdaptiveMedia-container', 0); + if($container && $container->find('img', 0)) { + return $container->find('img', 0)->src; + } + + return null; + } + + private function 
getQuotedImageURI($tweet){ + // Find media in tweet + $container = $tweet->find('div.QuoteMedia-container', 0); + if($container && $container->find('img', 0)) { + return $container->find('img', 0)->src; + } + + return null; + } +} diff --git a/bridges/UnsplashBridge.php b/bridges/UnsplashBridge.php new file mode 100644 index 0000000..ee1040a --- /dev/null +++ b/bridges/UnsplashBridge.php @@ -0,0 +1,77 @@ +<?php +class UnsplashBridge extends BridgeAbstract { + + const MAINTAINER = 'nel50n'; + const NAME = 'Unsplash Bridge'; + const URI = 'http://unsplash.com/'; + const CACHE_TIMEOUT = 43200; // 12h + const DESCRIPTION = 'Returns the latests photos from Unsplash'; + + const PARAMETERS = array( array( + 'm' => array( + 'name' => 'Max number of photos', + 'type' => 'number', + 'defaultValue' => 20 + ), + 'w' => array( + 'name' => 'Width', + 'exampleValue' => '1920, 1680, …', + 'defaultValue' => '1920' + ), + 'q' => array( + 'name' => 'JPEG quality', + 'type' => 'number', + 'defaultValue' => 75 + ) + )); + + public function collectData(){ + $width = $this->getInput('w'); + $num = 0; + $max = $this->getInput('m'); + $quality = $this->getInput('q'); + $lastpage = 1; + + for($page = 1; $page <= $lastpage; $page++) { + $link = self::URI . '/grid?page=' . $page; + $html = getSimpleHTMLDOM($link) + or returnServerError('No results for this query.'); + + if($page === 1) { + preg_match( + '/=(\d+)$/', + $html->find('.pagination > a[!class]', -1)->href, + $matches + ); + + $lastpage = min($matches[1], ceil($max / 40)); + } + + foreach($html->find('.photo') as $element) { + $thumbnail = $element->find('img', 0); + $thumbnail->src = str_replace('https://', 'http://', $thumbnail->src); + + $item = array(); + $item['uri'] = str_replace( + array('q=75', 'w=400'), + array("q=$quality", "w=$width"), + $thumbnail->src).'.jpg'; // '.jpg' only for format hint + + $item['timestamp'] = time(); + $item['title'] = $thumbnail->alt; + $item['content'] = $item['title'] + . '<br><a href="' + . 
$item['uri'] + . '"><img src="' + . $thumbnail->src + . '" /></a>'; + + $this->items[] = $item; + + $num++; + if ($num >= $max) + break 2; + } + } + } +} diff --git a/bridges/UsbekEtRicaBridge.php b/bridges/UsbekEtRicaBridge.php new file mode 100644 index 0000000..4d5ba16 --- /dev/null +++ b/bridges/UsbekEtRicaBridge.php @@ -0,0 +1,110 @@ +<?php +class UsbekEtRicaBridge extends BridgeAbstract { + + const MAINTAINER = 'logmanoriginal'; + const NAME = 'Usbek & Rica Bridge'; + const URI = 'https://usbeketrica.com'; + const DESCRIPTION = 'Returns latest articles from the front page'; + + const PARAMETERS = array( + array( + 'limit' => array( + 'name' => 'Number of articles to return', + 'type' => 'number', + 'required' => false, + 'title' => 'Specifies the maximum number of articles to return', + 'defaultValue' => -1 + ), + 'fullarticle' => array( + 'name' => 'Load full article', + 'type' => 'checkbox', + 'required' => false, + 'title' => 'Activate to load full articles', + ) + ) + ); + + public function collectData(){ + $limit = $this->getInput('limit'); + $fullarticle = $this->getInput('fullarticle'); + $html = getSimpleHTMLDOM($this->getURI()); + + $articles = $html->find('div.details'); + + foreach($articles as $article) { + $item = array(); + + $title = $article->find('div.card-title', 0); + if($title) { + $item['title'] = $title->plaintext; + } else { + // Sometimes we get rubbish, ignore. + continue; + } + + $author = $article->find('div.author span', 0); + if($author) { + $item['author'] = $author->plaintext; + } + + $uri = $article->find('a.read', 0)->href; + if(substr($uri, 0, 1) === 'h') { // absolute uri + $item['uri'] = $uri; + } else { // relative uri + $item['uri'] = $this->getURI() . 
$uri; + } + + if($fullarticle) { + $content = $this->loadFullArticle($item['uri']); + } + + if($fullarticle && !is_null($content)) { + $item['content'] = $content; + } else { + $excerpt = $article->find('div.card-excerpt', 0); + if($excerpt) { + $item['content'] = $excerpt->plaintext; + } + } + + $image = $article->find('div.card-img img', 0); + if($image) { + $item['enclosures'] = array( + $image->src + ); + } + + $this->items[] = $item; + + if($limit > 0 && count($this->items) >= $limit) { + break; + } + } + } + + /** + * Loads the full article and returns the contents + * @param $uri The article URI + * @return The article content + */ + private function loadFullArticle($uri){ + $html = getSimpleHTMLDOMCached($uri); + + $content = $html->find('section.main', 0); + if($content) { + return $this->replaceUriInHtmlElement($content); + } + + return null; + } + + /** + * Replaces all relative URIs with absolute ones + * @param $element A simplehtmldom element + * @return The $element->innertext with all URIs replaced + */ + private function replaceUriInHtmlElement($element){ + return str_replace('href="/', 'href="' . $this->getURI() . '/', $element->innertext); + } + +} diff --git a/bridges/ViadeoCompanyBridge.php b/bridges/ViadeoCompanyBridge.php new file mode 100644 index 0000000..3f76188 --- /dev/null +++ b/bridges/ViadeoCompanyBridge.php @@ -0,0 +1,37 @@ +<?php +class ViadeoCompanyBridge extends BridgeAbstract { + + const MAINTAINER = 'regisenguehard'; + const NAME = 'Viadeo Company'; + const URI = 'https://www.viadeo.com/'; + const CACHE_TIMEOUT = 21600; // 6h + const DESCRIPTION = 'Returns most recent actus from Company on Viadeo. + (http://www.viadeo.com/fr/company/<strong style="font-weight:bold;">apple</strong>)'; + + const PARAMETERS = array( array( + 'c' => array( + 'name' => 'Company name', + 'required' => true + ) + )); + + public function collectData(){ + $html = ''; + $link = self::URI . 'fr/company/' . 
$this->getInput('c'); + + $html = getSimpleHTMLDOM($link) + or returnServerError('Could not request Viadeo.'); + + foreach($html->find('//*[@id="company-newsfeed"]/ul/li') as $element) { + $title = $element->find('p', 0)->innertext; + if($title) { + $item = array(); + $item['uri'] = $link; + $item['title'] = mb_substr($element->find('p', 0)->innertext, 0, 100); + $item['content'] = $element->find('p', 0)->innertext;; + $this->items[] = $item; + $i++; + } + } + } +} diff --git a/bridges/VineBridge.php b/bridges/VineBridge.php new file mode 100644 index 0000000..61534a0 --- /dev/null +++ b/bridges/VineBridge.php @@ -0,0 +1,40 @@ +<?php +class VineBridge extends BridgeAbstract { + + const MAINTAINER = 'ckiw'; + const NAME = 'Vine bridge'; + const URI = 'http://vine.co/'; + const DESCRIPTION = 'Returns the latests vines from vine user page'; + + const PARAMETERS = array( array( + 'u' => array( + 'name' => 'User id', + 'required' => true + ) + )); + + public function collectData(){ + $html = ''; + $uri = self::URI . '/u/' . $this->getInput('u') . 
'?mode=list'; + + $html = getSimpleHTMLDOM($uri) + or returnServerError('No results for this query.'); + + foreach($html->find('.post') as $element) { + $a = $element->find('a', 0); + $a->href = str_replace('https://', 'http://', $a->href); + $time = strtotime(ltrim($element->find('p', 0)->plaintext, ' Uploaded at ')); + $video = $element->find('video', 0); + $video->controls = 'true'; + $element->find('h2', 0)->outertext = ''; + + $item = array(); + $item['uri'] = $a->href; + $item['timestamp'] = $time; + $item['title'] = $a->plaintext; + $item['content'] = $element; + + $this->items[] = $item; + } + } +} diff --git a/bridges/VkBridge.php b/bridges/VkBridge.php new file mode 100644 index 0000000..9981da1 --- /dev/null +++ b/bridges/VkBridge.php @@ -0,0 +1,66 @@ +<?php +class VkBridge extends BridgeAbstract { + + const MAINTAINER = 'ahiles3005'; + const NAME = 'VK.com'; + const URI = 'http://vk.com/'; + const CACHE_TIMEOUT = 300; // 5min + const DESCRIPTION = 'Working with open pages'; + const PARAMETERS = array( + array( + 'u' => array( + 'name' => 'Group or user name', + 'required' => true + ) + ) + ); + + public function getURI(){ + if(!is_null($this->getInput('u'))) { + return static::URI . urlencode($this->getInput('u')); + } + + return parent::getURI(); + } + + public function collectData(){ + + ini_set('user-agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0'); + + $text_html = getContents($this->getURI()) + or returnServerError('No results for group or user name "' . $this->getInput('u') . 
'".'); + + $text_html = iconv('windows-1251', 'utf-8', $text_html); + $html = str_get_html($text_html); + + foreach($html->find('.post') as $post) { + + if(is_object($post->find('a.wall_post_more', 0))) { + //delete link "show full" in content + $post->find('a.wall_post_more', 0)->outertext = ''; + } + $item = array(); + $item['content'] = strip_tags(backgroundToImg($post->find('div.wall_text', 0)->innertext), '<br><img>'); + if(is_object($post->find('a.page_media_link_title', 0))) { + $link = $post->find('a.page_media_link_title', 0)->getAttribute('href'); + + //external link in the post + $item['content'] .= "\n\rExternal link: " + . str_replace('/away.php?to=', '', urldecode($link)); + } + + //get video on post + if(is_object($post->find('span.post_video_title_content', 0))) { + $titleVideo = $post->find('span.post_video_title_content', 0)->plaintext; + $linkToVideo = self::URI . $post->find('a.page_post_thumb_video', 0)->getAttribute('href'); + $item['content'] .= "\n\r {$titleVideo}: {$linkToVideo}"; + } + + // get post link + $item['uri'] = self::URI . 
$post->find('a.post_link', 0)->getAttribute('href'); + $item['date'] = $post->find('span.rel_date', 0)->plaintext; + $this->items[] = $item; + // var_dump($item['date']); + } + } +} diff --git a/bridges/WallpaperStopBridge.php b/bridges/WallpaperStopBridge.php new file mode 100644 index 0000000..3578e71 --- /dev/null +++ b/bridges/WallpaperStopBridge.php @@ -0,0 +1,107 @@ +<?php +class WallpaperStopBridge extends BridgeAbstract { + + const MAINTAINER = 'nel50n'; + const NAME = 'WallpaperStop Bridge'; + const URI = 'http://www.wallpaperstop.com'; + const CACHE_TIMEOUT = 43200; // 12h + const DESCRIPTION = 'Returns the latests wallpapers from WallpaperStop'; + + const PARAMETERS = array( array( + 'c' => array( + 'name' => 'Category' + ), + 's' => array( + 'name' => 'subcategory' + ), + 'm' => array( + 'name' => 'Max number of wallpapers', + 'type' => 'number', + 'defaultValue' => 20 + ), + 'r' => array( + 'name' => 'resolution', + 'exampleValue' => '1920x1200, 1680x1050,…', + 'defaultValue' => '1920x1200' + ) + )); + + public function collectData(){ + $category = $this->getInput('c'); + $subcategory = $this->getInput('s'); + $resolution = $this->getInput('r'); + + $num = 0; + $max = $this->getInput('m'); + $lastpage = 1; + + for($page = 1; $page <= $lastpage; $page++) { + $link = self::URI + . '/' + . $category + . '-wallpaper/' + . (!empty($subcategory) ? $subcategory . '-wallpaper/' : '') + . 'desktop-wallpaper-' + . $page + . '.html'; + + $html = getSimpleHTMLDOM($link) + or returnServerError('No results for this query.'); + + if($page === 1) { + preg_match('/-(\d+)\.html$/', $html->find('.pagination > .last', 0)->href, $matches); + $lastpage = min($matches[1], ceil($max / 20)); + } + + foreach($html->find('article.item') as $element) { + $wplink = $element->getAttribute('data-permalink'); + if(preg_match('%^' . self::URI . 
'/(.+)/([^/]+)-(\d+)\.html$%', $wplink, $matches)) { + $thumbnail = $element->find('img', 0); + + $item = array(); + $item['uri'] = self::URI + . '/wallpapers/' + . str_replace('wallpaper', 'wallpapers', $matches[1]) + . '/' + . $matches[2] + . '-' + . $resolution + . '-' + . $matches[3] + . '.jpg'; + + $item['id'] = $matches[3]; + $item['timestamp'] = time(); + $item['title'] = $thumbnail->title; + $item['content'] = $item['title'] + . '<br><a href="' + . $wplink + . '"><img src="' + . self::URI + . $thumbnail->src + . '" /></a>'; + + $this->items[] = $item; + + $num++; + if ($num >= $max) + break 2; + } + } + } + } + + public function getName(){ + if(!is_null($this->getInput('s')) && !is_null($this->getInput('c')) && !is_null($this->getInput('r'))) { + $subcategory = $this->getInput('s'); + return 'WallpaperStop - ' + . $this->getInput('c') + . (!empty($subcategory) ? ' > ' . $subcategory : '') + . ' [' + . $this->getInput('r') + . ']'; + } + + return parent::getName(); + } +} diff --git a/bridges/WeLiveSecurityBridge.php b/bridges/WeLiveSecurityBridge.php new file mode 100644 index 0000000..466a4b2 --- /dev/null +++ b/bridges/WeLiveSecurityBridge.php @@ -0,0 +1,45 @@ +<?php +class WeLiveSecurityBridge extends FeedExpander { + + const MAINTAINER = 'ORelio'; + const NAME = 'We Live Security'; + const URI = 'http://www.welivesecurity.com/'; + const DESCRIPTION = 'Returns the newest articles.'; + + private function stripWithDelimiters($string, $start, $end){ + while(strpos($string, $start) !== false) { + $section_to_remove = substr($string, strpos($string, $start)); + $section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end)); + $string = str_replace($section_to_remove, '', $string); + } + + return $string; + } + + + protected function parseItem($item){ + $item = parent::parseItem($item); + + $article_html = getSimpleHTMLDOMCached($item['uri']); + if(!$article_html) { + $item['content'] .= '<p>Could not request ' . 
$this->getName() . ': ' . $item['uri'] . '</p>'; + return $item; + } + + $article_content = $article_html->find('div.wlistingsingletext', 0)->innertext; + $article_content = $this->stripWithDelimiters($article_content, '<script', '</script>'); + $article_content = '<p><b>' + . $item['content'] + . '</b></p>' + . trim($article_content); + + $item['content'] = $article_content; + + return $item; + } + + public function collectData(){ + $feed = static::URI . 'feed/'; + $this->collectExpandableDatas($feed); + } +} diff --git a/bridges/WebfailBridge.php b/bridges/WebfailBridge.php new file mode 100644 index 0000000..2a63740 --- /dev/null +++ b/bridges/WebfailBridge.php @@ -0,0 +1,149 @@ +<?php +class WebfailBridge extends BridgeAbstract { + const MAINTAINER = 'logmanoriginal'; + const URI = 'https://webfail.com'; + const NAME = 'Webfail'; + const DESCRIPTION = 'Returns the latest fails'; + const PARAMETERS = array( + 'By content type' => array( + 'language' => array( + 'name' => 'Language', + 'type' => 'list', + 'title' => 'Select your language', + 'values' => array( + 'English' => 'en', + 'German' => 'de' + ), + 'defaultValue' => 'English' + ), + 'type' => array( + 'name' => 'Type', + 'type' => 'list', + 'title' => 'Select your content type', + 'values' => array( + 'None' => '/', + 'Facebook' => '/ffdts', + 'Images' => '/images', + 'Videos' => '/videos', + 'Gifs' => '/gifs' + ), + 'defaultValue' => 'None' + ) + ) + ); + + public function getURI(){ + if(is_null($this->getInput('language'))) + return parent::getURI(); + + // e.g.: https://en.webfail.com + return 'https://' . $this->getInput('language') . '.webfail.com'; + } + + public function collectData(){ + + ini_set('user_agent', 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:53.0) Gecko/20100101 Firefox/53.0'); + + $html = getSimpleHTMLDOM($this->getURI() . 
$this->getInput('type')); + + $type = array_search($this->getInput('type'), + self::PARAMETERS[$this->queriedContext]['type']['values']); + + switch(strtolower($type)) { + case 'facebook': + case 'videos': + $this->extractNews($html, $type); + break; + case 'none': + case 'images': + case 'gifs': + $this->extractArticle($html); + break; + default: returnClientError('Unknown type: ' . $type); + } + } + + private function extractNews($html, $type){ + $news = $html->find('#main', 0)->find('a.wf-list-news'); + foreach($news as $element) { + $item = array(); + $item['title'] = $this->fixTitle($element->find('div.wf-news-title', 0)->innertext); + $item['uri'] = $this->getURI() . $element->href; + + $img = $element->find('img.wf-image', 0)->src; + // Load high resolution image for 'facebook' + switch(strtolower($type)) { + case 'facebook': + $img = $this->getImageHiResUri($item['uri']); + break; + default: + } + + $description = ''; + if(!is_null($element->find('div.wf-news-description', 0))) { + $description = $element->find('div.wf-news-description', 0)->innertext; + } + + $item['content'] = '<p>' + . $description + . '</p><br><a href="' + . $item['uri'] + . '"><img src="' + . $img + . '"></a>'; + + $this->items[] = $item; + } + } + + private function extractArticle($html){ + $articles = $html->find('article'); + foreach($articles as $article) { + $item = array(); + $item['title'] = $this->fixTitle($article->find('a', 1)->innertext); + + // Images, videos and gifs are provided in their own unique way + if(!is_null($article->find('img.wf-image', 0))) { // Image type + $item['uri'] = $this->getURI() . $article->find('a', 2)->href; + $item['content'] = '<a href="' + . $item['uri'] + . '"><img src="' + . $article->find('img.wf-image', 0)->src + . '"></a>'; + } elseif(!is_null($article->find('div.wf-video', 0))) { // Video type + $videoId = $this->getVideoId($article->find('div.wf-play', 0)->onclick); + $item['uri'] = 'https://youtube.com/watch?v=' . 
$videoId; + $item['content'] = '<a href="' + . $item['uri'] + . '"><img src="http://img.youtube.com/vi/' + . $videoId + . '/0.jpg"></a>'; + } elseif(!is_null($article->find('video[id*=gif-]', 0))) { // Gif type + $item['uri'] = $this->getURI() . $article->find('a', 2)->href; + $item['content'] = '<video controls src="' + . $article->find('video[id*=gif-]', 0)->src + . '" poster="' + . $article->find('video[id*=gif-]', 0)->poster + . '"></video>'; + } + + $this->items[] = $item; + } + } + + private function fixTitle($title){ + // This fixes titles that include umlauts (in German language) + return html_entity_decode($title, ENT_QUOTES | ENT_HTML401, 'UTF-8'); + } + + private function getVideoId($onclick){ + return substr($onclick, 21, 11); + } + + private function getImageHiResUri($url){ + // https://de.webfail.com/ef524fae509?tag=ffdt + // http://cdn.webfail.com/upl/img/ef524fae509/post2.jpg + $id = substr($url, strrpos($url, '/') + 1, strlen($url) - strrpos($url, '?') + 2); + return 'http://cdn.webfail.com/upl/img/' . $id . '/post2.jpg'; + } +} diff --git a/bridges/WhydBridge.php b/bridges/WhydBridge.php new file mode 100644 index 0000000..accdb75 --- /dev/null +++ b/bridges/WhydBridge.php @@ -0,0 +1,56 @@ +<?php +class WhydBridge extends BridgeAbstract { + + const MAINTAINER = 'kranack'; + const NAME = 'Whyd Bridge'; + const URI = 'http://www.whyd.com/'; + const CACHE_TIMEOUT = 600; // 10min + const DESCRIPTION = 'Returns 10 newest music from user profile'; + + const PARAMETERS = array( array( + 'u' => array( + 'name' => 'username/id', + 'required' => true + ) + )); + + private $userName = ''; + + public function collectData(){ + $html = ''; + if(strlen(preg_replace("/[^0-9a-f]/", '', $this->getInput('u'))) == 24) { + // is input the userid ? + $html = getSimpleHTMLDOM( + self::URI . 'u/' . 
preg_replace("/[^0-9a-f]/", '', $this->getInput('u')) + ) or returnServerError('No results for this query.'); + } else { // input may be the username + $html = getSimpleHTMLDOM( + self::URI . 'search?q=' . urlencode($this->getInput('u')) + ) or returnServerError('No results for this query.'); + + for($j = 0; $j < 5; $j++) { + if(strtolower($html->find('div.user', $j)->find('a', 0)->plaintext) == strtolower($this->getInput('u'))) { + $html = getSimpleHTMLDOM( + self::URI . $html->find('div.user', $j)->find('a', 0)->getAttribute('href') + ) or returnServerError('No results for this query'); + break; + } + } + } + $this->userName = $html->find('div#profileTop', 0)->find('h1', 0)->plaintext; + + for($i = 0; $i < 10; $i++) { + $track = $html->find('div.post', $i); + $item = array(); + $item['author'] = $track->find('h2', 0)->plaintext; + $item['title'] = $track->find('h2', 0)->plaintext; + $item['content'] = $track->find('a.thumb', 0) . '<br/>' . $track->find('h2', 0)->plaintext; + $item['id'] = self::URI . $track->find('a.no-ajaxy', 0)->getAttribute('href'); + $item['uri'] = self::URI . $track->find('a.no-ajaxy', 0)->getAttribute('href'); + $this->items[] = $item; + } + } + public function getName(){ + return (!empty($this->userName) ? $this->userName . ' - ' : '') . 
'Whyd Bridge'; + } +} diff --git a/bridges/WikiLeaksBridge.php b/bridges/WikiLeaksBridge.php new file mode 100644 index 0000000..c5b9bb6 --- /dev/null +++ b/bridges/WikiLeaksBridge.php @@ -0,0 +1,129 @@ +<?php +class WikiLeaksBridge extends BridgeAbstract { + const NAME = 'WikiLeaks'; + const URI = 'https://wikileaks.org'; + const DESCRIPTION = 'Returns the latest news or articles from WikiLeaks'; + const MAINTAINER = 'logmanoriginal'; + const PARAMETERS = array( + array( + 'category' => array( + 'name' => 'Category', + 'type' => 'list', + 'required' => true, + 'title' => 'Select your category', + 'values' => array( + 'News' => '-News-', + 'Leaks' => array( + 'All' => '-Leaks-', + 'Intelligence' => '+-Intelligence-+', + 'Global Economy' => '+-Global-Economy-+', + 'International Politics' => '+-International-Politics-+', + 'Corporations' => '+-Corporations-+', + 'Government' => '+-Government-+', + 'War & Military' => '+-War-Military-+' + ) + ), + 'defaultValue' => 'news' + ), + 'teaser' => array( + 'name' => 'Show teaser', + 'type' => 'checkbox', + 'required' => false, + 'title' => 'If checked feeds will display the teaser', + 'defaultValue' => true + ) + ) + ); + + public function collectData(){ + $html = getSimpleHTMLDOM($this->getURI()); + + // News are presented differently + switch($this->getInput('category')) { + case '-News-': + $this->loadNewsItems($html); + break; + default: + $this->loadLeakItems($html); + } + } + + public function getURI(){ + if(!is_null($this->getInput('category'))) { + return static::URI . '/' . $this->getInput('category') . '.html'; + } + + return parent::getURI(); + } + + public function getName(){ + if(!is_null($this->getInput('category'))) { + $category = array_search( + $this->getInput('category'), + static::PARAMETERS[0]['category']['values'] + ); + + if($category === false) { + $category = array_search( + $this->getInput('category'), + static::PARAMETERS[0]['category']['values']['Leaks'] + ); + } + + return $category . ' - ' . 
static::NAME; + } + + return parent::getName(); + } + + private function loadNewsItems($html){ + $articles = $html->find('div.news-articles ul li'); + + if(is_null($articles) || count($articles) === 0) { + return; + } + + foreach($articles as $article) { + $item = array(); + + $item['title'] = $article->find('h3', 0)->plaintext; + $item['uri'] = static::URI . $article->find('h3 a', 0)->href; + $item['content'] = $article->find('div.introduction', 0)->plaintext; + $item['timestamp'] = strtotime($article->find('div.timestamp', 0)->plaintext); + + $this->items[] = $item; + } + } + + private function loadLeakItems($html){ + $articles = $html->find('li.tile'); + + if(is_null($articles) || count($articles) === 0) { + return; + } + + foreach($articles as $article) { + $item = array(); + + $item['title'] = $article->find('h2', 0)->plaintext; + $item['uri'] = static::URI . $article->find('a', 0)->href; + + $teaser = static::URI . '/' . $article->find('div.teaser img', 0)->src; + + if($this->getInput('teaser')) { + $item['content'] = '<img src="' + . $teaser + . '" /><p>' + . $article->find('div.intro', 0)->plaintext + . '</p>'; + } else { + $item['content'] = $article->find('div.intro', 0)->plaintext; + } + + $item['timestamp'] = strtotime($article->find('div.timestamp', 0)->plaintext); + $item['enclosures'] = array($teaser); + + $this->items[] = $item; + } + } +} diff --git a/bridges/WikipediaBridge.php b/bridges/WikipediaBridge.php new file mode 100644 index 0000000..6b53440 --- /dev/null +++ b/bridges/WikipediaBridge.php @@ -0,0 +1,304 @@ +<?php + +define('WIKIPEDIA_SUBJECT_TFA', 0); // Today's featured article +define('WIKIPEDIA_SUBJECT_DYK', 1); // Did you know... 
+ +class WikipediaBridge extends BridgeAbstract { + const MAINTAINER = 'logmanoriginal'; + const NAME = 'Wikipedia bridge for many languages'; + const URI = 'https://www.wikipedia.org/'; + const DESCRIPTION = 'Returns articles for a language of your choice'; + + const PARAMETERS = array( array( + 'language' => array( + 'name' => 'Language', + 'type' => 'list', + 'required' => true, + 'title' => 'Select your language', + 'exampleValue' => 'English', + 'values' => array( + 'English' => 'en', + 'Dutch' => 'nl', + 'Esperanto' => 'eo', + 'French' => 'fr', + 'German' => 'de', + ) + ), + 'subject' => array( + 'name' => 'Subject', + 'type' => 'list', + 'required' => true, + 'title' => 'What subject are you interested in?', + 'exampleValue' => 'Today\'s featured article', + 'values' => array( + 'Today\'s featured article' => 'tfa', + 'Did you know…' => 'dyk' + ) + ), + 'fullarticle' => array( + 'name' => 'Load full article', + 'type' => 'checkbox', + 'title' => 'Activate to always load the full article' + ) + )); + + public function getURI(){ + if(!is_null($this->getInput('language'))) { + return 'https://' + . strtolower($this->getInput('language')) + . '.wikipedia.org'; + } + + return parent::getURI(); + } + + public function getName(){ + switch($this->getInput('subject')) { + case 'tfa': + $subject = WIKIPEDIA_SUBJECT_TFA; + break; + case 'dyk': + $subject = WIKIPEDIA_SUBJECT_DYK; + break; + default: return parent::getName(); + } + + switch($subject) { + case WIKIPEDIA_SUBJECT_TFA: + $name = 'Today\'s featured article from ' + . strtolower($this->getInput('language')) + . '.wikipedia.org'; + break; + case WIKIPEDIA_SUBJECT_DYK: + $name = 'Did you know? - articles from ' + . strtolower($this->getInput('language')) + . '.wikipedia.org'; + break; + default: + $name = 'Articles from ' + . strtolower($this->getInput('language')) + . 
'.wikipedia.org'; + break; + } + return $name; + } + + public function collectData(){ + + switch($this->getInput('subject')) { + case 'tfa': + $subject = WIKIPEDIA_SUBJECT_TFA; + break; + case 'dyk': + $subject = WIKIPEDIA_SUBJECT_DYK; + break; + default: + $subject = WIKIPEDIA_SUBJECT_TFA; + break; + } + + $fullArticle = $this->getInput('fullarticle'); + + // This will automatically send us to the correct main page in any language (try it!) + $html = getSimpleHTMLDOM($this->getURI() . '/wiki'); + + if(!$html) + returnServerError('Could not load site: ' . $this->getURI() . '!'); + + /* + * Now read content depending on the language (make sure to create one function per language!) + * We build the function name automatically, just make sure you create a private function ending + * with your desired language code, where the language code is upper case! (en -> getContentsEN). + */ + $function = 'getContents' . ucfirst(strtolower($this->getInput('language'))); + + if(!method_exists($this, $function)) + returnServerError('A function to get the contents for your language is missing (\'' . $function . '\')!'); + + /* + * The method takes care of creating all items. + */ + $this->$function($html, $subject, $fullArticle); + } + + /** + * Replaces all relative URIs with absolute ones + * @param $element A simplehtmldom element + * @return The $element->innertext with all URIs replaced + */ + private function replaceUriInHtmlElement($element){ + return str_replace('href="/', 'href="' . $this->getURI() . '/', $element->innertext); + } + + /* + * Adds a new item to $items using a generic operation (should work for most + * (all?) wikis) $anchorText can be specified if the wiki in question doesn't + * use '...' 
(like Dutch, French and Italian) $anchorFallbackIndex can be + * used to specify a different fallback link than the first + * (e.g., -1 for the last) + */ + private function addTodaysFeaturedArticleGeneric($element, + $fullArticle, + $anchorText = '...', + $anchorFallbackIndex = 0){ + // Clean the bottom of the featured article + if ($element->find('div', -1)) + $element->find('div', -1)->outertext = ''; + + // The title and URI of the article can be found in an anchor containing + // the string '...' in most wikis ('full article ...') + $target = $element->find('p/a', $anchorFallbackIndex); + foreach($element->find('//a') as $anchor) { + if(strpos($anchor->innertext, $anchorText) !== false) { + $target = $anchor; + break; + } + } + + $item = array(); + $item['uri'] = $this->getURI() . $target->href; + $item['title'] = $target->title; + + if(!$fullArticle) + $item['content'] = strip_tags($this->replaceUriInHtmlElement($element), '<a><p><br><img>'); + else + $item['content'] = $this->loadFullArticle($item['uri']); + + $this->items[] = $item; + } + + /* + * Adds a new item to $items using a generic operation (should work for most (all?) wikis) + */ + private function addDidYouKnowGeneric($element, $fullArticle){ + foreach($element->find('ul', 0)->find('li') as $entry) { + $item = array(); + + // We can only use the first anchor, there is no way of finding the 'correct' one if there are multiple + $item['uri'] = $this->getURI() . $entry->find('a', 0)->href; + $item['title'] = strip_tags($entry->innertext); + + if(!$fullArticle) + $item['content'] = $this->replaceUriInHtmlElement($entry); + else + $item['content'] = $this->loadFullArticle($item['uri']); + + $this->items[] = $item; + } + } + + /** + * Loads the full article from a given URI + */ + private function loadFullArticle($uri){ + $content_html = getSimpleHTMLDOMCached($uri); + + if(!$content_html) + returnServerError('Could not load site: ' . $uri . 
'!'); + + $content = $content_html->find('#mw-content-text', 0); + + if(!$content) + returnServerError('Could not find content in page: ' . $uri . '!'); + + // Let's remove a couple of things from the article + $table = $content->find('#toc', 0); // Table of contents + if(!$table === false) + $table->outertext = ''; + + foreach($content->find('ol.references') as $reference) // References + $reference->outertext = ''; + + return str_replace('href="/', 'href="' . $this->getURI() . '/', $content->innertext); + } + + /** + * Implementation for de.wikipedia.org + */ + private function getContentsDe($html, $subject, $fullArticle){ + switch($subject) { + case WIKIPEDIA_SUBJECT_TFA: + $element = $html->find('div[id=mf-tfa]', 0); + $this->addTodaysFeaturedArticleGeneric($element, $fullArticle); + break; + case WIKIPEDIA_SUBJECT_DYK: + $element = $html->find('div[id=mf-dyk]', 0); + $this->addDidYouKnowGeneric($element, $fullArticle); + break; + default: + break; + } + } + + /** + * Implementation for fr.wikipedia.org + */ + private function getContentsFr($html, $subject, $fullArticle){ + switch($subject) { + case WIKIPEDIA_SUBJECT_TFA: + $element = $html->find('div[class=accueil_2017_cadre]', 0); + $this->addTodaysFeaturedArticleGeneric($element, $fullArticle, 'Lire la suite'); + break; + case WIKIPEDIA_SUBJECT_DYK: + $element = $html->find('div[class=accueil_2017_cadre]', 2); + $this->addDidYouKnowGeneric($element, $fullArticle); + break; + default: + break; + } + } + + /** + * Implementation for en.wikipedia.org + */ + private function getContentsEn($html, $subject, $fullArticle){ + switch($subject) { + case WIKIPEDIA_SUBJECT_TFA: + $element = $html->find('div[id=mp-tfa]', 0); + $this->addTodaysFeaturedArticleGeneric($element, $fullArticle); + break; + case WIKIPEDIA_SUBJECT_DYK: + $element = $html->find('div[id=mp-dyk]', 0); + $this->addDidYouKnowGeneric($element, $fullArticle); + break; + default: + break; + } + } + + /** + * Implementation for eo.wikipedia.org + */ + 
private function getContentsEo($html, $subject, $fullArticle){ + switch($subject) { + case WIKIPEDIA_SUBJECT_TFA: + $element = $html->find('div[id=mf-artikolo-de-la-semajno]', 0); + $this->addTodaysFeaturedArticleGeneric($element, $fullArticle); + break; + case WIKIPEDIA_SUBJECT_DYK: + $element = $html->find('div[id=mw-content-text]', 0)->find('table', 4)->find('td', 4); + $this->addDidYouKnowGeneric($element, $fullArticle); + break; + default: + break; + } + } + + /** + * Implementation for nl.wikipedia.org + */ + private function getContentsNl($html, $subject, $fullArticle){ + switch($subject) { + case WIKIPEDIA_SUBJECT_TFA: + $element = $html->find('div[id=mf-uitgelicht]', 0); + $this->addTodaysFeaturedArticleGeneric($element, $fullArticle, 'Lees meer'); + break; + case WIKIPEDIA_SUBJECT_DYK: + $element = $html->find('div[id=mw-content-text]', 0)->find('table', 4)->find('td', 2); + $this->addDidYouKnowGeneric($element, $fullArticle); + break; + default: + break; + } + } +} diff --git a/bridges/WordPressBridge.php b/bridges/WordPressBridge.php new file mode 100644 index 0000000..b367adc --- /dev/null +++ b/bridges/WordPressBridge.php @@ -0,0 +1,76 @@ +<?php +class WordPressBridge extends FeedExpander { + const MAINTAINER = 'aledeg'; + const NAME = 'Wordpress Bridge'; + const URI = 'https://wordpress.org/'; + const CACHE_TIMEOUT = 10800; // 3h + const DESCRIPTION = 'Returns the newest full posts of a Wordpress powered website'; + + const PARAMETERS = array( array( + 'url' => array( + 'name' => 'Blog URL', + 'required' => true + ) + )); + + private function clearContent($content){ + $content = preg_replace('/<script[^>]*>[^<]*<\/script>/', '', $content); + $content = preg_replace('/<div class="wpa".*/', '', $content); + $content = preg_replace('/<form.*\/form>/', '', $content); + return $content; + } + + protected function parseItem($newItem){ + $item = parent::parseItem($newItem); + + $article_html = getSimpleHTMLDOMCached($item['uri']); + + $article = null; + 
// Try the known content containers in order of preference: <article> is the + // most common one, '.single-content' and '.post-content' are other common + // content divs, '.post' covers old WordPress themes without HTML5. + // The guard keeps a failed page load (falsy DOM) from causing a fatal error; + // the item is then returned as produced by parent::parseItem(). + if($article_html) { + foreach(array('article', '.single-content', '.post-content', '.post') as $selector) { + $candidate = $article_html->find($selector, 0); + if(!is_null($candidate)) { + $article = $candidate; + break; + } + } + } + + if(!is_null($article)) { + $item['content'] = $this->clearContent($article->innertext); + } + + return $item; + } + + public function getURI(){ + $url = $this->getInput('url'); + if(empty($url)) { + $url = parent::getURI(); + } + return $url; + } + + public function collectData(){ + if($this->getInput('url') && substr($this->getInput('url'), 0, strlen('http')) !== 'http') { + // just in case someone find a way to access local files by playing with the url + returnClientError('The url parameter must either refer to http or https protocol.'); + } + try{ + $this->collectExpandableDatas($this->getURI() . '/feed/atom/'); + } catch (HttpException $e) { + $this->collectExpandableDatas($this->getURI() . 
'/?feed=atom'); + } + + } +} diff --git a/bridges/WordPressPluginUpdateBridge.php b/bridges/WordPressPluginUpdateBridge.php new file mode 100644 index 0000000..cb57df8 --- /dev/null +++ b/bridges/WordPressPluginUpdateBridge.php @@ -0,0 +1,87 @@ +<?php +class WordPressPluginUpdateBridge extends BridgeAbstract { + + const MAINTAINER = 'teromene'; + const NAME = 'WordPress Plugins Update Bridge'; + const URI = 'https://wordpress.org/plugins/'; + const CACHE_TIMEOUT = 86400; // 24h = 86400s + const DESCRIPTION = 'Returns latest updates of WordPress.com plugins.'; + + const PARAMETERS = array( + array( + 'pluginUrl' => array( + 'name' => 'URL to the plugin', + 'required' => true + ) + ) + ); + + public function collectData(){ + + $request = str_replace('/', '', $this->getInput('pluginUrl')); + $page = self::URI . $request . '/changelog/'; + + $html = getSimpleHTMLDOM($page) + or returnServerError('No results for this query.'); + + $content = $html->find('.block-content', 0); + + $item = array(); + $item['content'] = ''; + $version = null; + + foreach($content->children() as $element) { + + if($element->tag != 'h4') { + + $item['content'] .= $element; + + } else { + + if($version == null) { + + $version = $element; + + } else { + + $item['title'] = $version; + $item['uri'] = 'https://downloads.wordpress.org/plugin/' . $request . '.' . strip_tags($version) . '.zip'; + $this->items[] = $item; + + $version = $element; + $item = array(); + $item['content'] = ''; + + } + + } + + } + + $item['uri'] = 'https://downloads.wordpress.org/plugin/' . $request . '.' . strip_tags($version) . '.zip'; + $item['title'] = $version; + $this->items[] = $item; + + } + + + /** + * Prefixes the bridge name with the requested plugin, falling back to the + * generic name when no plugin was requested. 'pluginUrl' is the only input + * declared in PARAMETERS, so it is the one that has to be read here. + */ + public function getName(){ + if(!is_null($this->getInput('pluginUrl'))) { + return $this->getInput('pluginUrl') . ' : ' . self::NAME; + } + + return parent::getName(); + } + + private function getCachedDate($url){ + debugMessage('getting pubdate from url ' . $url . 
''); + // Initialize cache + $cache = Cache::create('FileCache'); + $cache->setPath(CACHE_DIR . '/pages'); + $params = [$url]; + $cache->setParameters($params); + // Get cachefile timestamp + $time = $cache->getTime(); + return ($time !== false ? $time : time()); + } +} diff --git a/bridges/WorldOfTanksBridge.php b/bridges/WorldOfTanksBridge.php new file mode 100644 index 0000000..f783e29 --- /dev/null +++ b/bridges/WorldOfTanksBridge.php @@ -0,0 +1,72 @@ +<?php +class WorldOfTanksBridge extends BridgeAbstract { + + const MAINTAINER = 'mitsukarenai'; + const NAME = 'World of Tanks'; + const URI = 'http://worldoftanks.eu/'; + const DESCRIPTION = 'News about the tank slaughter game.'; + + const PARAMETERS = array( array( + 'category' => array( + // TODO: should be a list + 'name' => 'nom de la catégorie' + ), + 'lang' => array( + 'name' => 'Langue', + 'type' => 'list', + 'values' => array( + 'Français' => 'fr', + 'English' => 'en', + 'Español' => 'es', + 'Deutsch' => 'de', + 'Čeština' => 'cs', + 'Polski' => 'pl', + 'Türkçe' => 'tr' + ) + ) + )); + + private $title = ''; + + public function getURI(){ + if(!is_null($this->getInput('lang'))) { + $lang = $this->getInput('lang'); + $uri = self::URI . $lang . '/news/'; + if(!empty($this->getInput('category'))) { + $uri .= 'pc-browser/' . $this->getInput('category') . '/'; + } + return $uri; + } + + return parent::getURI(); + } + + public function getName(){ + return $this->title ?: parent::getName(); + } + + public function collectData(){ + $html = getSimpleHTMLDOM($this->getURI()) + or returnServerError('Could not request ' . $this->getURI()); + debugMessage("loaded HTML from " . $this->getURI()); + // customize name + $this->title = $html->find('title', 0)->innertext; + foreach($html->find('.b-imgblock_ico') as $infoLink) { + $this->parseLine($infoLink); + } + } + + private function parseLine($infoLink){ + $item = array(); + $item['uri'] = self::URI . 
$infoLink->href; + // now load that uri from cache + debugMessage('loading page ' . $item['uri']); + $articlePage = getSimpleHTMLDOMCached($item['uri']); + $content = $articlePage->find('.l-content', 0); + defaultLinkTo($content, self::URI); + $item['title'] = $content->find('h1', 0)->innertext; + $item['content'] = $content->find('.b-content', 0)->innertext; + $item['timestamp'] = $content->find('.b-statistic_time', 0)->getAttribute("data-timestamp"); + $this->items[] = $item; + } +} diff --git a/bridges/XbooruBridge.php b/bridges/XbooruBridge.php new file mode 100644 index 0000000..d3605be --- /dev/null +++ b/bridges/XbooruBridge.php @@ -0,0 +1,12 @@ +<?php +require_once('GelbooruBridge.php'); + +class XbooruBridge extends GelbooruBridge { + + const MAINTAINER = 'mitsukarenai'; + const NAME = 'Xbooru'; + const URI = 'http://xbooru.com/'; + const DESCRIPTION = 'Returns images from given page'; + + const PIDBYPAGE = 50; +} diff --git a/bridges/YandereBridge.php b/bridges/YandereBridge.php new file mode 100644 index 0000000..df8b30e --- /dev/null +++ b/bridges/YandereBridge.php @@ -0,0 +1,11 @@ +<?php +require_once('MoebooruBridge.php'); + +class YandereBridge extends MoebooruBridge { + + const MAINTAINER = 'mitsukarenai'; + const NAME = 'Yande.re'; + const URI = 'https://yande.re/'; + const DESCRIPTION = 'Returns images from given page and tags'; + +} diff --git a/bridges/YoutubeBridge.php b/bridges/YoutubeBridge.php new file mode 100644 index 0000000..dab3252 --- /dev/null +++ b/bridges/YoutubeBridge.php @@ -0,0 +1,189 @@ +<?php +/** +* RssBridgeYoutube +* Returns the newest videos +* WARNING: to parse big playlists (over ~90 videos), you need to edit simple_html_dom.php: +* change: define('MAX_FILE_SIZE', 600000); +* into: define('MAX_FILE_SIZE', 900000); (or more) +*/ +class YoutubeBridge extends BridgeAbstract { + + const NAME = 'YouTube Bridge'; + const URI = 'https://www.youtube.com/'; + const CACHE_TIMEOUT = 10800; // 3h + const DESCRIPTION = 'Returns the 10 
newest videos by username/channel/playlist or search'; + const MAINTAINER = 'mitsukarenai'; + + const PARAMETERS = array( + 'By username' => array( + 'u' => array( + 'name' => 'username', + 'exampleValue' => 'test', + 'required' => true + ) + ), + 'By channel id' => array( + 'c' => array( + 'name' => 'channel id', + 'exampleValue' => "15", + 'required' => true + ) + ), + 'By playlist Id' => array( + 'p' => array( + 'name' => 'playlist id', + 'exampleValue' => "15" + ) + ), + 'Search result' => array( + 's' => array( + 'name' => 'search keyword', + 'exampleValue' => 'test' + ), + 'pa' => array( + 'name' => 'page', + 'type' => 'number', + 'exampleValue' => 1 + ) + ) + ); + + private function ytBridgeQueryVideoInfo($vid, &$author, &$desc, &$time){ + $html = $this->ytGetSimpleHTMLDOM(self::URI . "watch?v=$vid"); + $author = $html->innertext; + $author = substr($author, strpos($author, '"author=') + 8); + $author = substr($author, 0, strpos($author, '\u0026')); + $desc = $html->find('div#watch-description-text', 0)->innertext; + $time = strtotime($html->find('meta[itemprop=datePublished]', 0)->getAttribute('content')); + } + + private function ytBridgeAddItem($vid, $title, $author, $desc, $time){ + $item = array(); + $item['id'] = $vid; + $item['title'] = $title; + $item['author'] = $author; + $item['timestamp'] = $time; + $item['uri'] = self::URI . 'watch?v=' . $vid; + $thumbnailUri = str_replace('/www.', '/img.', self::URI) . 'vi/' . $vid . '/0.jpg'; + $item['content'] = '<a href="' . $item['uri'] . '"><img src="' . $thumbnailUri . '" /></a><br />' . 
$desc; + $this->items[] = $item; + } + + private function ytBridgeParseXmlFeed($xml) { + foreach($xml->find('entry') as $element) { + $title = $this->ytBridgeFixTitle($element->find('title', 0)->plaintext); + $author = $element->find('name', 0)->plaintext; + $desc = $element->find('media:description', 0)->innertext; + + // Make sure the description is easy on the eye :) + $desc = htmlspecialchars($desc); + $desc = nl2br($desc); + $desc = preg_replace('/(http[s]{0,1}\:\/\/[a-zA-Z0-9.\/\?\&=\-_]{4,})/ims', + '<a href="$1" target="_blank">$1</a> ', + $desc); + + $vid = str_replace('yt:video:', '', $element->find('id', 0)->plaintext); + $time = strtotime($element->find('published', 0)->plaintext); + $this->ytBridgeAddItem($vid, $title, $author, $desc, $time); + } + $this->request = $this->ytBridgeFixTitle($xml->find('feed > title', 0)->plaintext); + } + + /** + * Adds up to 10 videos found in an HTML listing page. + * $element_selector matches one listing entry, $title_selector the title + * inside that entry. Entries titled '[Private Video]' are skipped and do + * not count towards the limit. + */ + private function ytBridgeParseHtmlListing($html, $element_selector, $title_selector){ + $limit = 10; + $count = 0; + foreach($html->find($element_selector) as $element) { + // Stop scanning once enough videos were collected + if($count >= $limit) + break; + + $author = ''; + $desc = ''; + $time = 0; + $vid = str_replace('/watch?v=', '', $element->find('a', 0)->href); + $title = $this->ytBridgeFixTitle($element->find($title_selector, 0)->plaintext); + if($title != '[Private Video]') { + // Fills $author, $desc and $time by reference + $this->ytBridgeQueryVideoInfo($vid, $author, $desc, $time); + $this->ytBridgeAddItem($vid, $title, $author, $desc, $time); + $count++; + } + } + } + + private function ytBridgeFixTitle($title) { + // decode both numeric (e.g. &#1234;) and named (e.g. &quot;) HTML entities to UTF-8 + return html_entity_decode($title, ENT_QUOTES, 'UTF-8'); + } + + private function ytGetSimpleHTMLDOM($url){ + return getSimpleHTMLDOM($url, + $use_include_path = false, + $context = null, + $offset = 0, + $maxLen = null, + $lowercase = true, + $forceTagsClosed = true, + $target_charset = DEFAULT_TARGET_CHARSET, + $stripRN = false, + $defaultBRText = DEFAULT_BR_TEXT, + $defaultSpanText = DEFAULT_SPAN_TEXT); + } + + public function collectData(){ + + $xml = ''; + $html 
= ''; + $url_feed = ''; + $url_listing = ''; + + if($this->getInput('u')) { /* User and Channel modes */ + $this->request = $this->getInput('u'); + $url_feed = self::URI . 'feeds/videos.xml?user=' . urlencode($this->request); + $url_listing = self::URI . 'user/' . urlencode($this->request) . '/videos'; + } elseif($this->getInput('c')) { + $this->request = $this->getInput('c'); + $url_feed = self::URI . 'feeds/videos.xml?channel_id=' . urlencode($this->request); + $url_listing = self::URI . 'channel/' . urlencode($this->request) . '/videos'; + } + + if(!empty($url_feed) && !empty($url_listing)) { + if($xml = $this->ytGetSimpleHTMLDOM($url_feed)) { + $this->ytBridgeParseXmlFeed($xml); + } elseif($html = $this->ytGetSimpleHTMLDOM($url_listing)) { + $this->ytBridgeParseHtmlListing($html, 'li.channels-content-item', 'h3'); + } else { + returnServerError("Could not request YouTube. Tried:\n - $url_feed\n - $url_listing"); + } + } elseif($this->getInput('p')) { /* playlist mode */ + $this->request = $this->getInput('p'); + $url_listing = self::URI . 'playlist?list=' . urlencode($this->request); + $html = $this->ytGetSimpleHTMLDOM($url_listing) + or returnServerError("Could not request YouTube. Tried:\n - $url_listing"); + $this->ytBridgeParseHtmlListing($html, 'tr.pl-video', '.pl-video-title a'); + $this->request = 'Playlist: ' . str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); + } elseif($this->getInput('s')) { /* search mode */ + $this->request = $this->getInput('s'); + $page = 1; + if($this->getInput('pa')) + $page = (int)preg_replace("/[^0-9]/", '', $this->getInput('pa')); + + $url_listing = self::URI + . 'results?search_query=' + . urlencode($this->request) + . '&page=' + . $page + . '&filters=video&search_sort=video_date_uploaded'; + + $html = $this->ytGetSimpleHTMLDOM($url_listing) + or returnServerError("Could not request YouTube. 
Tried:\n - $url_listing"); + + $this->ytBridgeParseHtmlListing($html, 'div.yt-lockup', 'h3'); + $this->request = 'Search: ' . str_replace(' - YouTube', '', $html->find('title', 0)->plaintext); + } else { /* no valid mode */ + // Kept on a single source line so that no raw line break or indentation ends up in the message + returnClientError("You must specify either:\n - YouTube username (?u=...)\n - Channel id (?c=...)\n - Playlist id (?p=...)\n - Search (?s=...)"); + } + } + + /** + * Bridge name, prefixed with the current request (user, channel, playlist + * or search title) once collectData() has set it. + */ + public function getName(){ + return (!empty($this->request) ? $this->request . ' - ' : '') . self::NAME; + } +} diff --git a/bridges/ZDNetBridge.php b/bridges/ZDNetBridge.php new file mode 100644 index 0000000..86e4b49 --- /dev/null +++ b/bridges/ZDNetBridge.php @@ -0,0 +1,302 @@ +<?php +class ZDNetBridge extends BridgeAbstract { + + const MAINTAINER = 'ORelio'; + const NAME = 'ZDNet Bridge'; + const URI = 'http://www.zdnet.com/'; + const DESCRIPTION = 'Technology News, Analysis, Comments and Product Reviews for IT Professionals.'; + + //http://www.zdnet.com/zdnet.opml + const PARAMETERS = array( array( + 'feed' => array( + 'name' => 'Feed', + 'type' => 'list', + 'values' => array( + 'Subscribe to ZDNet RSS Feeds' => array( + 'All Blogs' => 'blog', + 'Just News' => 'news', + 'All Reviews' => 'topic/reviews', + 'Latest Downloads' => 'downloads!recent', + 'Latest Articles' => '/', + 'Latest Australia Articles' => 'au', + 'Latest UK Articles' => 'uk', + 'Latest US Articles' => 'us', + 'Latest Asia Articles' => 'as' + ), + 'Keep up with ZDNet Blogs RSS:' => array( + 'Transforming the Datacenter' => 'blog/transforming-datacenter', + 'SMB India' => 'blog/smb-india', + 'Indonesia BizTech' => 'blog/indonesia-biztech', + 'Hong Kong Techie' => 'blog/hong-kong-techie', + 'Tech Taiwan' => 'blog/tech-taiwan', + 'Startup India' => 'blog/startup-india', + 'Starting Up Asia' => 'blog/starting-up-asia', + 'Next-Gen Partner' => 'blog/partner', + 'Post-PC Developments' => 'blog/post-pc', + 'Benelux' => 'blog/benelux', + 'Heat Sink' => 'blog/heat-sink', + 'Italy\'s got tech' => 'blog/italy', + 
'African Enterprise' => 'blog/african-enterprise', + 'New Tech for Old India' => 'blog/new-india', + 'Estonia Uncovered' => 'blog/estonia', + 'IT Iberia' => 'blog/iberia', + 'Brazil Tech' => 'blog/brazil', + '500 words into the future' => 'blog/500-words-into-the-future', + 'ÜberTech' => 'blog/ubertech', + 'All About Microsoft' => 'blog/microsoft', + 'Back office' => 'blog/back-office', + 'Barker Bites Back' => 'blog/barker-bites-back', + 'Between the Lines' => 'blog/btl', + 'Big on Data' => 'blog/big-data', + 'bootstrappr' => 'blog/bootstrappr', + 'By The Way' => 'blog/by-the-way', + 'Central European Processing' => 'blog/central-europe', + 'Cloud Builders' => 'blog/cloud-builders', + 'Communication Breakdown' => 'blog/communication-breakdown', + 'Collaboration 2.0' => 'blog/collaboration', + 'Constellation Research' => 'blog/constellation', + 'Consumerization: BYOD' => 'blog/consumerization', + 'DIY-IT' => 'blog/diy-it', + 'Enterprise Web 2.0' => 'blog/hinchcliffe', + 'Five Nines: The Next Gen Datacenter' => 'blog/datacenter', + 'Forrester Research' => 'blog/forrester', + 'Full Duplex' => 'blog/full-duplex', + 'Gen Why?' 
=> 'blog/gen-why', + 'Hardware 2.0' => 'blog/hardware', + 'Identity Matters' => 'blog/identity', + 'iGeneration' => 'blog/igeneration', + 'Internet of Everything' => 'blog/cisco', + 'Beyond IT Failure' => 'blog/projectfailures', + 'Jamie\'s Mostly Linux Stuff' => 'blog/jamies-mostly-linux-stuff', + 'Jack\'s Blog' => 'blog/jacks-blog', + 'Laptops & Desktops' => 'blog/computers', + 'Linux and Open Source' => 'blog/open-source', + 'London Calling' => 'blog/london', + 'Mapping Babel' => 'blog/mapping-babel', + 'Mixed Signals' => 'blog/mixed-signals', + 'Mobile India' => 'blog/mobile-india', + 'Mobile News' => 'blog/mobile-news', + 'Networking' => 'blog/networking', + 'Norse Code' => 'blog/norse-code', + 'Null Pointer' => 'blog/null-pointer', + 'The Full Tilt' => 'blog/the-full-tilt', + 'Pinoy Post' => 'blog/pinoy-post', + 'Practically Tech' => 'blog/practically-tech', + 'Product Central' => 'blog/product-central', + 'Pulp Tech' => 'blog/violetblue', + 'Qubits and Pieces' => 'blog/qubits-and-pieces', + 'Securify This!' 
=> 'blog/securify-this', + 'Service Oriented' => 'blog/service-oriented', + 'Small Talk' => 'blog/small-talk', + 'Small Business Matters' => 'blog/small-business-matters', + 'Smartphones and Cell Phones' => 'blog/cell-phones', + 'Social Business' => 'blog/feeds', + 'Social CRM: The Conversation' => 'blog/crm', + 'Software & Services Safari' => 'blog/sommer', + 'Storage Bits' => 'blog/storage', + 'Stacking up Open Clouds' => 'blog/apac-redhat', + 'Techie Isles' => 'blog/techie-isles', + 'Technolatte' => 'blog/technolatte', + 'Tech Podium' => 'blog/tech-podium', + 'Tel Aviv Tech' => 'blog/tel-aviv', + 'Tech Broiler' => 'blog/perlow', + 'The SANMAN' => 'blog/the-sanman', + 'The open source revolution' => 'blog/the-open-source-revolution', + 'The German View' => 'blog/german', + 'The Ed Bott Report' => 'blog/bott', + 'The Mobile Gadgeteer' => 'blog/mobile-gadgeteer', + 'The Apple Core' => 'blog/apple', + 'Tom Foremski: IMHO' => 'blog/foremski', + 'Twisted Wire' => 'blog/twisted-wire', + 'Vive la tech' => 'blog/france', + 'Virtually Speaking' => 'blog/virtualization', + 'View from China' => 'blog/china', + 'Web design & Free Software' => 'blog/web-design-and-free-software', + 'ZDNet Government' => 'blog/government', + 'ZDNet UK Book Reviews' => 'blog/zdnet-uk-book-reviews', + 'ZDNet UK First Take' => 'blog/zdnet-uk-first-take', + 'Zero Day' => 'blog/security' + ), + 'ZDNet Hot Topics RSS:' => array( + 'Apple' => 'topic/apple', + 'Collaboration' => 'topic/collaboration', + 'Enterprise Software' => 'topic/enterprise-software', + 'Google' => 'topic/google', + 'Great debate' => 'topic/great-debate', + 'Hardware' => 'topic/hardware', + 'IBM' => 'topic/ibm', + 'iOS' => 'topic/ios', + 'iPhone' => 'topic/iphone', + 'iPad' => 'topic/ipad', + 'IT Priorities' => 'topic/it-priorities', + 'Laptops' => 'topic/laptops', + 'Legal' => 'topic/legal', + 'Linux' => 'topic/linux', + 'Microsoft' => 'topic/microsoft', + 'Mobile OS' => 'topic/mobile-os', + 'Mobility' => 'topic/mobility', + 
'Networking' => 'topic/networking', + 'Oracle' => 'topic/oracle', + 'Processors' => 'topic/processors', + 'Samsung' => 'topic/samsung', + 'Security' => 'topic/security', + 'Small business: going big on mobility' => 'topic/small-business-going-big-on-mobility' + ), + 'Product Blogs:' => array( + 'Digital Cameras & Camcorders' => 'blog/digitalcameras', + 'Home Theater' => 'blog/home-theater', + 'Laptops and Desktops' => 'blog/computers', + 'The Mobile Gadgeteer' => 'blog/mobile-gadgeteer', + 'Smartphones and Cell Phones' => 'blog/cell-phones', + 'The ToyBox' => 'blog/gadgetreviews' + ), + 'Vertical Blogs:' => array( + 'ZDNet Education' => 'blog/education', + 'ZDNet Healthcare' => 'blog/healthcare', + 'ZDNet Government' => 'blog/government' + ) + ) + ) + )); + + public function collectData(){ + + // The helpers below are plain functions declared when this method runs. + // Each one is guarded with function_exists() so that running the method + // again in the same process does not fail with "Cannot redeclare". + + // Removes the CDATA markers and the surrounding whitespace + if(!function_exists('stripCdata')) { + function stripCdata($string){ + $string = str_replace('<![CDATA[', '', $string); + $string = str_replace(']]>', '', $string); + return trim($string); + } + } + + // Returns the text between the first $start and the next $end, or false when $start is absent + if(!function_exists('extractFromDelimiters')) { + function extractFromDelimiters($string, $start, $end){ + if(strpos($string, $start) !== false) { + $section_retrieved = substr($string, strpos($string, $start) + strlen($start)); + $section_retrieved = substr($section_retrieved, 0, strpos($section_retrieved, $end)); + return $section_retrieved; + } + + return false; + } + } + + // Removes every section running from $start to the following $end, delimiters included + if(!function_exists('stripWithDelimiters')) { + function stripWithDelimiters($string, $start, $end){ + while(strpos($string, $start) !== false) { + $section_to_remove = substr($string, strpos($string, $start)); + $section_to_remove = substr($section_to_remove, 0, strpos($section_to_remove, $end) + strlen($end)); + $string = str_replace($section_to_remove, '', $string); + } + + return $string; + } + } + + // Removes every element that begins with $tag_start, taking nested $tag_name elements into account + if(!function_exists('stripRecursiveHtmlSection')) { + function stripRecursiveHtmlSection($string, $tag_name, $tag_start){ + $open_tag = '<' . $tag_name; + $close_tag = '</' . $tag_name . 
'>'; + $close_tag_length = strlen($close_tag); + if(strpos($tag_start, $open_tag) === 0) { + while(strpos($string, $tag_start) !== false) { + $max_recursion = 100; + $section_to_remove = null; + $section_start = strpos($string, $tag_start); + $search_offset = $section_start; + do { + $max_recursion--; + $section_end = strpos($string, $close_tag, $search_offset); + // No closing tag left (unbalanced markup): stop and keep the + // remaining text instead of looping without making progress + if($section_end === false) + break 2; + $search_offset = $section_end + $close_tag_length; + $section_to_remove = substr( + $string, + $section_start, + $section_end - $section_start + $close_tag_length + ); + + // Extend the section until every opened tag inside it is closed + $open_tag_count = substr_count($section_to_remove, $open_tag); + $close_tag_count = substr_count($section_to_remove, $close_tag); + } while ($open_tag_count > $close_tag_count && $max_recursion > 0); + $string = str_replace($section_to_remove, '', $string); + } + } + return $string; + } + } + + $baseUri = self::URI; + $feed = $this->getInput('feed'); + if(strpos($feed, 'downloads!') !== false) { + $feed = str_replace('downloads!', '', $feed); + $baseUri = str_replace('www.', 'downloads.', $baseUri); + } + $url = $baseUri . trim($feed, '/') . '/rss.xml'; + $html = getSimpleHTMLDOM($url) + or returnServerError('Could not request ZDNet: ' . $url); + $limit = 0; + + foreach($html->find('item') as $element) { + if($limit < 10) { + $article_url = preg_replace( + '/([^#]+)#ftag=.*/', + '$1', + stripCdata(extractFromDelimiters($element->innertext, '<link>', '</link>')) + ); + + $article_author = stripCdata(extractFromDelimiters($element->innertext, 'role="author">', '<')); + $article_title = stripCdata($element->find('title', 0)->plaintext); + $article_subtitle = stripCdata($element->find('description', 0)->plaintext); + $article_timestamp = strtotime(stripCdata($element->find('pubDate', 0)->plaintext)); + $article = getSimpleHTMLDOM($article_url) + or returnServerError('Could not request ZDNet: ' . 
$article_url); + + if(!empty($article_author)) { + $author = $article_author; + } else { + $author = $article->find('meta[name=author]', 0); + if(is_object($author)) { + $author = $author->content; + } else { + $author = 'ZDNet'; + } + } + + $thumbnail = $article->find('meta[itemprop=image]', 0); + if(is_object($thumbnail)) { + $thumbnail = $thumbnail->content; + } else { + $thumbnail = ''; + } + + $contents = $article->find('article', 0)->innertext; + foreach(array( + '<div class="shareBar"', + '<div class="shortcodeGalleryWrapper"', + '<div class="relatedContent', + '<div class="downloadNow', + '<div data-shortcode', + '<div id="sharethrough', + '<div id="inpage-video' + ) as $div_start) { + $contents = stripRecursiveHtmlSection($contents, 'div', $div_start); + } + $contents = stripWithDelimiters($contents, '<script', '</script>'); + $contents = stripWithDelimiters($contents, '<meta itemprop="image"', '>'); + $contents = trim(stripWithDelimiters($contents, '<section class="sharethrough-top', '</section>')); + $content_img = strpos($contents, '<img'); //Look for first image + if (($content_img !== false && $content_img < 512) || $thumbnail == '') { + $content_img = ''; //Image already present on article beginning or no thumbnail + } else { + $content_img = '<p><img src="'.$thumbnail.'" /></p>'; //Include thumbnail + } + $contents = $content_img + . '<p><b>' + . $article_subtitle + . '</b></p>' + . $contents; + + $item = array(); + $item['author'] = $author; + $item['uri'] = $article_url; + $item['title'] = $article_title; + $item['timestamp'] = $article_timestamp; + $item['content'] = $contents; + $this->items[] = $item; + $limit++; + } + } + + } +} |