summary refs log tree commit diff
path: root/bridges/SexactuBridge.php
diff options
context:
space:
mode:
author Johannes Schauer <josch@debian.org> 2017-08-04 22:06:01 +0200
committer Johannes Schauer <josch@debian.org> 2017-08-04 22:06:01 +0200
commit b005331cd910c0cc7dee2ddf82491b8248f431cf (patch)
tree ff8b5cbfe81d570b878cb8d60ee51d07c3b1d059 /bridges/SexactuBridge.php
Import rss-bridge_2017-08-03.orig.tar.gz
[dgit import orig rss-bridge_2017-08-03.orig.tar.gz]
Diffstat (limited to 'bridges/SexactuBridge.php')
-rw-r--r-- bridges/SexactuBridge.php 88
1 files changed, 88 insertions, 0 deletions
diff --git a/bridges/SexactuBridge.php b/bridges/SexactuBridge.php
new file mode 100644
index 0000000..5bc552a
--- /dev/null
+++ b/bridges/SexactuBridge.php
@@ -0,0 +1,88 @@
+<?php
+class SexactuBridge extends BridgeAbstract {
+
+    const MAINTAINER = 'Riduidel';
+    const NAME = 'Sexactu';
+    const AUTHOR = 'Maïa Mazaurette';
+    const URI = 'http://www.gqmagazine.fr';
+    const CACHE_TIMEOUT = 7200; // 2h
+    const DESCRIPTION = 'Sexactu via rss-bridge';
+
+    // Attribute whose site-relative value gets rewritten => attribute name emitted in its place.
+    const REPLACED_ATTRIBUTES = array(
+        'href' => 'href',
+        'src' => 'src',
+        'data-original' => 'src'
+    );
+
+    public function getURI(){
+        return self::URI . '/sexactu';
+    }
+
+    /**
+     * Fills $this->items with one entry per article linked from the listing page.
+     * Rows or articles lacking the expected markup are skipped instead of aborting the feed.
+     */
+    public function collectData(){
+        $html = getSimpleHTMLDOM($this->getURI())
+            or returnServerError('Could not request ' . $this->getURI());
+        $sexactu = $html->find('.container_sexactu', 0)
+            or returnServerError('Could not find the article list on ' . $this->getURI());
+
+        foreach($sexactu->find('.row') as $row) {
+            $title = $row->find('.title', 0);
+            $uri = $title ? $title->{'data-href'} : false;
+            if(!is_string($uri) || $uri === '') // not an article row: no title or no link
+                continue;
+            $item = array();
+            $item['author'] = self::AUTHOR;
+            $item['title'] = $title->plaintext;
+            if(preg_match('#^https?://#i', $uri)) { // absolute uri
+                $item['uri'] = $uri;
+            } else if(substr($uri, 0, 2) === '//') { // protocol relative url
+                $item['uri'] = 'http:' . $uri;
+            } else if($uri[0] === '?' || $uri[0] === '#') { // query or fragment of the listing page
+                $item['uri'] = $this->getURI() . $uri;
+            } else { // path: the listing URI has no trailing slash, so it resolves from the site root
+                $item['uri'] = self::URI . '/' . ltrim($uri, '/');
+            }
+
+            $article = $this->loadFullArticle($item['uri']);
+            $content = $article ? $article->find('.article_content', 0) : null;
+            if(!$content) // article could not be loaded or has an unexpected layout
+                continue;
+            $item['content'] = $this->replaceUriInHtmlElement($content);
+
+            $publicationDate = $article->find('time[itemprop=datePublished]', 0);
+            if($publicationDate) // an undated article keeps its item but carries no timestamp
+                $item['timestamp'] = strtotime($publicationDate->datetime);
+            $this->items[] = $item;
+        }
+    }
+
+    /**
+     * Loads the full article through the cached fetcher.
+     * @param $uri The article URI
+     * @return The '#article' element, or null when the page cannot be fetched or lacks it
+     */
+    private function loadFullArticle($uri){
+        $html = getSimpleHTMLDOMCached($uri);
+        $content = $html ? $html->find('#article', 0) : null;
+        return $content ? $content : null;
+    }
+
+    /**
+     * Replaces site-relative URIs ('/path') with absolute ones. Protocol-relative
+     * URIs ('//host/path') already name their host and are left untouched.
+     * @param $element A simplehtmldom element
+     * @return The $element->innertext with all URIs replaced
+     */
+    private function replaceUriInHtmlElement($element){
+        $returned = $element->innertext;
+        foreach (self::REPLACED_ATTRIBUTES as $initial => $final) {
+            $pattern = '#' . preg_quote($initial, '#') . '="/(?!/)#';
+            $returned = preg_replace($pattern, $final . '="' . self::URI . '/', $returned);
+        }
+        return $returned;
+    }
+}