summaryrefslogtreecommitdiff
path: root/bridges/SexactuBridge.php
blob: 5bc552abbb7dc297f2b2473d90c499ded6de8f98 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
<?php
/**
 * Bridge that builds feed items from the article listing published at
 * self::URI . '/sexactu'.
 *
 * For every listed title, the linked article page is fetched in order to
 * fill in the item content and publication date.
 */
class SexactuBridge extends BridgeAbstract {

	const MAINTAINER = 'Riduidel';
	const NAME = 'Sexactu';
	const AUTHOR = 'Maïa Mazaurette';
	const URI = 'http://www.gqmagazine.fr';
	const CACHE_TIMEOUT = 7200; // 2h
	const DESCRIPTION = 'Sexactu via rss-bridge';

	// Attribute found in the article markup => attribute written to the item
	// content once its site-relative value has been made absolute.
	const REPLACED_ATTRIBUTES = array(
			'href' => 'href',
			'src' => 'src',
			'data-original' => 'src'
	);

	/**
	 * @return string The URI of the listing page this bridge reads
	 */
	public function getURI(){
		return self::URI . '/sexactu';
	}

	/**
	 * Reads the listing page and appends one entry to $this->items for each
	 * row that carries a title with a usable "data-href" attribute.
	 *
	 * Content and timestamp are only set when the corresponding elements are
	 * found in the article page; a missing element no longer aborts the whole
	 * feed.
	 */
	public function collectData(){
		$html = getSimpleHTMLDOM($this->getURI())
			or returnServerError('Could not request ' . $this->getURI());

		$sexactu = $html->find('.container_sexactu', 0);
		if(!$sexactu) {
			// Without this guard the ->find() call below would be a fatal
			// "member function on null" error.
			returnServerError('Could not find article list in ' . $this->getURI());
		}

		foreach($sexactu->find('.row') as $row) {
			$title = $row->find('.title', 0);
			if(!$title) {
				// Sometimes we get rubbish, ignore.
				continue;
			}

			// A missing attribute reads as false and a valueless one as true;
			// only a non-empty string is a usable link.
			$uri = $title->{'data-href'};
			if(!is_string($uri) || $uri === '') {
				continue;
			}

			$item = array();
			$item['author'] = self::AUTHOR;
			$item['title'] = $title->plaintext;
			$item['uri'] = $this->toAbsoluteUri($uri);

			$article = $this->loadFullArticle($item['uri']);
			if($article) {
				$content = $article->find('.article_content', 0);
				if($content) {
					$item['content'] = $this->replaceUriInHtmlElement($content);
				}

				$publicationDate = $article->find('time[itemprop=datePublished]', 0);
				if($publicationDate && is_string($publicationDate->datetime)) {
					$timestamp = strtotime($publicationDate->datetime);
					// strtotime() returns false on unparsable input.
					if($timestamp !== false) {
						$item['timestamp'] = $timestamp;
					}
				}
			}

			$this->items[] = $item;
		}
	}

	/**
	 * Turns a link taken from the listing page into an absolute URI
	 * @param $uri A non-empty link value
	 * @return The absolute URI
	 */
	private function toAbsoluteUri($uri){
		// Test for an actual scheme: a relative path may also start with 'h'.
		if(preg_match('#^https?://#i', $uri)) {
			return $uri;
		}

		// Protocol-relative link: reuse the scheme of the site.
		if(substr($uri, 0, 2) === '//') {
			return parse_url(self::URI, PHP_URL_SCHEME) . ':' . $uri;
		}

		// Domain-relative link.
		if(substr($uri, 0, 1) === '/') {
			return self::URI . $uri;
		}

		// Page-relative link: a separator is needed, otherwise the result is
		// ".../sexactufoo".
		// NOTE(review): assumes such links live below /sexactu/ - confirm.
		return $this->getURI() . '/' . $uri;
	}

	/**
	 * Loads the full article and returns the contents
	 * @param $uri The article URI
	 * @return The "#article" element, or null when the page could not be
	 * loaded or does not contain that element
	 */
	private function loadFullArticle($uri){
		$html = getSimpleHTMLDOMCached($uri);
		if(!$html) {
			return null;
		}

		$content = $html->find('#article', 0);

		return $content ? $content : null;
	}

	/**
	 * Replaces all relative URIs with absolute ones
	 * @param $element A simplehtmldom element
	 * @return The $element->innertext with all URIs replaced, or an empty
	 * string when no element is given
	 */
	private function replaceUriInHtmlElement($element){
		if(!$element) {
			return '';
		}

		$returned = $element->innertext;
		foreach (self::REPLACED_ATTRIBUTES as $initial => $final) {
			// (?!/) leaves protocol-relative values ("//host/...") untouched;
			// prefixing them with the site URI would corrupt them.
			$pattern = '#' . preg_quote($initial, '#') . '="/(?!/)#';
			$replaced = preg_replace($pattern, $final . '="' . self::URI . '/', $returned);
			// preg_replace() returns null on error; keep the previous text then.
			if($replaced !== null) {
				$returned = $replaced;
			}
		}
		return $returned;
	}
}