summaryrefslogtreecommitdiff
path: root/bridges/LeMondeInformatiqueBridge.php
blob: 706752ffd7ca4cf623500236e22254177031fc07 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
<?php
/**
 * Bridge for Le Monde Informatique.
 *
 * Expands the site's RSS feed: for every feed entry the linked article page
 * is fetched, and the entry's content and title are replaced with the ones
 * extracted from that page.
 */
class LeMondeInformatiqueBridge extends FeedExpander {

	const MAINTAINER = 'ORelio';
	const NAME = 'Le Monde Informatique';
	const URI = 'http://www.lemondeinformatique.fr/';
	const CACHE_TIMEOUT = 1800; // 30min
	const DESCRIPTION = 'Returns the newest articles.';

	/**
	 * Entry point of the bridge: expands the RSS feed found under URI.
	 *
	 * The second argument (10) is presumably the maximum number of feed
	 * entries to expand -- confirm against FeedExpander.
	 */
	public function collectData(){
		$this->collectExpandableDatas(self::URI . 'rss/rss.xml', 10);
	}

	/**
	 * Builds one item from a feed entry, enriched with the full article page.
	 *
	 * The parent implementation produces the base item; its 'uri' field is
	 * then used to download the article page. 'content' and 'title' are only
	 * overridden when the corresponding element exists on that page, so an
	 * unexpected page layout keeps the values coming from the feed instead of
	 * dereferencing a missing node.
	 *
	 * @param mixed $newsItem Feed entry, passed as-is to the parent parser.
	 * @return array The item, with 'content' and 'title' taken from the
	 *               article page when available.
	 */
	protected function parseItem($newsItem){
		$item = parent::parseItem($newsItem);
		// "=" binds tighter than "or": the error handler only runs when the
		// download yields a falsy value.
		$article_html = getSimpleHTMLDOMCached($item['uri'])
			or returnServerError('Could not request LeMondeInformatique: ' . $item['uri']);

		$article_body = $article_html->find('div#article', 0);
		if(is_object($article_body)) {
			$item['content'] = $this->cleanArticle($article_body->innertext);
		}

		$article_title = $article_html->find('h1.cleanprint-title', 0);
		if(is_object($article_title)) {
			$item['title'] = $article_title->plaintext;
		}

		return $item;
	}

	/**
	 * Removes the CDATA opening and closing markers from a string, keeping
	 * the text between them.
	 *
	 * Not called from anywhere inside this class.
	 *
	 * @param string $string Text possibly wrapped in CDATA markers.
	 * @return string The text without '<![CDATA[' and ']]>'.
	 */
	private function stripCDATA($string){
		// Replacements are applied in array order, same as two chained calls.
		return str_replace(array('<![CDATA[', ']]>'), '', $string);
	}

	/**
	 * Removes every section that begins with $start and runs up to and
	 * including the next $end.
	 *
	 * An empty delimiter returns the input unchanged, and a $start without a
	 * following $end is left in place rather than removing a partial,
	 * arbitrary fragment.
	 *
	 * @param string $string Text to clean.
	 * @param string $start Opening delimiter of the sections to remove.
	 * @param string $end Closing delimiter of the sections to remove.
	 * @return string The text without the delimited sections.
	 */
	private function stripWithDelimiters($string, $start, $end){
		// An empty delimiter can never be consumed and would loop forever.
		if($start === '' || $end === '') {
			return $string;
		}

		// Always rescan from the beginning: cutting a section out may join
		// the surrounding text into a new occurrence of $start.
		while(($start_pos = strpos($string, $start)) !== false) {
			$end_pos = strpos($string, $end, $start_pos);
			if($end_pos === false) {
				// Unterminated section: nothing sensible to remove.
				break;
			}

			// Each pass drops at least strlen($end) bytes, so the loop ends.
			$string = substr($string, 0, $start_pos)
				. substr($string, $end_pos + strlen($end));
		}

		return $string;
	}

	/**
	 * Strips the parts of the article markup that must not end up in the
	 * item content: script elements and the title heading (the title is
	 * stored separately in the item by parseItem).
	 *
	 * @param string $article_html Inner HTML of the article container.
	 * @return string The cleaned HTML.
	 */
	private function cleanArticle($article_html){
		$article_html = $this->stripWithDelimiters($article_html, '<script', '</script>');
		$article_html = $this->stripWithDelimiters($article_html, '<h1 class="cleanprint-title"', '</h1>');
		return $article_html;
	}
}