summaryrefslogtreecommitdiff
path: root/bridges/MediapartBridge.php
blob: f7fff4ab8c167c9e2c23180b8da6bc3a2e3caab5 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
<?php

/**
 * Bridge for the Mediapart article feed (https://www.mediapart.fr/).
 *
 * Feed items are expanded through FeedExpander. For items whose URI lives
 * under the "journal/" section, the bridge can additionally:
 *  - point the item link at the single page version of the article, and
 *  - when a MPSESSID session cookie value is supplied, download the article
 *    page with that cookie and append the article body to the item content.
 */
class MediapartBridge extends FeedExpander {
	const MAINTAINER = 'killruana';
	const NAME = 'Mediapart Bridge';
	const URI = 'https://www.mediapart.fr/';
	const PARAMETERS = array(
		array(
			'single_page_mode' => array(
				'name' => 'Single page article',
				'type' => 'checkbox',
				'title' => 'Display long articles on a single page',
				'defaultValue' => 'checked'
			),
			'mpsessid' => array(
				'name' => 'MPSESSID',
				'type' => 'text',
				'title' => 'Value of the session cookie MPSESSID'
			)
		)
	);
	const CACHE_TIMEOUT = 7200; // 2h
	const DESCRIPTION = 'Returns the newest articles.';

	/**
	 * Collects the items of the Mediapart article feed.
	 *
	 * Delegates to collectExpandableDatas() with the "articles/feed" URL
	 * built from the bridge URI.
	 */
	public function collectData() {
		$url = self::URI . 'articles/feed';
		$this->collectExpandableDatas($url);
	}

	/**
	 * Builds one bridge item from a feed entry.
	 *
	 * @param object $newsItem Feed entry handed over by FeedExpander; only its
	 *                         "link" property is read here.
	 * @return array Item produced by the parent parser, with the URI and
	 *               content optionally extended for newspaper articles.
	 */
	protected function parseItem($newsItem) {
		$item = parent::parseItem($newsItem);

		// Mediapart provides multiple types of content.
		// Only items belonging to the newspaper are processed further.
		// See issue #1292 - https://github.com/RSS-Bridge/rss-bridge/issues/1292
		if (strpos($item['uri'], self::URI . 'journal/') !== 0) {
			return $item;
		}

		// Enable single page mode?
		if ($this->getInput('single_page_mode') === true) {
			$item['uri'] .= '?onglet=full';
		}

		// Without a session cookie only the feed-provided content is available.
		$mpsessid = $this->getInput('mpsessid');
		if (empty($mpsessid)) {
			return $item;
		}

		// Send the session cookie along with the request for the article page.
		$opt = array();
		$opt[CURLOPT_COOKIE] = 'MPSESSID=' . $mpsessid;

		// The full page variant is always requested here, independently of
		// the single page mode option, which only affects the item link.
		$articlePage = getSimpleHTMLDOM(
			$newsItem->link . '?onglet=full',
			array(),
			$opt);

		// Keep the feed-provided item when the page could not be parsed.
		if (!is_object($articlePage)) {
			return $item;
		}

		// The article container may be absent (e.g. expired session cookie or
		// changed page layout). Dereferencing a missing node would abort the
		// whole feed, so fall back to the feed-provided content instead.
		$articleBody = $articlePage->find('div.content-article', 0);
		if (!is_object($articleBody)) {
			return $item;
		}

		// Extract the article content
		$content = sanitize($articleBody->innertext);
		$content = defaultLinkTo($content, self::URI);
		$item['content'] .= $content;

		return $item;
	}
}