summaryrefslogtreecommitdiff
path: root/bridges/WiredBridge.php
blob: 8da93d0c3f7084a6457d7c3c44b7f21aca888d10 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
<?php
/**
 * Bridge returning the newest WIRED articles with their full content.
 *
 * The selected WIRED RSS feed is expanded item by item: each article page is
 * fetched and its body, headline and lead image are assembled into the item
 * content.
 */
class WiredBridge extends FeedExpander {
	const MAINTAINER = 'ORelio';
	const NAME = 'WIRED Bridge';
	const URI = 'https://www.wired.com/';
	const DESCRIPTION = 'Returns the newest articles from WIRED';

	// Each value is the feed identifier; the trailing comment shows the
	// resulting feed path relative to URI.
	const PARAMETERS = array( array(
		'feed' => array(
			'name' => 'Feed',
			'type' => 'list',
			'values' => array(
				'WIRED Top Stories' => 'rss',			// /feed/rss
				'Business' => 'business',				// /feed/category/business/latest/rss
				'Culture' => 'culture',					// /feed/category/culture/latest/rss
				'Gear' => 'gear',						// /feed/category/gear/latest/rss
				'Ideas' => 'ideas',						// /feed/category/ideas/latest/rss
				'Science' => 'science',					// /feed/category/science/latest/rss
				'Security' => 'security',				// /feed/category/security/latest/rss
				'Transportation' => 'transportation',	// /feed/category/transportation/latest/rss
				'Backchannel' => 'backchannel',			// /feed/category/backchannel/latest/rss
				'WIRED Guides' => 'wired-guide',		// /feed/tag/wired-guide/latest/rss
				'Photo' => 'photo'						// /feed/category/photo/latest/rss
			)
		)
	));

	/**
	 * Builds the feed URL for the requested "feed" input and expands it.
	 *
	 * The identifier must be non-empty and contain only letters and dashes;
	 * anything else is reported through returnClientError().
	 */
	public function collectData(){
		$feed = $this->getInput('feed');
		if(empty($feed) || !ctype_alpha(str_replace('-', '', $feed))) {
			returnClientError('Invalid feed, please check the "feed" parameter.');
		}

		$feed_url = $this->getURI() . 'feed/';
		if ($feed !== 'rss') {
			// Every feed lives under category/ except WIRED Guides, which is a tag.
			$feed_url .= ($feed !== 'wired-guide') ? 'category/' : 'tag/';
			$feed_url .= "$feed/latest/";
		}
		$feed_url .= 'rss';

		$this->collectExpandableDatas($feed_url);
	}

	/**
	 * Expands one feed entry with the content of the full article page.
	 *
	 * @param object $newsItem Feed entry as handed over by the parent class;
	 *                         its "description" is used as the headline.
	 * @return array Item from parent::parseItem() with 'content' replaced and,
	 *               when the page has an og:image, 'enclosures' set.
	 */
	protected function parseItem($newsItem){
		$item = parent::parseItem($newsItem);

		$article = getSimpleHTMLDOMCached($item['uri']);
		if (!$article) {
			returnServerError('Could not request WIRED: ' . $item['uri']);
		}
		$item['content'] = $this->extractArticleContent($article);

		// The feed description is shown in bold above the article body.
		$headline = strval($newsItem->description);
		if(!empty($headline)) {
			$item['content'] = '<p><b>' . $headline . '</b></p>' . $item['content'];
		}

		// The og:image is attached as enclosure and shown above everything else.
		$item_image = $article->find('meta[property="og:image"]', 0);
		if(!empty($item_image)) {
			$item['enclosures'] = array($item_image->content);
			$item['content'] = '<p><img src="' . $item_image->content . '" /></p>' . $item['content'];
		}

		return $item;
	}

	/**
	 * Extracts the cleaned-up article body from an article page.
	 *
	 * @param object $article Parsed article page (must provide find()).
	 * @return string Article HTML, or an empty string when no known content
	 *                container is present on the page.
	 */
	private function extractArticleContent($article){
		$container = $article->find('article', 0);
		$truncate = true;

		if (empty($container)) {
			// Fall back to the listicle layout, which is not truncated at <hr.
			$container = $article->find('div.listicle-main-component__container', 0);
			$truncate = false;
		}

		if (empty($container)) {
			// Nothing recognisable on the page: skip the string processing
			// below instead of feeding it a null value.
			return '';
		}

		$content = $container->innertext;

		foreach (array(
			'<div class="content-header',
			'<div class="mid-banner-wrap',
			'<div class="related',
			'<div class="social-icons',
			'<div class="recirc-most-popular',
			'<div class="grid--item article-related-video',
			'<div class="row full-bleed-ad',
		) as $div_start) {
			$content = stripRecursiveHTMLSection($content, 'div', $div_start);
		}

		if ($truncate) {
			//Clutter after standard article is too hard to clean properly
			$content = trim(explode('<hr', $content)[0]);
		}

		// Make site-root-relative links absolute. The site root comes from the
		// URI constant with its trailing slash normalised, so the result never
		// contains a double slash. The lookahead leaves protocol-relative links
		// (href="//host/...") untouched.
		// NOTE(review): getURI() is not used here because the parent class may
		// return something other than the site root at this point - confirm.
		$site_root = rtrim(self::URI, '/') . '/';
		$absolute = preg_replace('#href="/(?!/)#', 'href="' . $site_root, $content);

		// preg_replace() yields null on a PCRE error; keep the content in that case.
		return $absolute === null ? $content : $absolute;
	}
}