summaryrefslogtreecommitdiff
path: root/bridges/TheHackerNewsBridge.php
blob: d0d2e9730ea36d6e26ad94ed986a6c8ebb0e8f45 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
<?php
/**
 * Bridge that builds feed items from the posts listed on the page at URI.
 *
 * Each listed post is additionally fetched (through the cached loader) to
 * obtain its body and, when available, a date that includes the time.
 */
class TheHackerNewsBridge extends BridgeAbstract {

	const MAINTAINER = 'ORelio';
	const NAME = 'The Hacker News Bridge';
	const URI = 'https://thehackernews.com/';
	const DESCRIPTION = 'Cyber Security, Hacking, Technology News.';

	/**
	 * Collects up to five posts from the listing page into $this->items.
	 *
	 * Every item carries 'uri', 'title', 'author', 'enclosures' and 'content';
	 * 'timestamp' is only set when a date could actually be parsed.
	 * Posts without a usable link are skipped and do not count towards the limit.
	 * Missing optional markup (author, date, thumbnail, article body) degrades
	 * to an empty value instead of aborting the whole feed.
	 */
	public function collectData(){

		$html = getSimpleHTMLDOM($this->getURI())
			or returnServerError('Could not request TheHackerNews: ' . $this->getURI());
		$limit = 0;

		foreach($html->find('div.body-post') as $element) {
			// Stop scanning as soon as enough items were collected
			if($limit >= 5) {
				break;
			}

			// A post without a link can neither be fetched nor referenced: skip it
			$link_element = $element->find('a.story-link', 0);
			$article_url = is_object($link_element) ? $link_element->href : null;
			if (!is_string($article_url) || $article_url === '') {
				continue;
			}

			$title_element = $element->find('h2.home-title', 0);
			$article_title = is_object($title_element) ? $title_element->plaintext : '';

			// The author name is the text of the element wrapping the user icon
			$author_icon = $element->find('i.fa-user', 0);
			$article_author = is_object($author_icon) ? trim($author_icon->parent()->plaintext) : '';

			//Date without time
			$article_timestamp = false;
			$calendar_icon = $element->find('i.fa-calendar', 0);
			if (is_object($calendar_icon)) {
				$date_text = extractFromDelimiters(
					$calendar_icon->parent()->outertext,
					'</i>',
					'<span>'
				);
				// NOTE(review): extractFromDelimiters is expected to return a
				// non-string (false) when the delimiters are not found - confirm
				if (is_string($date_text)) {
					$article_timestamp = strtotime($date_text);
				}
			}

			//Article thumbnail in lazy-loading image
			$article_thumbnail = array();
			$image = $element->find('img[data-echo]', 0);
			if (is_object($image)) {
				$thumbnail_url = extractFromDelimiters(
					$image->outertext,
					"data-echo='",
					"'"
				);
				// Only keep a real URL, never a failed extraction result
				if (is_string($thumbnail_url) && $thumbnail_url !== '') {
					$article_thumbnail[] = $thumbnail_url;
				}
			}

			if ($article = getSimpleHTMLDOMCached($article_url)) {

				//Article body, with ad containers, ad iframes and scripts removed
				$contents = '';
				$body = $article->find('div.articlebody', 0);
				if (is_object($body)) {
					$contents = $body->innertext;
					$contents = stripRecursiveHtmlSection($contents, 'div', '<div class="ad_');
					$contents = stripWithDelimiters($contents, 'id="google_ads', '</iframe>');
					$contents = stripWithDelimiters($contents, '<script', '</script>');
				}

				//Date with time
				$modified_meta = $article->find('meta[itemprop=dateModified]', 0);
				if (is_object($modified_meta)) {
					$modified_text = extractFromDelimiters(
						$modified_meta->outertext,
						"content='",
						"'"
					);
					$modified_timestamp = is_string($modified_text) ? strtotime($modified_text) : false;
					// Keep the date-only value when the precise one cannot be parsed
					if ($modified_timestamp !== false) {
						$article_timestamp = $modified_timestamp;
					}
				}
			} else {
				$contents = 'Could not request TheHackerNews: ' . $article_url;
			}

			$item = array();
			$item['uri'] = $article_url;
			$item['title'] = $article_title;
			$item['author'] = $article_author;
			$item['enclosures'] = $article_thumbnail;
			// Leave the timestamp out rather than storing false for an unparsable date
			if ($article_timestamp !== false) {
				$item['timestamp'] = $article_timestamp;
			}
			$item['content'] = trim($contents);
			$this->items[] = $item;
			$limit++;
		}

	}
}