summaryrefslogtreecommitdiff
path: root/bridges/KununuBridge.php
blob: 2f4bf0b2a15c9ae1b63f1949694d612682d6dcef (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
<?php
/**
 * Bridge that turns the review listing of a company on kununu.com into feed
 * items (one item per review article found on the listing page).
 */
class KununuBridge extends BridgeAbstract {
	const MAINTAINER = 'logmanoriginal';
	const NAME = 'Kununu Bridge';
	const URI = 'https://www.kununu.com/';
	const CACHE_TIMEOUT = 86400; // 24h
	const DESCRIPTION = 'Returns the latest reviews for a company and site of your choice.';

	const PARAMETERS = array(
		'global' => array(
			'site' => array(
				'name' => 'Site',
				'type' => 'list',
				'required' => true,
				'title' => 'Select your site',
				'values' => array(
					'Austria' => 'at',
					'Germany' => 'de',
					'Switzerland' => 'ch',
					'United States' => 'us'
				)
			),
			'full' => array(
				'name' => 'Load full article',
				'type' => 'checkbox',
				'required' => false,
				'exampleValue' => 'checked',
				'title' => 'Activate to load full article'
			)
		),
		array(
			'company' => array(
				'name' => 'Company',
				'required' => true,
				'exampleValue' => 'kununu-us',
				'title' => 'Insert company name (i.e. Kununu US) or URI path (i.e. kununu-us)'
			)
		)
	);

	// Display name of the company as found on the page; set by collectData()
	private $companyName = '';

	/**
	 * Builds the URI of the review listing for the requested company and site.
	 *
	 * Falls back to the parent implementation when either input is missing.
	 *
	 * @return string
	 */
	public function getURI(){
		if(!is_null($this->getInput('company')) && !is_null($this->getInput('site'))) {

			$company = $this->fixCompanyName($this->getInput('company'));
			$site = $this->getInput('site');

			// The review section is named differently per site language.
			// An unrecognized site leaves the section empty.
			$section = '';

			switch($site) {
			case 'at':
			case 'de':
			case 'ch':
				$section = 'kommentare';
				break;
			case 'us':
				$section = 'reviews';
				break;
			}

			return self::URI . $site . '/' . $company . '/' . $section . '?sort=update_time_desc';
		}

		return parent::getURI();
	}

	/**
	 * Returns the feed name: the company name taken from the page (once
	 * collectData() has stored it) or otherwise the normalized company input,
	 * followed by the bridge name.
	 *
	 * @return string
	 */
	public function getName(){
		if(!is_null($this->getInput('company'))) {
			$company = $this->fixCompanyName($this->getInput('company'));
			return ($this->companyName ?: $company) . ' - ' . self::NAME;
		}

		return parent::getName();
	}

	/**
	 * Returns the URI of the icon used for this bridge.
	 *
	 * @return string
	 */
	public function getIcon() {
		return 'https://www.kununu.com/favicon-196x196.png';
	}

	/**
	 * Loads the review listing and appends one item per article to $this->items.
	 *
	 * Every lookup that yields nothing is reported through returnServerError()
	 * (project helper; presumably aborts the request - confirm).
	 */
	public function collectData(){
		$full = $this->getInput('full');

		// Load page
		$html = getSimpleHTMLDOM($this->getURI())
			or returnServerError('Unable to receive data from ' . $this->getURI() . '!');

		$html = defaultLinkTo($html, static::URI);

		// Update name for this request
		$company = $html->find('span[class="company-name"]', 0)
			or returnServerError('Cannot find company name!');

		$this->companyName = $company->innertext;

		// Find the section with all the panels (reviews)
		$section = $html->find('section.kununu-scroll-element', 0)
			or returnServerError('Unable to find panel section!');

		// Find all articles (within the panels)
		$articles = $section->find('article')
			or returnServerError('Unable to find articles!');

		// Go through all articles
		foreach($articles as $article) {

			$anchor = $article->find('h1.review-title a', 0)
				or returnServerError('Cannot find article URI!');

			$date = $article->find('meta[itemprop=dateCreated]', 0)
				or returnServerError('Cannot find article date!');

			$rating = $article->find('span.rating', 0)
				or returnServerError('Cannot find article rating!');

			$summary = $article->find('[itemprop=name]', 0)
				or returnServerError('Cannot find article summary!');

			$item = array();

			$item['author'] = $this->extractArticleAuthorPosition($article);

			// The date lives in the 'content' attribute of the meta element.
			// Passing the element itself to strtotime() would hand it the
			// element's markup, which is not a parsable date.
			$item['timestamp'] = strtotime($date->getAttribute('content'));

			$item['title'] = $rating->getAttribute('aria-label')
			. ' : '
			. strip_tags($summary->innertext);

			$item['uri'] = $anchor->href;

			if($full) {
				$item['content'] = $this->extractFullDescription($item['uri']);
			} else {
				$item['content'] = $this->extractArticleDescription($article);
			}

			$this->items[] = $item;

		}
	}

	/**
	 * Returns a normalized version of the provided company name: trimmed,
	 * spaces replaced by dashes, German umlauts and sharp s transliterated
	 * (ae, oe, ue, ss) and the result lowercased.
	 *
	 * @param string $company Company name or URI path as entered by the user
	 * @return string
	 */
	private function fixCompanyName($company){
		$company = trim($company);
		$company = str_replace(' ', '-', $company);

		// Transliterate BEFORE lowercasing: strtolower() only reliably handles
		// ASCII, so uppercase umlauts would otherwise survive it and end up as
		// capitalized replacements ('Ae', 'Oe', 'Ue') in the result.
		$transliterations = array(
			'ä' => 'ae',
			'ö' => 'oe',
			'ü' => 'ue',
			'Ä' => 'Ae',
			'Ö' => 'Oe',
			'Ü' => 'Ue',
			'ß' => 'ss'
		);

		return strtolower(strtr($company, $transliterations));
	}

	/**
	 * Returns the position of the author from a given article, or 'Unknown'
	 * if no position could be determined.
	 *
	 * @param object $article DOM node of the review article
	 * @return string
	 */
	private function extractArticleAuthorPosition($article){
		// We need to parse the user-content manually
		$user_content = $article->find('div.user-content', 0)
			or returnServerError('Cannot find user content!');

		// Go through all div elements to find the label that mentions the
		// position; the value is taken from the element following that label.
		$author_position = 'Unknown';
		foreach($user_content->find('div') as $content) {
			// Case-insensitive match; this works for at, ch, de, us
			if(stripos($content->plaintext, 'position') === false) {
				continue;
			}

			// The label may be the last element, in which case there is no
			// value to read and the default is kept.
			$value = $content->next_sibling();
			if($value) {
				$author_position = $value->plaintext;
			}

			break;
		}

		return $author_position;
	}

	/**
	 * Returns the description (inner HTML of the review body) from a given
	 * article.
	 *
	 * @param object $article DOM node of the review article
	 * @return string
	 */
	private function extractArticleDescription($article){
		$description = $article->find('[itemprop=reviewBody]', 0)
			or returnServerError('Cannot find article description!');

		return $description->innertext;
	}

	/**
	 * Returns the full description from a given uri by loading that page and
	 * extracting the description of the first article found on it.
	 *
	 * @param string $uri URI of the full review page
	 * @return string
	 */
	private function extractFullDescription($uri){
		// Load full article
		$html = getSimpleHTMLDOMCached($uri)
			or returnServerError('Could not load full description!');

		$html = defaultLinkTo($html, static::URI);

		// Find the article
		$article = $html->find('article', 0)
			or returnServerError('Cannot find article!');

		// Luckily they use the same layout for the review overview and full article pages :)
		return $this->extractArticleDescription($article);
	}
}