summary refs log tree commit diff
path: root/lib/contents.php
blob: ec62c8db194e8b1486a5307a3f28e04c034bcafd (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
<?php
/**
 * Read the contents of a URL (or any path file_get_contents() accepts) as a string.
 *
 * When the PROXY_URL constant is defined and NOPROXY is not, the request is
 * routed through that proxy. If the HTTP response declares a gzip
 * Content-Encoding, the body is decoded before it is returned.
 *
 * @param string $url URL or path to read
 * @param bool $use_include_path forwarded to file_get_contents()
 * @param resource|null $context stream context forwarded to file_get_contents();
 * when a proxy is configured, a context is created if this is null, otherwise
 * the proxy options are set on the given context
 * @param int $offset forwarded to file_get_contents()
 * @param int|null $maxlen forwarded to file_get_contents() when not null
 * @return string|false the (decoded) content, or false when the download or
 * the gzip decoding failed
 */
function getContents($url,
$use_include_path = false,
$context = null,
$offset = 0,
$maxlen = null){
	$contextOptions = array(
		'http' => array(
			'user_agent' => ini_get('user_agent'),
			// NOTE(review): 'accept_encoding' does not appear in PHP's list of
			// HTTP context options, so it is presumably ignored - confirm
			// whether an explicit "Accept-Encoding" header was intended.
			'accept_encoding' => 'gzip'
		)
	);

	// The options above are only applied when a proxy is configured.
	if(defined('PROXY_URL') && !defined('NOPROXY')) {
		$contextOptions['http']['proxy'] = PROXY_URL;
		$contextOptions['http']['request_fulluri'] = true;

		if(is_null($context)) {
			$context = stream_context_create($contextOptions);
		} else {
			// The options are set on the caller's context in place; on
			// failure the context is simply used as it was passed in.
			stream_context_set_option($context, $contextOptions);
		}
	}

	// $maxlen is only forwarded when given, so that a null value is never
	// handed to file_get_contents() as a length.
	if(is_null($maxlen)) {
		$content = file_get_contents($url, $use_include_path, $context, $offset);
	} else {
		$content = file_get_contents($url, $use_include_path, $context, $offset, $maxlen);
	}

	if($content === false) {
		debugMessage('Cant\'t download ' . $url);
		// There is no body to decode, so the failure is reported as is.
		return false;
	}

	// $http_response_header only exists after an HTTP(S) request that got a
	// response; it is undefined for local paths and other stream wrappers.
	$responseHeaders = isset($http_response_header) ? $http_response_header : array();

	// handle compressed data
	foreach($responseHeaders as $header) {
		if(stristr($header, 'content-encoding')) {
			switch(true) {
			case stristr($header, 'gzip'):
				// gzdecode() parses the gzip header and trailer itself, so
				// optional header fields do not break the decoding.
				$content = gzdecode($content);
				break;
			case stristr($header, 'compress'):
				//TODO
			case stristr($header, 'deflate'):
				//TODO
			case stristr($header, 'brotli'):
				//TODO
				returnServerError($header . '=> Not implemented yet');
				break;
			case stristr($header, 'identity'):
				break;
			default:
				returnServerError($header . '=> Unknown compression');
			}
		}
	}

	return $content;
}

/**
 * Download a page with getContents() and parse it with str_get_html().
 *
 * The first five parameters are forwarded to getContents(), the remaining
 * ones to str_get_html(), in the same order as they are declared here.
 *
 * @param string $url URL to download
 * @param bool $use_include_path forwarded to getContents()
 * @param resource|null $context forwarded to getContents()
 * @param int $offset forwarded to getContents()
 * @param int|null $maxLen forwarded to getContents()
 * @param bool $lowercase forwarded to str_get_html()
 * @param bool $forceTagsClosed forwarded to str_get_html()
 * @param string $target_charset forwarded to str_get_html()
 * @param bool $stripRN forwarded to str_get_html()
 * @param string $defaultBRText forwarded to str_get_html()
 * @param string $defaultSpanText forwarded to str_get_html()
 * @return mixed whatever str_get_html() returns for the downloaded content
 */
function getSimpleHTMLDOM(
	$url,
	$use_include_path = false,
	$context = null,
	$offset = 0,
	$maxLen = null,
	$lowercase = true,
	$forceTagsClosed = true,
	$target_charset = DEFAULT_TARGET_CHARSET,
	$stripRN = true,
	$defaultBRText = DEFAULT_BR_TEXT,
	$defaultSpanText = DEFAULT_SPAN_TEXT
) {
	$html = getContents($url, $use_include_path, $context, $offset, $maxLen);

	$dom = str_get_html(
		$html,
		$lowercase,
		$forceTagsClosed,
		$target_charset,
		$stripRN,
		$defaultBRText,
		$defaultSpanText
	);

	return $dom;
}

/**
 * Parse a page with str_get_html(), downloading it only when no sufficiently
 * recent copy is available in the local page cache.
 *
 * A cached copy is used when its timestamp is less than $duration seconds
 * old and the DEBUG constant is not defined as true; otherwise the page is
 * downloaded with getContents() and, if that succeeded, saved to the cache.
 * The cache is purged with a fixed value of 86400 on every call
 * (presumably seconds, i.e. 24 hours - confirm against Cache::purgeCache()).
 *
 * @param string $url URL to download and cache
 * @param int $duration maximum age of a usable cache entry in seconds (default: 24h/86400s)
 * @param bool $use_include_path forwarded to getContents()
 * @param resource|null $context forwarded to getContents()
 * @param int $offset forwarded to getContents()
 * @param int|null $maxLen forwarded to getContents()
 * @param bool $lowercase forwarded to str_get_html()
 * @param bool $forceTagsClosed forwarded to str_get_html()
 * @param string $target_charset forwarded to str_get_html()
 * @param bool $stripRN forwarded to str_get_html()
 * @param string $defaultBRText forwarded to str_get_html()
 * @param string $defaultSpanText forwarded to str_get_html()
 * @return mixed whatever str_get_html() returns for the cached or downloaded content
 */
function getSimpleHTMLDOMCached(
	$url,
	$duration = 86400,
	$use_include_path = false,
	$context = null,
	$offset = 0,
	$maxLen = null,
	$lowercase = true,
	$forceTagsClosed = true,
	$target_charset = DEFAULT_TARGET_CHARSET,
	$stripRN = true,
	$defaultBRText = DEFAULT_BR_TEXT,
	$defaultSpanText = DEFAULT_SPAN_TEXT
) {
	debugMessage('Caching url ' . $url . ', duration ' . $duration);

	// Set up the page cache; the cache entry is keyed by the URL alone.
	$pageCache = Cache::create('FileCache');
	$pageCache->setPath(CACHE_DIR . '/pages');
	$pageCache->purgeCache(86400); // 24 hours (forced)

	$cacheKey = [$url];
	$pageCache->setParameters($cacheKey);

	// Debug mode always bypasses the cached copy.
	$debugging = defined('DEBUG') && DEBUG === true;

	$cachedAt = $pageCache->getTime();
	$isFresh = $cachedAt !== false && (time() - $duration < $cachedAt);

	if($isFresh && !$debugging) {
		// Cached copy is recent enough
		$html = $pageCache->loadData();
	} else {
		// No usable cached copy: download and store the result on success
		$html = getContents($url, $use_include_path, $context, $offset, $maxLen);
		if($html !== false) {
			$pageCache->saveData($html);
		}
	}

	return str_get_html(
		$html,
		$lowercase,
		$forceTagsClosed,
		$target_charset,
		$stripRN,
		$defaultBRText,
		$defaultSpanText
	);
}