summary | refs | log | tree | commit | diff
path: root/bridges/TwitterBridge.php
diff options
context:
space:
mode:
author: Johannes 'josch' Schauer <josch@debian.org> 2020-11-10 13:17:03 +0100
committer: Johannes 'josch' Schauer <josch@debian.org> 2020-11-10 13:17:03 +0100
commit: daeb2c0913653d197fad2a75010cfc6034c6a9e8 (patch)
tree: d25328f98ca39a5ac8abad156b5e8f5781505a3c /bridges/TwitterBridge.php
parent: 432eb165b83d4483780a279b02929b05b3e09fa5 (diff)
New upstream version 2020-11-10+dfsg1
Diffstat (limited to 'bridges/TwitterBridge.php')
-rw-r--r-- bridges/TwitterBridge.php 498
1 files changed, 292 insertions, 206 deletions
diff --git a/bridges/TwitterBridge.php b/bridges/TwitterBridge.php
index 0d8b024..0bc2f67 100644
--- a/bridges/TwitterBridge.php
+++ b/bridges/TwitterBridge.php
@@ -2,6 +2,9 @@
class TwitterBridge extends BridgeAbstract {
const NAME = 'Twitter Bridge';
const URI = 'https://twitter.com/';
+ const API_URI = 'https://api.twitter.com';
+ const GUEST_TOKEN_USES = 100;
+ const GUEST_TOKEN_EXPIRY = 300; // 5min
const CACHE_TIMEOUT = 300; // 5min
const DESCRIPTION = 'returns tweets';
const MAINTAINER = 'pmaziere';
@@ -92,6 +95,20 @@ EOD
'required' => false,
'title' => 'Specify term to search for'
)
+ ),
+ 'By list ID' => array(
+ 'listid' => array(
+ 'name' => 'List ID',
+ 'exampleValue' => '31748',
+ 'required' => true,
+ 'title' => 'Insert the list id'
+ ),
+ 'filter' => array(
+ 'name' => 'Filter',
+ 'exampleValue' => '#rss-bridge',
+ 'required' => false,
+ 'title' => 'Specify term to search for'
+ )
)
);
@@ -142,6 +159,8 @@ EOD
break;
case 'By list':
return $this->getInput('list') . ' - Twitter list by ' . $this->getInput('user');
+ case 'By list ID':
+ return 'Twitter List #' . $this->getInput('listid');
default: return parent::getName();
}
return 'Twitter ' . $specific . $this->getInput($param);
@@ -164,26 +183,46 @@ EOD
. urlencode($this->getInput('user'))
. '/lists/'
. str_replace(' ', '-', strtolower($this->getInput('list')));
+ case 'By list ID':
+ return self::URI
+ . 'i/lists/'
+ . urlencode($this->getInput('listid'));
default: return parent::getURI();
}
}
+ private function getApiURI() {
+ switch($this->queriedContext) {
+ case 'By keyword or hashtag':
+ return self::API_URI
+ . '/2/search/adaptive.json?q='
+ . urlencode($this->getInput('q'))
+ . '&tweet_mode=extended&tweet_search_mode=live';
+ case 'By username':
+ return self::API_URI
+ . '/2/timeline/profile/'
+ . $this->getRestId($this->getInput('u'))
+ . '.json?tweet_mode=extended';
+ case 'By list':
+ return self::API_URI
+ . '/2/timeline/list.json?list_id='
+ . $this->getListId($this->getInput('user'), $this->getInput('list'))
+ . '&tweet_mode=extended';
+ case 'By list ID':
+ return self::API_URI
+ . '/2/timeline/list.json?list_id='
+ . $this->getInput('listid')
+ . '&tweet_mode=extended';
+ default: returnServerError('Invalid query context !');
+ }
+ }
+
public function collectData(){
$html = '';
$page = $this->getURI();
+ $data = json_decode($this->getApiContents($this->getApiURI()));
- $header = array(
- 'User-Agent: Mozilla/5.0 (Windows NT 9.0; WOW64; Trident/7.0; rv:11.0) like Gecko'
- );
-
- if(php_sapi_name() === 'cli' && empty(ini_get('curl.cainfo'))) {
- $cookies = $this->getCookies($page);
- $html = getSimpleHTMLDOM($page, array_merge($header, array("Cookie: $cookies")));
- } else {
- $html = getSimpleHTMLDOM($page, $header, array(CURLOPT_COOKIEFILE => ''));
- }
-
- if(!$html) {
+ if(!$data) {
switch($this->queriedContext) {
case 'By keyword or hashtag':
returnServerError('No results for this query.');
@@ -196,75 +235,80 @@ EOD
$hidePictures = $this->getInput('nopic');
- foreach($html->find('div.js-stream-tweet') as $tweet) {
-
- // Skip retweets?
- if($this->getInput('noretweet')
- && $tweet->find('div.context span.js-retweet-text a', 0)) {
- continue;
+ $promotedTweetIds = array_reduce($data->timeline->instructions[0]->addEntries->entries, function($carry, $entry) {
+ if (!isset($entry->content->item)) {
+ return $carry;
}
+ $tweet = $entry->content->item->content->tweet;
+ if (isset($tweet->promotedMetadata)) {
+ $carry[] = $tweet->id;
+ }
+ return $carry;
+ }, array());
+
+ foreach($data->globalObjects->tweets as $tweet) {
- // remove 'invisible' content
- foreach($tweet->find('.invisible') as $invisible) {
- $invisible->outertext = '';
+ /* Debug::log('>>> ' . json_encode($tweet)); */
+ // Skip spurious retweets
+ if (isset($tweet->retweeted_status_id_str) && substr($tweet->full_text, 0, 4) === 'RT @') {
+ continue;
}
- // Skip protmoted tweets
- $heading = $tweet->previousSibling();
- if(!is_null($heading) &&
- $heading->getAttribute('class') === 'promoted-tweet-heading'
- ) {
+ // Skip promoted tweets
+ if (in_array($tweet->id_str, $promotedTweetIds)) {
continue;
}
$item = array();
// extract username and sanitize
- $item['username'] = htmlspecialchars_decode($tweet->getAttribute('data-screen-name'), ENT_QUOTES);
- // extract fullname (pseudonym)
- $item['fullname'] = htmlspecialchars_decode($tweet->getAttribute('data-name'), ENT_QUOTES);
- // get author
+ $user_info = $this->getUserInformation($tweet->user_id_str, $data->globalObjects);
+
+ $item['username'] = $user_info->screen_name;
+ $item['fullname'] = $user_info->name;
$item['author'] = $item['fullname'] . ' (@' . $item['username'] . ')';
- if($rt = $tweet->find('div.context span.js-retweet-text a', 0)) {
- $item['author'] .= ' RT: @' . $rt->plaintext;
+ if (null !== $this->getInput('u') && $item['username'] != $this->getInput('u')) {
+ $item['author'] .= ' RT: @' . $this->getInput('u');
}
- // get avatar link
- $item['avatar'] = $tweet->find('img', 0)->src;
- // get TweetID
- $item['id'] = $tweet->getAttribute('data-tweet-id');
- // get tweet link
- $item['uri'] = self::URI . substr($tweet->find('a.js-permalink', 0)->getAttribute('href'), 1);
- // extract tweet timestamp
- $item['timestamp'] = $tweet->find('span.js-short-timestamp', 0)->getAttribute('data-time');
- // generate the title
- $item['title'] = strip_tags($this->fixAnchorSpacing(htmlspecialchars_decode(
- $tweet->find('p.js-tweet-text', 0), ENT_QUOTES), '<a>'));
+ $item['avatar'] = $user_info->profile_image_url_https;
- switch($this->queriedContext) {
- case 'By list':
- // Check if filter applies to list (using raw content)
- if($this->getInput('filter')) {
- if(stripos($tweet->find('p.js-tweet-text', 0)->plaintext, $this->getInput('filter')) === false) {
- continue 2; // switch + for-loop!
- }
- }
- break;
- default:
+ $item['id'] = $tweet->id_str;
+ $item['uri'] = self::URI . $item['username'] . '/status/' . $item['id'];
+ // extract tweet timestamp
+ $item['timestamp'] = $tweet->created_at;
+
+ // Convert plain text URLs into HTML hyperlinks
+ $cleanedTweet = $tweet->full_text;
+ $foundUrls = false;
+
+ if (isset($tweet->entities->media)) {
+ foreach($tweet->entities->media as $media) {
+ $cleanedTweet = str_replace($media->url,
+ '<a href="' . $media->expanded_url . '">' . $media->display_url . '</a>',
+ $cleanedTweet);
+ $foundUrls = true;
+ }
}
+ if (isset($tweet->entities->urls)) {
+ foreach($tweet->entities->urls as $url) {
+ $cleanedTweet = str_replace($url->url,
+ '<a href="' . $url->expanded_url . '">' . $url->display_url . '</a>',
+ $cleanedTweet);
+ $foundUrls = true;
+ }
+ }
+ if ($foundUrls === false) {
+ // fallback to regex'es
+ $reg_ex = '/(http|https|ftp|ftps)\:\/\/[a-zA-Z0-9\-\.]+\.[a-zA-Z]{2,3}(\/\S*)?/';
+ if(preg_match($reg_ex, $tweet->full_text, $url)) {
+ $cleanedTweet = preg_replace($reg_ex,
+ "<a href='{$url[0]}' target='_blank'>{$url[0]}</a> ",
+ $cleanedTweet);
+ }
+ }
+ // generate the title
+ $item['title'] = strip_tags($cleanedTweet);
- $this->processContentLinks($tweet);
- $this->processEmojis($tweet);
-
- // get tweet text
- $cleanedTweet = str_replace(
- 'href="/',
- 'href="' . self::URI,
- $tweet->find('p.js-tweet-text', 0)->innertext
- );
-
- // fix anchors missing spaces in-between
- $cleanedTweet = $this->fixAnchorSpacing($cleanedTweet);
-
- // Add picture to content
+ // Add avatar
$picture_html = '';
if(!$hidePictures) {
$picture_html = <<<EOD
@@ -278,31 +322,79 @@ EOD
EOD;
}
- // Add embeded image to content
- $image_html = '';
- $images = $this->getImageURI($tweet);
- if(!$this->getInput('noimg') && !is_null($images)) {
-
- foreach ($images as $image) {
-
- // Set image scaling
- $image_orig = $this->getInput('noimgscaling') ? $image : $image . ':orig';
- $image_thumb = $this->getInput('noimgscaling') ? $image : $image . ':thumb';
-
- // add enclosures
- $item['enclosures'][] = $image_orig;
+ // Get images
+ $media_html = '';
+ if(isset($tweet->extended_entities->media) && !$this->getInput('noimg')) {
+ foreach($tweet->extended_entities->media as $media) {
+ switch($media->type) {
+ case 'photo':
+ $image = $media->media_url_https . '?name=orig';
+ $display_image = $media->media_url_https;
+ // add enclosures
+ $item['enclosures'][] = $image;
- $image_html .= <<<EOD
-<a href="{$image_orig}">
+ $media_html .= <<<EOD
+<a href="{$image}">
<img
style="align:top; max-width:558px; border:1px solid black;"
- src="{$image_thumb}" />
+ referrerpolicy="no-referrer"
+ src="{$display_image}" />
</a>
EOD;
+ break;
+ case 'video':
+ case 'animated_gif':
+ if(isset($media->video_info)) {
+ $link = $media->expanded_url;
+ $poster = $media->media_url_https;
+ $video = null;
+ $maxBitrate = -1;
+ foreach($media->video_info->variants as $variant) {
+ $bitRate = isset($variant->bitrate) ? $variant->bitrate : -100;
+ if ($bitRate > $maxBitrate) {
+ $maxBitrate = $bitRate;
+ $video = $variant->url;
+ }
+ }
+ if(!is_null($video)) {
+ // add enclosures
+ $item['enclosures'][] = $video;
+ $item['enclosures'][] = $poster;
+
+ $media_html .= <<<EOD
+<a href="{$link}">Video</a>
+<video
+ style="align:top; max-width:558px; border:1px solid black;"
+ referrerpolicy="no-referrer"
+ src="{$video}" poster="{$poster}" />
+EOD;
+ }
+ }
+ break;
+ default:
+ Debug::log('Missing support for media type: ' . $media->type);
+ }
}
}
- // add content
+ switch($this->queriedContext) {
+ case 'By list':
+ case 'By list ID':
+ // Check if filter applies to list (using raw content)
+ if($this->getInput('filter')) {
+ if(stripos($cleanedTweet, $this->getInput('filter')) === false) {
+ continue 2; // switch + for-loop!
+ }
+ }
+ break;
+ case 'By username':
+ if ($this->getInput('noretweet') && $item['username'] != $this->getInput('u')) {
+ continue 2; // switch + for-loop!
+ }
+ break;
+ default:
+ }
+
$item['content'] = <<<EOD
<div style="display: inline-block; vertical-align: top;">
{$picture_html}
@@ -311,155 +403,149 @@ EOD;
<blockquote>{$cleanedTweet}</blockquote>
</div>
<div style="display: block; vertical-align: top;">
- <blockquote>{$image_html}</blockquote>
+ <blockquote>{$media_html}</blockquote>
</div>
EOD;
- // add quoted tweet
- $quotedTweet = $tweet->find('div.QuoteTweet', 0);
- if($quotedTweet) {
- // get tweet text
- $cleanedQuotedTweet = str_replace(
- 'href="/',
- 'href="' . self::URI,
- $quotedTweet->find('div.tweet-text', 0)->innertext
- );
-
- $this->processContentLinks($quotedTweet);
- $this->processEmojis($quotedTweet);
-
- // Add embeded image to content
- $quotedImage_html = '';
- $quotedImages = $this->getQuotedImageURI($tweet);
-
- if(!$this->getInput('noimg') && !is_null($quotedImages)) {
-
- foreach ($quotedImages as $image) {
-
- // Set image scaling
- $image_orig = $this->getInput('noimgscaling') ? $image : $image . ':orig';
- $image_thumb = $this->getInput('noimgscaling') ? $image : $image . ':thumb';
-
- // add enclosures
- $item['enclosures'][] = $image_orig;
-
- $quotedImage_html .= <<<EOD
-<a href="{$image_orig}">
-<img
- style="align:top; max-width:558px; border:1px solid black;"
- src="{$image_thumb}" />
-</a>
-EOD;
- }
- }
-
- $item['content'] = <<<EOD
-{$item['content']}
-<hr>
-<div style="display: inline-block; vertical-align: top;">
- <blockquote>{$cleanedQuotedTweet}</blockquote>
-</div>
-<div style="display: block; vertical-align: top;">
- <blockquote>{$quotedImage_html}</blockquote>
-</div>
-EOD;
- }
$item['content'] = htmlspecialchars_decode($item['content'], ENT_QUOTES);
// put out
$this->items[] = $item;
}
- }
-
- private function processEmojis($tweet){
- // process emojis (reduce size)
- foreach($tweet->find('img.Emoji') as $img) {
- $img->style .= ' height: 1em;';
- }
- }
- private function processContentLinks($tweet){
- // processing content links
- foreach($tweet->find('a') as $link) {
- if($link->hasAttribute('data-expanded-url')) {
- $link->href = $link->getAttribute('data-expanded-url');
- }
- $link->removeAttribute('data-expanded-url');
- $link->removeAttribute('data-query-source');
- $link->removeAttribute('rel');
- $link->removeAttribute('class');
- $link->removeAttribute('target');
- $link->removeAttribute('title');
- }
+ usort($this->items, array('TwitterBridge', 'compareTweetId'));
}
- private function fixAnchorSpacing($content){
- // fix anchors missing spaces in-between
- return str_replace(
- '<a',
- ' <a',
- $content
- );
+ private static function compareTweetId($tweet1, $tweet2) {
+ return (intval($tweet1['id']) < intval($tweet2['id']) ? 1 : -1);
}
- private function getImageURI($tweet){
- // Find media in tweet
- $images = array();
-
- $container = $tweet->find('div.AdaptiveMedia-container', 0);
+ //The aim of this function is to get an API key and a guest token
+ //This function takes 2 requests, and therefore is cached
+ private function getApiKey() {
+
+ $cacheFac = new CacheFactory();
+ $cacheFac->setWorkingDir(PATH_LIB_CACHES);
+ $r_cache = $cacheFac->create(Configuration::getConfig('cache', 'type'));
+ $r_cache->setScope(get_called_class());
+ $r_cache->setKey(array('refresh'));
+ $data = $r_cache->loadData();
+
+ $refresh = null;
+ if($data === null) {
+ $refresh = time();
+ $r_cache->saveData($refresh);
+ } else {
+ $refresh = $data;
+ }
- if($container && $container->find('img', 0)) {
- foreach ($container->find('img') as $img) {
- $images[] = $img->src;
+ $cacheFac = new CacheFactory();
+ $cacheFac->setWorkingDir(PATH_LIB_CACHES);
+ $cache = $cacheFac->create(Configuration::getConfig('cache', 'type'));
+ $cache->setScope(get_called_class());
+ $cache->setKey(array('api_key'));
+ $data = $cache->loadData();
+
+ $apiKey = null;
+ if($data === null || (time() - $refresh) > self::GUEST_TOKEN_EXPIRY) {
+ $twitterPage = getContents('https://twitter.com');
+
+ $jsLink = false;
+ $jsMainRegexArray = array(
+ '/(https:\/\/abs\.twimg\.com\/responsive-web\/web\/main\.[^\.]+\.js)/m',
+ '/(https:\/\/abs\.twimg\.com\/responsive-web\/web_legacy\/main\.[^\.]+\.js)/m',
+ '/(https:\/\/abs\.twimg\.com\/responsive-web\/client-web\/main\.[^\.]+\.js)/m',
+ '/(https:\/\/abs\.twimg\.com\/responsive-web\/client-web-legacy\/main\.[^\.]+\.js)/m',
+ );
+ foreach ($jsMainRegexArray as $jsMainRegex) {
+ if (preg_match_all($jsMainRegex, $twitterPage, $jsMainMatches, PREG_SET_ORDER, 0)) {
+ $jsLink = $jsMainMatches[0][0];
+ break;
+ }
+ }
+ if (!$jsLink) {
+ returnServerError('Could not locate main.js link');
}
- }
- if (!empty($images)) {
- return $images;
+ $jsContent = getContents($jsLink);
+ $apiKeyRegex = '/([a-zA-Z0-9]{59}%[a-zA-Z0-9]{44})/m';
+ preg_match_all($apiKeyRegex, $jsContent, $apiKeyMatches, PREG_SET_ORDER, 0);
+ $apiKey = $apiKeyMatches[0][0];
+ $cache->saveData($apiKey);
+ } else {
+ $apiKey = $data;
}
- return null;
- }
+ $cacheFac2 = new CacheFactory();
+ $cacheFac2->setWorkingDir(PATH_LIB_CACHES);
+ $gt_cache = $cacheFac->create(Configuration::getConfig('cache', 'type'));
+ $gt_cache->setScope(get_called_class());
+ $gt_cache->setKey(array('guest_token'));
+ $guestTokenUses = $gt_cache->loadData();
+
+ $guestToken = null;
+ if($guestTokenUses === null || !is_array($guestTokenUses) || count($guestTokenUses) != 2
+ || $guestTokenUses[0] <= 0 || (time() - $refresh) > self::GUEST_TOKEN_EXPIRY) {
+ $guestToken = $this->getGuestToken();
+ $gt_cache->saveData(array(self::GUEST_TOKEN_USES, $guestToken));
+ $r_cache->saveData(time());
+ } else {
+ $guestTokenUses[0] -= 1;
+ $gt_cache->saveData($guestTokenUses);
+ $guestToken = $guestTokenUses[1];
+ }
- private function getQuotedImageURI($tweet){
- // Find media in tweet
- $images = array();
+ return array($apiKey, $guestToken);
- $container = $tweet->find('div.QuoteMedia-container', 0);
+ }
- if($container && $container->find('img', 0)) {
- foreach ($container->find('img') as $img) {
- $images[] = $img->src;
- }
- }
+ // Get a guest token. This is different to an API key,
+ // and it seems to change more regularly than the API key.
+ private function getGuestToken() {
+ $pageContent = getContents('https://twitter.com', array(), array(), true);
+
+ $guestTokenRegex = '/gt=([0-9]*)/m';
+ preg_match_all($guestTokenRegex, $pageContent['header'], $guestTokenMatches, PREG_SET_ORDER, 0);
+ if (!$guestTokenMatches)
+ preg_match_all($guestTokenRegex, $pageContent['content'], $guestTokenMatches, PREG_SET_ORDER, 0);
+ if (!$guestTokenMatches) returnServerError('Could not parse guest token');
+ $guestToken = $guestTokenMatches[0][1];
+ return $guestToken;
+ }
- if (!empty($images)) {
- return $images;
- }
+ private function getApiContents($uri) {
+ $apiKeys = $this->getApiKey();
+ $headers = array('authorization: Bearer ' . $apiKeys[0],
+ 'x-guest-token: ' . $apiKeys[1],
+ );
+ return getContents($uri, $headers);
+ }
- return null;
+ private function getRestId($username) {
+ $searchparams = urlencode('{"screen_name":"' . strtolower($username) . '", "withHighlightedLabel":true}');
+ $searchURL = self::API_URI . '/graphql/-xfUfZsnR_zqjFd-IfrN5A/UserByScreenName?variables=' . $searchparams;
+ $searchResult = $this->getApiContents($searchURL);
+ $searchResult = json_decode($searchResult);
+ return $searchResult->data->user->rest_id;
}
- private function getCookies($pageURL){
+ private function getListId($username, $listName) {
+ $searchparams = urlencode('{"screenName":"'
+ . strtolower($username)
+ . '", "listSlug": "'
+ . $listName
+ . '", "withHighlightedLabel":false}');
+ $searchURL = self::API_URI . '/graphql/ErWsz9cObLel1BF-HjuBlA/ListBySlug?variables=' . $searchparams;
+ $searchResult = $this->getApiContents($searchURL);
+ $searchResult = json_decode($searchResult);
+ return $searchResult->data->user_by_screen_name->list->id_str;
+ }
- $ctx = stream_context_create(array(
- 'http' => array(
- 'follow_location' => false
- )
- )
- );
- $a = file_get_contents($pageURL, 0, $ctx);
-
- //First request to get the cookie
- $cookies = '';
- foreach($http_response_header as $hdr) {
- if(stripos($hdr, 'Set-Cookie') !== false) {
- $cLine = explode(':', $hdr)[1];
- $cLine = explode(';', $cLine)[0];
- $cookies .= ';' . $cLine;
+ private function getUserInformation($userId, $apiData) {
+ foreach($apiData->users as $user) {
+ if($user->id_str == $userId) {
+ return $user;
}
}
-
- return substr($cookies, 2);
}
}