diff options
author | Johannes 'josch' Schauer <josch@debian.org> | 2020-03-07 10:30:21 +0100 |
---|---|---|
committer | Johannes 'josch' Schauer <josch@debian.org> | 2020-03-07 10:30:21 +0100 |
commit | 432eb165b83d4483780a279b02929b05b3e09fa5 (patch) | |
tree | 53ff708fcd05370af1595fd720440cde85a46891 /bridges/FolhaDeSaoPauloBridge.php | |
parent | 779ac0902d2586e1ac31ad41881d8922ec40a7ea (diff) |
New upstream version 2020-02-26+dfsg1
Diffstat (limited to 'bridges/FolhaDeSaoPauloBridge.php')
-rw-r--r-- | bridges/FolhaDeSaoPauloBridge.php | 51 |
1 files changed, 51 insertions, 0 deletions
<?php
/**
 * Bridge for the Folha de São Paulo RSS feeds.
 *
 * Expands each feed item by loading the linked article page and replacing
 * the item content with the markup found inside `div.c-news__body`.
 */
class FolhaDeSaoPauloBridge extends FeedExpander {
    const MAINTAINER = 'somini';
    const NAME = 'Folha de São Paulo';
    const URI = 'https://www1.folha.uol.com.br';
    const DESCRIPTION = 'Returns the newest posts from Folha de São Paulo (full text)';
    const PARAMETERS = array(
        array(
            'feed' => array(
                'name' => 'Feed sub-URL',
                'type' => 'text',
                'title' => 'Select the sub-feed (see https://www1.folha.uol.com.br/feed/)',
                'exampleValue' => 'emcimadahora/rss091.xml',
            )
        )
    );

    /**
     * Parses one feed entry and, when the article page can be loaded,
     * replaces its content with the article body.
     *
     * @param mixed $item Raw feed entry, passed unchanged to parent::parseItem().
     * @return array The parsed item; 'content' is overwritten only when the
     *               article body element is found.
     */
    protected function parseItem($item){
        $item = parent::parseItem($item);

        // Without a link there is nothing to expand; keep the feed-provided item.
        if (empty($item['uri'])) {
            return $item;
        }

        $articleHTMLContent = getSimpleHTMLDOMCached($item['uri']);
        if (!$articleHTMLContent) {
            Debug::log('???: ' . $item['uri']);
            return $item;
        }

        // Blank out elements marked as hidden inside the body before extracting it.
        foreach ($articleHTMLContent->find('div.c-news__body .is-hidden') as $toRemove) {
            $toRemove->innertext = '';
        }

        $item_content = $articleHTMLContent->find('div.c-news__body', 0);
        if ($item_content) {
            // Keep only a small set of formatting tags in the final content.
            $item['content'] = strip_tags(
                $item_content->innertext,
                '<p><b><a><blockquote><img><em>'
            );
        }

        return $item;
    }

    /**
     * Resolves the configured feed to a URL and collects its items.
     *
     * @return void
     */
    public function collectData(){
        $feed_url = $this->buildFeedUrl((string)$this->getInput('feed'));
        Debug::log('URL: ' . $feed_url);
        $this->collectExpandableDatas($feed_url);
    }

    /**
     * Turns the user-supplied feed value into an absolute feed URL.
     *
     * A value that is the site URI itself, or that starts with the site URI
     * followed by '/', is used as-is. Anything else is treated as a path
     * relative to the site URI; leading slashes are dropped so the result
     * never contains a double slash after the host.
     *
     * @param string $feed_input Value of the 'feed' parameter.
     * @return string Absolute feed URL.
     */
    private function buildFeedUrl($feed_input){
        $feed_input = trim($feed_input);

        // Match on "URI/" rather than on the bare URI so that a look-alike
        // host such as "<URI>.example.org" is not accepted as absolute.
        $is_absolute = $feed_input === self::URI
            || strpos($feed_input, self::URI . '/') === 0;

        if ($is_absolute) {
            Debug::log('Input:: ' . $feed_input);
            return $feed_input;
        }

        return self::URI . '/' . ltrim($feed_input, '/');
    }
}