summaryrefslogtreecommitdiff
path: root/bridges/FolhaDeSaoPauloBridge.php
diff options
context:
space:
mode:
author Johannes 'josch' Schauer <josch@debian.org> 2020-03-07 10:30:21 +0100
committer Johannes 'josch' Schauer <josch@debian.org> 2020-03-07 10:30:21 +0100
commit 432eb165b83d4483780a279b02929b05b3e09fa5 (patch)
tree 53ff708fcd05370af1595fd720440cde85a46891 /bridges/FolhaDeSaoPauloBridge.php
parent 779ac0902d2586e1ac31ad41881d8922ec40a7ea (diff)
New upstream version 2020-02-26+dfsg1
Diffstat (limited to 'bridges/FolhaDeSaoPauloBridge.php')
-rw-r--r-- bridges/FolhaDeSaoPauloBridge.php 51
1 files changed, 51 insertions, 0 deletions
diff --git a/bridges/FolhaDeSaoPauloBridge.php b/bridges/FolhaDeSaoPauloBridge.php
new file mode 100644
index 0000000..acd8d25
--- /dev/null
+++ b/bridges/FolhaDeSaoPauloBridge.php
@@ -0,0 +1,51 @@
+<?php
+/** Expands a Folha de São Paulo RSS feed and swaps each item's content for the article body. */
+class FolhaDeSaoPauloBridge extends FeedExpander {
+	const MAINTAINER = 'somini';
+	const NAME = 'Folha de São Paulo';
+	const URI = 'https://www1.folha.uol.com.br';
+	const DESCRIPTION = 'Returns the newest posts from Folha de São Paulo (full text)';
+	const PARAMETERS = array(
+		array(
+			'feed' => array(
+				'name' => 'Feed sub-URL',
+				'type' => 'text',
+				'title' => 'Select the sub-feed (see https://www1.folha.uol.com.br/feed/)',
+				'exampleValue' => 'emcimadahora/rss091.xml',
+			)
+		)
+	);
+
+	/** Turns raw feed entry $item into an item array; 'content' becomes the article body if found. */
+	protected function parseItem($item){
+		$item = parent::parseItem($item);
+
+		$articleHTMLContent = getSimpleHTMLDOMCached($item['uri']);
+		if (!$articleHTMLContent) {
+			// Keep the plain feed entry when the article page cannot be loaded.
+			Debug::log('Could not load article: ' . $item['uri']);
+			return $item;
+		}
+		// Blank out descendants carrying the is-hidden class so they stay out of the text.
+		foreach ($articleHTMLContent->find('div.c-news__body .is-hidden') as $toRemove) {
+			$toRemove->innertext = '';
+		}
+		$item_content = $articleHTMLContent->find('div.c-news__body', 0);
+		if ($item_content) {
+			// Only the listed formatting tags survive in the feed content.
+			$item['content'] = strip_tags($item_content->innertext, '<p><b><a><blockquote><img><em>');
+		}
+		return $item;
+	}
+
+	/** Builds the feed URL from the 'feed' input (full site URL or sub-path) and collects its items. */
+	public function collectData(){
+		$feed_input = trim((string)$this->getInput('feed'));
+		// Use a full URL only if it is on this site (the `/` after the host rejects look-alike
+		// hosts); otherwise join it to the site root, dropping leading slashes to avoid `//`.
+		$is_site_url = substr($feed_input, 0, strlen(self::URI) + 1) === self::URI . '/';
+		$feed_url = $is_site_url ? $feed_input : self::URI . '/' . ltrim($feed_input, '/');
+		Debug::log('URL: ' . $feed_url);
+		$this->collectExpandableDatas($feed_url);
+	}
+}