artabro/wire/modules/Markup/MarkupRSS.module

534 lines
16 KiB
Text
Raw Permalink Normal View History

2024-08-27 11:35:37 +02:00
<?php namespace ProcessWire;
/**
* ProcessWire Markup RSS Module
*
* Given a PageArray of pages, this module will render an RSS feed from them.
* This is intended to be used directly from a template file. See usage below.
*
* USAGE
* ~~~~~~
* $rss = $modules->get('MarkupRSS');
* $rss->setArray([ // specify RSS feed settings
* 'title' => 'Latest updates',
* 'description' => 'The most recent pages updated on my site',
* 'itemTitleField' => 'title',
* 'itemDateField' => 'created', // date field or 'created', 'published' or 'modified'
* 'itemDescriptionField' => 'summary',
* 'itemDescriptionLength' => 1000, // truncate descriptions to this max length or 0 to allow HTML
* 'itemContentField' => 'body', // optional HTML full-content, or omit to exclude
* 'itemAuthorField' => 'author', // optional text or Page field containing author(s)
* ]);
* $items = $pages->find('limit=10, sort=-modified'); // or any pages you want
* $rss->render($items);
* exit; // exit now, or if you don't, then at least stop sending further output
* ~~~~~~
*
* See also the $defaultConfigData below (first thing in the class) to see what
* options you can change at runtime.
*
*
* ProcessWire 3.x, Copyright 2023 by Ryan Cramer
* https://processwire.com
*
* @property string $title
* @property string $url
* @property string $description
* @property string $xsl
* @property string $css
* @property string $copyright
* @property int $ttl
* @property string $itemTitleField
* @property string $itemContentField
* @property string $itemDateField
* @property string $itemDescriptionField Field to use for item description.
* @property int $itemDescriptionLength Max length for item description or 0 to allow HTML markup with any length (default=1024)
* @property string $itemAuthorField
* @property string $itemAuthorElement
* @property string $header
* @property array|PageArray $feedPages
* @property bool $stripTags Strip tags from item description? Applies only if `itemDescriptionLength>0`. (default=true)
*
*
*/
class MarkupRSS extends WireData implements Module, ConfigurableModule {
/**
 * Describe this module to ProcessWire
 *
 * @return array Module info containing the `title`, `version`, `summary` and `icon` keys
 *
 */
public static function getModuleInfo() {
	$info = [
		'title' => 'Markup RSS Feed',
		'version' => 105,
		'summary' => 'Renders an RSS feed. Given a PageArray, renders an RSS feed of them.',
		'icon' => 'rss-square',
	];
	return $info;
}
/**
 * Default value for every configurable setting
 *
 * The constructor copies these onto the module instance, and the module config
 * screen uses them to fill in any setting that has not been saved yet.
 *
 * @var array
 *
 */
protected static $defaultConfigData = [

	// channel-level settings
	'title' => 'Untitled RSS Feed',
	'url' => '', // when blank, the URL of the current page is used at render time
	'description' => '',
	'xsl' => '', // optional URL to an XSL stylesheet
	'css' => '', // optional URL to a CSS stylesheet
	'copyright' => '',
	'ttl' => 60, // "time to live" in minutes

	// item-level settings
	'stripTags' => true, // applies only when itemDescriptionLength > 0
	'itemTitleField' => 'title',
	'itemContentField' => '', // for <content:encoded>
	'itemDescriptionField' => 'summary',
	'itemDescriptionLength' => 1024, // 0 means no limit and HTML allowed
	'itemDateField' => 'created',
	'itemAuthorField' => '', // i.e. createdUser.title or leave blank to not use
	'itemAuthorElement' => 'dc:creator', // may be 'dc:creator' or 'author' (author if email address)

	// output settings
	'header' => 'Content-Type: application/xml; charset=utf-8;',
	'feedPages' => [],
];
/**
 * Construct the module and populate it with the default settings
 *
 * Every key of the default config data becomes a property of this instance,
 * so each setting has a value before any saved or runtime configuration is applied.
 *
 */
public function __construct() {
	parent::__construct();
	$defaults = self::$defaultConfigData;
	$this->setArray($defaults);
}
/**
 * Module init
 *
 * Intentionally empty: this module needs no setup here, as all of its
 * default settings are assigned in the constructor.
 *
 */
public function init() { }
/**
 * Entity-encode a string that may already contain entities
 *
 * When the string contains an ampersand it is first passed through the
 * sanitizer's unentities() method, and the result is then encoded with ent().
 * Presumably this prevents existing entities from being encoded a second time.
 *
 * @param string $str
 * @return string
 *
 */
protected function ent1($str) {
	$mayHaveEntities = strpos($str, '&') !== false;
	if($mayHaveEntities) {
		$str = $this->wire()->sanitizer->unentities($str, true);
	}
	return $this->ent($str);
}
/**
 * Entity-encode a string for use in the XML of the feed
 *
 * Special characters are first encoded by htmlspecialchars() and the resulting
 * named entities are then swapped for their hexadecimal equivalents.
 *
 * @param string $str
 * @return string
 *
 */
protected function ent($str) {

	// https://validator.w3.org/feed/
	// recommends using hexadecimal entities here
	$hexEntities = array(
		'&gt;' => '&#x0003E;',
		'&lt;' => '&#x0003C;',
		'&amp;' => '&#x00026;',
		'&quot;' => '&#x00022;',
		'&apos;' => '&#x00027;',
		'&#39;' => '&#x00027;',
	);

	$encoded = htmlspecialchars($str, ENT_XML1 | ENT_QUOTES, 'UTF-8');

	return strtr($encoded, $hexEntities);
}
/**
 * Render the opening part of the feed
 *
 * Produces the XML declaration, optional stylesheet instructions, the opening
 * `<rss>` and `<channel>` tags and the channel-level elements. The `<channel>`
 * and `<rss>` tags are left open for the caller to close.
 *
 * Side effect: when no feed URL is set, the `url` setting is populated with
 * the httpUrl of the current page.
 *
 * @return string
 *
 */
protected function renderHeader() {

	// fall back to the current page when no feed URL was configured
	if(!$this->url) $this->url = $this->page->httpUrl;

	// channel values, entity-encoded for XML
	$title = $this->ent1($this->title);
	$url = $this->ent1($this->url);
	$description = $this->ent1($this->description);
	$copyright = $this->ent1($this->copyright);
	$xsl = $this->ent1($this->xsl);
	$css = $this->ent1($this->css);
	$ttl = (int) $this->ttl;
	$pubDate = date(\DATE_RSS);

	// namespaces declared on the <rss> element
	$namespaces = array(
		'xmlns:atom="http://www.w3.org/2005/Atom"',
		'xmlns:dc="http://purl.org/dc/elements/1.1/"',
	);
	if($this->itemContentField) {
		// only needed when items include <content:encoded>
		$namespaces[] = 'xmlns:content="http://purl.org/rss/1.0/modules/content/"';
	}

	// collect the header one line at a time
	$lines = array('<?xml version="1.0" encoding="utf-8" ?>');
	if($xsl) $lines[] = "<?xml-stylesheet type='text/xsl' href='$xsl' ?>";
	if($css) $lines[] = "<?xml-stylesheet type='text/css' href='$css' ?>";
	$lines[] = '<rss version="2.0" ' . implode(' ', $namespaces) . '>';
	$lines[] = '<channel>';
	$lines[] = "\t<title>$title</title>";
	$lines[] = "\t<link>$url</link>";
	$lines[] = "\t<atom:link href=\"$url\" rel=\"self\" type=\"application/rss+xml\" />";
	$lines[] = "\t<description>$description</description>";
	$lines[] = "\t<pubDate>$pubDate</pubDate>";
	if($copyright) $lines[] = "\t<copyright>$copyright</copyright>";
	if($ttl) $lines[] = "\t<ttl>$ttl</ttl>";

	return implode("\n", $lines) . "\n";
}
/**
 * Render an individual RSS item
 *
 * Builds one `<item>` element containing the title, description, link and guid
 * of the given page, plus the publication date, author and full content when
 * the corresponding settings are populated and the page has a value for them.
 *
 * @param Page $page
 * @return string Markup of the item, or blank string when the page has no title
 *
 */
protected function renderItem(Page $page) {

	$sanitizer = $this->wire()->sanitizer;

	// Cast to string first: a page without a value for the title field gives null,
	// and passing null to strip_tags() raises a deprecation notice in PHP 8.1+
	// which could end up in the middle of the XML output.
	$title = strip_tags((string) $page->get($this->itemTitleField));

	// Items without a title are skipped. Compare against a blank string rather than
	// using empty(), so that a page with the valid title "0" is still included.
	if($title === '') return '';

	$author = '';
	$description = '';
	$content = '';
	$pubDate = '';
	$title = $this->ent1($title);

	if($this->itemDateField && ($ts = $page->getUnformatted($this->itemDateField))) {
		// date
		$pubDate = "\t\t<pubDate>" . date(DATE_RSS, $ts) . "</pubDate>\n";
	}

	if($this->itemAuthorField) {
		// author: may be text, a single Page or a PageArray of pages
		$author = $page->get($this->itemAuthorField);
		if($author instanceof Page) {
			$author = $author->get('title|name');
		} else if($author instanceof PageArray) {
			$author = $author->implode(', ', 'title');
		}
		$author = (string) $author;
		if(strlen($author)) {
			$author = $this->ent1($author);
			$author = "\t\t<$this->itemAuthorElement>$author</$this->itemAuthorElement>\n";
		} else {
			$author = '';
		}
	}

	if($this->itemDescriptionField) {
		// description summary
		$description = $page->get($this->itemDescriptionField);
		if($description !== null) {
			if($this->itemDescriptionLength == 0) {
				// direct markup allowed in item description
				$description = $this->relativeToAbsoluteHtml($description, $page);
			} else {
				// plain text description: truncate, then entity-encode
				$description = $sanitizer->unentities($description, true);
				$description = $this->truncateDescription($description);
				$description = $this->ent($description);
			}
			$description = '<![CDATA[' . $description . ']]>';
		} else {
			$description = '';
		}
	}

	if($this->itemContentField) {
		// full HTML content, like that from CKEditor
		$content = (string) $page->get($this->itemContentField);
		$content = $this->relativeToAbsoluteHtml($content, $page);
		$content = "\t\t<content:encoded><![CDATA[" . $content . "]]></content:encoded>\n";
	}

	$out =
		"\t<item>\n" .
		"\t\t<title>$title</title>\n" .
		"\t\t<description>$description</description>\n" .
		$pubDate .
		$author .
		$content .
		"\t\t<link>$page->httpUrl</link>\n" .
		"\t\t<guid>$page->httpUrl</guid>\n" .
		"\t</item>\n";

	return $out;
}
/**
 * Render the complete feed and return it as a string
 *
 * When pages are given they replace the `feedPages` setting. Pages that are
 * not viewable are left out of the feed.
 *
 * @param PageArray|null $feedPages Pages to render, or omit to use the `feedPages` setting
 * @return string
 *
 */
public function renderFeed(PageArray $feedPages = null) {

	if($feedPages !== null) $this->feedPages = $feedPages;

	$parts = array($this->renderHeader());

	foreach($this->feedPages as $item) {
		if($item->viewable()) $parts[] = $this->renderItem($item);
	}

	// close the tags that the header left open
	$parts[] = "</channel>\n</rss>\n";

	return implode('', $parts);
}
/**
 * Send the configured http header and output the feed
 *
 * The header is sent before the feed is rendered.
 *
 * @param PageArray|null $feedPages Pages to render, or omit to use the `feedPages` setting
 * @return bool Always true
 *
 */
public function render(PageArray $feedPages = null) {
	header($this->header);
	$feed = $this->renderFeed($feedPages);
	echo $feed;
	return true;
}
/**
 * Truncate the description to the maximum length without splitting words
 *
 * The string is cut at `itemDescriptionLength` and then shortened further to the
 * last sentence/clause boundary or the last space, whichever the rules below select.
 * Tags are stripped first when the `stripTags` setting is on.
 *
 * Lengths and positions are measured in characters when the mbstring extension
 * is available, so that multi-byte UTF-8 characters are never cut in half.
 * A partial character would make the string invalid UTF-8, which in turn makes
 * htmlspecialchars() return a blank string. Without mbstring, bytes are used.
 *
 * @param string $str
 * @return string
 *
 */
protected function truncateDescription($str) {

	$str = trim((string) $str);
	$maxlen = (int) $this->itemDescriptionLength;

	// no limit (or a nonsensical negative one) means nothing to truncate
	if($maxlen < 1) return $str;

	if($this->stripTags) $str = strip_tags($str);

	// character-aware string functions, with byte-based fallbacks
	$mb = function_exists('mb_substr');
	$length = static function($s) use($mb) {
		return $mb ? mb_strlen($s, 'UTF-8') : strlen($s);
	};
	$start = static function($s, $len) use($mb) {
		return $mb ? mb_substr($s, 0, $len, 'UTF-8') : substr($s, 0, $len);
	};
	$lastPos = static function($s, $needle) use($mb) {
		return $mb ? mb_strrpos($s, $needle, 0, 'UTF-8') : strrpos($s, $needle);
	};

	// a string that is exactly at the limit already fits, so leave it alone
	if($length($str) <= $maxlen) return $str;

	$str = trim($start($str, $maxlen));

	// boundaries that we can end the summary with
	$boundaries = array('. ', '? ', '! ', ', ', '; ', '-');
	$bestPos = 0;

	foreach($boundaries as $boundary) {
		$pos = $lastPos($str, $boundary);
		// find the boundary that is furthest in string
		if($pos !== false && $pos > $bestPos) $bestPos = $pos;
	}

	// determine if we should truncate to last punctuation or last space.
	// if the last punctuation is further away than 1/4th the total length, then we'll
	// truncate to the last space. Otherwise, we'll truncate to the last punctuation.
	$spacePos = $lastPos($str, ' ');
	if($spacePos !== false && ($spacePos - ($maxlen / 4)) > $bestPos) $bestPos = $spacePos;

	// no usable boundary found: keep the string as cut at the maximum length
	if(!$bestPos) return $str;

	// +1 so that the boundary character itself (i.e. the period) is retained
	return trim($start($str, $bestPos + 1));
}
/**
 * Update links and other references in HTML content to be suitable for RSS
 *
 * - Root-relative `href` and `src` values (`/path/`) are prefixed with the site's http root URL.
 * - Protocol-relative `href` and `src` values (`//host/path`) are given the scheme of the
 *   site's http root URL rather than being mistaken for root-relative paths.
 * - Anchor-only `href` values (`#name`) are prefixed with the page's http URL.
 * - CDATA open/close sequences are encoded so the content can be placed in a CDATA section.
 *
 * @param string $content
 * @param Page $page
 * @return string
 *
 */
protected function relativeToAbsoluteHtml($content, Page $page) {

	$content = (string) $content;
	if($content === '') return '';

	$rootUrl = $this->wire()->config->urls->httpRoot;
	$pageUrl = $page->httpUrl();

	// scheme to apply to protocol-relative URLs, taken from the site's own root URL
	$scheme = stripos($rootUrl, 'https:') === 0 ? 'https:' : 'http:';

	$map = array();

	foreach(array('"', "'") as $quote) {
		foreach(array('href', 'src') as $attr) {
			// The "//" entry is longer than the "/" entry, and strtr() always tries the
			// longest match first, so protocol-relative URLs never reach the "/" rule.
			$map[" {$attr}={$quote}//"] = " {$attr}={$quote}{$scheme}//";
			$map[" {$attr}={$quote}/"] = " {$attr}={$quote}{$rootUrl}";
		}
		$map[" href={$quote}#"] = " href={$quote}{$pageUrl}#";
	}

	// content gets wrapped in a CDATA section, so it may not contain its delimiters
	$map['<![CDATA['] = '&lt;![CDATA[';
	$map[']]>'] = ']]&gt;';

	// single pass: text that has been replaced is not scanned again
	return strtr($content, $map);
}
/**
 * Build the form used to configure the default settings of this module
 *
 * All inputs live in a single fieldset. Settings missing from $data fall back
 * to the default config data. The selects for item fields are populated from
 * the fields in the system according to their type: textarea fields are offered
 * for description and content, text fields for title and description, and
 * datetime fields for the date (in addition to created/modified/published).
 *
 * @param array $data Current configuration values
 * @return InputfieldWrapper
 *
 */
public function getModuleConfigInputfields(array $data) {

	/** @var Modules $modules */
	$modules = $this->wire('modules');

	/** @var InputfieldWrapper $form */
	$form = $this->wire(new InputfieldWrapper());

	/** @var InputfieldFieldset $fieldset */
	$fieldset = $modules->get('InputfieldFieldset');
	$fieldset->attr('name', '_defaults');
	$fieldset->label = 'RSS feed defaults';
	$fieldset->icon = 'rss';
	$fieldset->description =
		"Select the default options for any given feed. Each of these may be overridden in the API, " .
		"so the options you select below should be considered defaults, unless you only have 1 feed. " .
		"If you only need to support 1 feed, then you will not need to override any of these in the API.";
	$form->add($fieldset);

	// use the default for any setting that has no value yet
	foreach(self::$defaultConfigData as $key => $value) {
		if(!isset($data[$key])) $data[$key] = $value;
	}

	// factory for a half-width input of the given Inputfield module type
	$newField = function($type, $name, $value, $label, $description) use($modules) {
		/** @var Inputfield $field */
		$field = $modules->get($type);
		$field->attr('name', $name);
		$field->attr('value', $value);
		$field->label = $label;
		$field->description = $description;
		$field->columnWidth = 50;
		return $field;
	};

	// channel-level settings: added to the fieldset right away
	$fieldset->add($newField(
		'InputfieldText', 'title', $data['title'],
		"Feed title",
		"The primary title of the RSS feed."
	));
	$fieldset->add($newField(
		'InputfieldURL', 'url', $data['url'],
		"Feed URL",
		"Optional URL on your site that serves as a feed index."
	));
	$fieldset->add($newField(
		'InputfieldText', 'description', $data['description'],
		"Feed description",
		"Optional default description for a feed."
	));
	$fieldset->add($newField(
		'InputfieldURL', 'xsl', $data['xsl'],
		"Link to XSL stylesheet",
		"Optional URL/link to an XSL stylesheet."
	));
	$fieldset->add($newField(
		'InputfieldURL', 'css', $data['css'],
		"Link to CSS stylesheet",
		"Optional URL/link to a CSS stylesheet."
	));
	$fieldset->add($newField(
		'InputfieldText', 'copyright', $data['copyright'],
		"Feed copyright",
		"Optional default copyright statement for a feed."
	));

	// item-level settings: created now, added after their options are known

	/** @var InputfieldSelect $dateSelect */
	$dateSelect = $newField(
		'InputfieldSelect', 'itemDateField', $data['itemDateField'],
		"Feed item date field",
		"The default field to use as an individual feed item's date."
	);
	foreach(array('created', 'modified', 'published') as $nativeDate) {
		$dateSelect->addOption($nativeDate);
	}

	/** @var InputfieldSelect $titleSelect */
	$titleSelect = $newField(
		'InputfieldSelect', 'itemTitleField', $data['itemTitleField'],
		"Feed item title field",
		"The default field to use as an individual feed item's title."
	);

	/** @var InputfieldSelect $descriptionSelect */
	$descriptionSelect = $newField(
		'InputfieldSelect', 'itemDescriptionField', $data['itemDescriptionField'],
		"Feed item description field",
		"The default field to use as an individual feed item's description (typically a summary or body field). Note that HTML will be stripped out."
	);

	/** @var InputfieldInteger $descriptionLength */
	$descriptionLength = $newField(
		'InputfieldInteger', 'itemDescriptionLength', (int) $data['itemDescriptionLength'],
		"Maximum characters for item description field",
		"The item description will be truncated to be no longer than the max length. When greater than 0, HTML tags will be removed or encoded."
	);
	$descriptionLength->notes = "Specify `0` for no max length AND to allow HTML in the description.";

	/** @var InputfieldSelect $contentSelect */
	$contentSelect = $newField(
		'InputfieldSelect', 'itemContentField', $data['itemContentField'],
		"HTML content/body field",
		"Optional field that contains the entire article/bodycopy in HTML. Select only if you intend to include the entire content in the RSS feed, otherwise use just the description field."
	);

	/** @var InputfieldInteger $ttl */
	$ttl = $newField(
		'InputfieldInteger', 'ttl', (int) $data['ttl'],
		"Feed TTL",
		'TTL stands for "time to live" in minutes. It indicates how long a channel can be cached before refreshing from the source. Default is 60.'
	);

	// offer each field in the system to the selects that suit its type
	foreach($this->wire()->fields as $field) {
		$fieldtype = $field->type;
		$fieldName = $field->name;
		if($fieldtype instanceof FieldtypeTextarea) {
			// textarea is tested before text, and gets description + content
			$descriptionSelect->addOption($fieldName);
			$contentSelect->addOption($fieldName);
			continue;
		}
		if($fieldtype instanceof FieldtypeText) {
			$titleSelect->addOption($fieldName);
			$descriptionSelect->addOption($fieldName);
			continue;
		}
		if($fieldtype instanceof FieldtypeDatetime) {
			$dateSelect->addOption($fieldName);
		}
	}

	// display order of the item-level inputs
	$itemInputs = array(
		$titleSelect,
		$dateSelect,
		$descriptionSelect,
		$descriptionLength,
		$contentSelect,
		$ttl,
	);
	foreach($itemInputs as $input) {
		$fieldset->add($input);
	}

	return $form;
}
}