artabro/wire/modules/Markup/MarkupRSS.module
2024-08-27 11:35:37 +02:00

533 lines
16 KiB
Text
Raw Permalink Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

<?php namespace ProcessWire;
/**
* ProcessWire Markup RSS Module
*
* Given a PageArray of pages, this module will render an RSS feed from them.
* This is intended to be used directly from a template file. See usage below.
*
* USAGE
* ~~~~~~
* $rss = $modules->get('MarkupRSS');
* $rss->setArray([ // specify RSS feed settings
* 'title' => 'Latest updates',
* 'description' => 'The most recent pages updated on my site',
* 'itemTitleField' => 'title',
* 'itemDateField' => 'created', // date field or 'created', 'published' or 'modified'
* 'itemDescriptionField' => 'summary',
* 'itemDescriptionLength' => 1000, // truncate descriptions to this max length or 0 to allow HTML
* 'itemContentField' => 'body', // optional HTML full-content, or omit to exclude
* 'itemAuthorField' => 'author', // optional text or Page field containing author(s)
* ]);
* $items = $pages->find('limit=10, sort=-modified'); // or any pages you want
* $rss->render($items);
* exit; // exit now, or if you don't, then at least stop sending further output
* ~~~~~~
*
* See also the $defaultConfigData below (first thing in the class) to see what
* options you can change at runtime.
*
*
* ProcessWire 3.x, Copyright 2023 by Ryan Cramer
* https://processwire.com
*
* @property string $title
* @property string $url
* @property string $description
* @property string $xsl
* @property string $css
* @property string $copyright
* @property int $ttl
* @property string $itemTitleField
* @property string $itemContentField
* @property string $itemDateField
* @property string $itemDescriptionField Field to use for item description.
* @property int $itemDescriptionLength Max length for item description or 0 to allow HTML markup with any length (default=1024)
* @property string $itemAuthorField
* @property string $itemAuthorElement
* @property string $header
* @property array|PageArray $feedPages
* @property bool $stripTags Strip tags from item description? Applies only if `itemDescriptionLength>0`. (default=true)
*
*
*/
class MarkupRSS extends WireData implements Module, ConfigurableModule {
/**
 * Describe this module to ProcessWire
 *
 * @return array Module info containing the `title`, `version`, `summary` and `icon` keys
 *
 */
public static function getModuleInfo() {
	$info = array();
	$info['title'] = 'Markup RSS Feed';
	$info['version'] = 105;
	$info['summary'] = 'Renders an RSS feed. Given a PageArray, renders an RSS feed of them.';
	$info['icon'] = 'rss-square';
	return $info;
}
/**
 * Default value for every feed setting
 *
 * The constructor copies these onto the module instance, and
 * getModuleConfigInputfields() uses them to fill in any setting that is
 * missing from the configuration data it receives.
 *
 * @var array
 *
 */
protected static $defaultConfigData = array(
'title' => 'Untitled RSS Feed',
'url' => '', // when blank, renderHeader() falls back to the current page's httpUrl
'description' => '',
'xsl' => '', // optional XSL stylesheet URL, emitted as an xml-stylesheet instruction
'css' => '', // optional CSS stylesheet URL, emitted as an xml-stylesheet instruction
'copyright' => '',
'ttl' => 60, // minutes; the <ttl> element is omitted when this is 0
'stripTags' => true, // only consulted by truncateDescription(), i.e. when itemDescriptionLength is non-zero
'itemTitleField' => 'title',
'itemContentField' => '', // for <content:encoded>
'itemDescriptionField' => 'summary',
'itemDescriptionLength' => 1024, // 0 disables truncation and allows HTML markup in the description
'itemDateField' => 'created',
'itemAuthorField' => '', // i.e. createdUser.title or leave blank to not use
'itemAuthorElement' => 'dc:creator', // may be 'dc:creator' or 'author' (author if email address)
'header' => 'Content-Type: application/xml; charset=utf-8;', // sent verbatim by render()
'feedPages' => array(), // pages rendered by renderFeed() when none are passed to it
);
/**
 * Construct the module and populate it with the default feed settings
 *
 */
public function __construct() {
	parent::__construct();
	$defaults = self::$defaultConfigData;
	$this->setArray($defaults);
}
/**
 * Module init
 *
 * Nothing needs to be set up here, so the body is intentionally empty.
 *
 */
public function init() { }
/**
 * Entity-encode a string for XML output, decoding any existing entities first
 *
 * A string containing an ampersand is passed through $sanitizer->unentities()
 * before being handed to ent(); the intent appears to be that text which is
 * already entity-encoded does not end up encoded twice.
 *
 * @param string $str
 * @return string
 *
 */
protected function ent1($str) {
	$mayContainEntities = strpos($str, '&') !== false;
	if($mayContainEntities) {
		$sanitizer = $this->wire()->sanitizer;
		$str = $sanitizer->unentities($str, true);
	}
	return $this->ent($str);
}
/**
 * Entity-encode a string for XML output using hexadecimal character references
 *
 * The five XML special characters (and both quote styles) are encoded, then the
 * named entities are swapped for their hexadecimal equivalents.
 *
 * Invalid UTF-8 byte sequences are replaced with U+FFFD rather than causing the
 * entire string to be discarded: without ENT_SUBSTITUTE, htmlspecialchars()
 * returns an empty string as soon as the input contains a single invalid byte.
 *
 * @param string $str
 * @return string
 *
 */
protected function ent($str) {
	// cast so that a null value does not trigger the PHP 8.1+ deprecation notice
	$str = htmlspecialchars((string) $str, ENT_XML1 | ENT_QUOTES | ENT_SUBSTITUTE, 'UTF-8');
	$str = strtr($str, array(
		// https://validator.w3.org/feed/
		// recommends using hexadecimal entities here
		'&gt;' => '&#x0003E;',
		'&lt;' => '&#x0003C;',
		'&amp;' => '&#x00026;',
		'&quot;' => '&#x00022;',
		'&apos;' => '&#x00027;',
		'&#39;' => '&#x00027;',
	));
	return $str;
}
/**
 * Render the opening portion of the feed
 *
 * Produces the XML declaration, the optional stylesheet instructions, the
 * opening rss and channel tags, and the channel-level elements. When no feed
 * URL has been set, the current page's httpUrl is stored as the feed URL.
 *
 * @return string
 *
 */
protected function renderHeader() {
	if(!$this->url) $this->url = $this->page->httpUrl;

	// every interpolated value is entity-encoded up front
	$xsl = $this->ent1($this->xsl);
	$css = $this->ent1($this->css);
	$title = $this->ent1($this->title);
	$url = $this->ent1($this->url);
	$description = $this->ent1($this->description);
	$copyright = $this->ent1($this->copyright);
	$ttl = (int) $this->ttl;
	$pubDate = date(\DATE_RSS);

	// the content namespace is declared only when a full-content field is in use
	$namespaces = array(
		'xmlns:atom="http://www.w3.org/2005/Atom"',
		'xmlns:dc="http://purl.org/dc/elements/1.1/"'
	);
	if($this->itemContentField) {
		array_push($namespaces, 'xmlns:content="http://purl.org/rss/1.0/modules/content/"');
	}
	$xmlns = implode(' ', $namespaces);

	// collect the pieces in output order, then join them without a separator
	$parts = array();
	$parts[] = '<?xml version="1.0" encoding="utf-8" ?>' . "\n";
	if($xsl) $parts[] = "<?xml-stylesheet type='text/xsl' href='$xsl' ?>\n";
	if($css) $parts[] = "<?xml-stylesheet type='text/css' href='$css' ?>\n";
	$parts[] = "<rss version=\"2.0\" $xmlns>\n";
	$parts[] = "<channel>\n";
	$parts[] = "\t<title>$title</title>\n";
	$parts[] = "\t<link>$url</link>\n";
	$parts[] = "\t<atom:link href=\"$url\" rel=\"self\" type=\"application/rss+xml\" />\n";
	$parts[] = "\t<description>$description</description>\n";
	$parts[] = "\t<pubDate>$pubDate</pubDate>\n";
	if($copyright) $parts[] = "\t<copyright>$copyright</copyright>\n";
	if($ttl) $parts[] = "\t<ttl>$ttl</ttl>\n";

	return implode('', $parts);
}
/**
 * Render an individual RSS item
 *
 * Returns an empty string when the page has no title. Otherwise returns an
 * <item> element containing the title, description, and (when configured and
 * populated) the publication date, author and full content, followed by the
 * page URL as both <link> and <guid>.
 *
 * @param Page $page
 * @return string
 *
 */
protected function renderItem(Page $page) {

	$sanitizer = $this->wire()->sanitizer;

	// cast to string so a missing (null) title is never passed to strip_tags(),
	// which is deprecated as of PHP 8.1
	$title = strip_tags((string) $page->get($this->itemTitleField));

	// test the length rather than empty(), so that a title of "0" is still rendered
	if(!strlen($title)) return '';

	$author = '';
	$description = '';
	$content = '';
	$pubDate = '';
	$title = $this->ent1($title);

	if($this->itemDateField && ($ts = $page->getUnformatted($this->itemDateField))) {
		// date
		$pubDate = "\t\t<pubDate>" . date(DATE_RSS, $ts) . "</pubDate>\n";
	}

	if($this->itemAuthorField) {
		// author: may be plain text, a single Page, or a PageArray of several authors
		$author = $page->get($this->itemAuthorField);
		if($author instanceof Page) {
			$author = $author->get('title|name');
		} else if($author instanceof PageArray) {
			$author = $author->implode(', ', 'title');
		}
		$author = (string) $author;
		if(strlen($author)) {
			$author = $this->ent1($author);
			$author = "\t\t<$this->itemAuthorElement>$author</$this->itemAuthorElement>\n";
		} else {
			$author = '';
		}
	}

	if($this->itemDescriptionField) {
		// description summary
		$description = $page->get($this->itemDescriptionField);
		if($description !== null) {
			if($this->itemDescriptionLength == 0) {
				// direct markup allowed in item description
				$description = $this->relativeToAbsoluteHtml($description, $page);
			} else {
				// plain text summary: decode, shorten, then encode again
				$description = $sanitizer->unentities($description, true);
				$description = $this->truncateDescription($description);
				$description = $this->ent($description);
			}
			$description = '<![CDATA[' . $description . ']]>';
		} else {
			$description = '';
		}
	}

	if($this->itemContentField) {
		// full HTML content, like that from CKEditor
		$content = (string) $page->get($this->itemContentField);
		$content = $this->relativeToAbsoluteHtml($content, $page);
		$content = "\t\t<content:encoded><![CDATA[" . $content . "]]></content:encoded>\n";
	}

	// read the URL once and XML-encode it, so that a character such as "&"
	// in the URL cannot produce malformed XML
	$url = $this->ent((string) $page->httpUrl);

	$out =
		"\t<item>\n" .
		"\t\t<title>$title</title>\n" .
		"\t\t<description>$description</description>\n" .
		$pubDate .
		$author .
		$content .
		"\t\t<link>$url</link>\n" .
		"\t\t<guid>$url</guid>\n" .
		"\t</item>\n";

	return $out;
}
/**
 * Build the complete feed markup and return it
 *
 * Pages given here replace the module's feedPages setting. Pages that are
 * not viewable are left out of the feed.
 *
 * @param PageArray|null $feedPages Pages to render, or omit to use the feedPages setting
 * @return string
 *
 */
public function renderFeed(PageArray $feedPages = null) {
	if($feedPages !== null) {
		$this->feedPages = $feedPages;
	}

	$feed = $this->renderHeader();

	foreach($this->feedPages as $item) {
		if($item->viewable()) {
			$feed .= $this->renderItem($item);
		}
	}

	return $feed . "</channel>\n</rss>\n";
}
/**
 * Send the configured http header, then output the rendered feed
 *
 * @param PageArray|null $feedPages Pages to render, or omit to use the feedPages setting
 * @return bool Always true
 *
 */
public function render(PageArray $feedPages = null) {
	$httpHeader = $this->header;
	header($httpHeader);
	$feed = $this->renderFeed($feedPages);
	echo $feed;
	return true;
}
/**
 * Truncate the description to the configured max length without splitting words
 *
 * Lengths and positions are measured in characters (UTF-8) when the mbstring
 * extension is available, so a multibyte character is never cut in half.
 * Without mbstring the method falls back to byte-based functions.
 *
 * A string that already fits within the max length is returned as-is (after
 * trimming and optional tag stripping).
 *
 * @param string $str
 * @return string
 *
 */
protected function truncateDescription($str) {

	$str = trim((string) $str);
	$maxlen = (int) $this->itemDescriptionLength;
	if(!$maxlen) return $str;
	if($this->stripTags) $str = strip_tags($str);

	$mb = function_exists('mb_substr');

	// "<=" rather than "<": a string of exactly $maxlen characters already fits
	$length = $mb ? mb_strlen($str, 'UTF-8') : strlen($str);
	if($length <= $maxlen) return $str;

	$str = trim($mb ? mb_substr($str, 0, $maxlen, 'UTF-8') : substr($str, 0, $maxlen));

	// boundaries that we can end the summary with
	$boundaries = array('. ', '? ', '! ', ', ', '; ', '-');
	$bestPos = 0;

	foreach($boundaries as $boundary) {
		$pos = $mb ? mb_strrpos($str, $boundary, 0, 'UTF-8') : strrpos($str, $boundary);
		// find the boundary that is furthest in string
		if($pos !== false && $pos > $bestPos) $bestPos = $pos;
	}

	// determine if we should truncate to last punctuation or last space.
	// if the last punctuation is further away than 1/4th the total length, then we'll
	// truncate to the last space. Otherwise, we'll truncate to the last punctuation.
	$spacePos = $mb ? mb_strrpos($str, ' ', 0, 'UTF-8') : strrpos($str, ' ');
	if($spacePos !== false && $spacePos > $bestPos && (($spacePos - ($maxlen / 4)) > $bestPos)) {
		$bestPos = $spacePos;
	}

	// no usable boundary at all: keep everything that survived the initial cut
	if(!$bestPos) $bestPos = $maxlen;

	// +1 keeps the punctuation character itself; a trailing space is trimmed away
	$str = $mb ? mb_substr($str, 0, $bestPos + 1, 'UTF-8') : substr($str, 0, $bestPos + 1);

	return trim($str);
}
/**
 * Update links and other references in HTML content to be suitable for RSS
 *
 * - Root-relative `href` and `src` attributes (beginning with a single slash)
 *   are prefixed with the site's http root URL.
 * - Protocol-relative URLs (beginning with `//`) are already usable outside of
 *   the site and are left untouched.
 * - Same-page anchors (`href="#..."`) are prefixed with the page's http URL.
 * - CDATA delimiters are neutralized, since callers wrap the result in CDATA.
 *
 * Matching is case-sensitive and expects a single space before the attribute
 * name.
 *
 * @param string $content
 * @param Page $page
 * @return string
 *
 */
protected function relativeToAbsoluteHtml($content, Page $page) {

	// NOTE(review): httpRoot is assumed to end with a slash, since it replaces
	// the leading slash of each matched URL. Confirm against the config.
	$rootUrl = $this->wire()->config->urls->httpRoot;
	$pageUrl = $page->httpUrl();
	$content = (string) $content;

	// The negative lookahead skips protocol-relative URLs such as
	// href="//cdn.example.com/x", which would otherwise be rewritten to
	// "<root>/cdn.example.com/x" and no longer resolve
	$absolute = preg_replace_callback(
		'!( (?:href|src)=["\'])/(?!/)!',
		function($matches) use($rootUrl) {
			return $matches[1] . $rootUrl;
		},
		$content
	);

	// preg_replace_callback() returns null on failure; keep the original content then
	if($absolute !== null) $content = $absolute;

	$a = array(
		' href="#' => ' href="' . $pageUrl . '#',
		" href='#" => " href='" . $pageUrl . '#',
		'<![CDATA[' => '&lt;![CDATA[',
		']]>' => ']]&gt;'
	);

	return str_replace(array_keys($a), array_values($a), $content);
}
/**
 * Build the inputs used to configure this module
 *
 * All inputs are placed in a single fieldset inside the returned wrapper.
 * Any setting missing from $data takes its value from the default config data.
 *
 * @param array $data Current configuration values, indexed by setting name
 * @return InputfieldWrapper
 *
 */
public function getModuleConfigInputfields(array $data) {

	/** @var Modules $modules */
	$modules = $this->wire('modules');

	/** @var InputfieldWrapper $form */
	$form = $this->wire(new InputfieldWrapper());

	/** @var InputfieldFieldset $fieldset */
	$fieldset = $modules->get('InputfieldFieldset');
	$fieldset->attr('name', '_defaults');
	$fieldset->label = 'RSS feed defaults';
	$fieldset->icon = 'rss';
	$fieldset->description =
		"Select the default options for any given feed. Each of these may be overridden in the API, " .
		"so the options you select below should be considered defaults, unless you only have 1 feed. " .
		"If you only need to support 1 feed, then you will not need to override any of these in the API.";
	$form->add($fieldset);

	// fill in every setting that the given data does not provide
	foreach(self::$defaultConfigData as $setting => $default) {
		if(isset($data[$setting])) continue;
		$data[$setting] = $default;
	}

	// plain text and URL inputs: module name, setting name, label, description
	$simpleInputs = array(
		array('InputfieldText', 'title', "Feed title", "The primary title of the RSS feed."),
		array('InputfieldURL', 'url', "Feed URL", "Optional URL on your site that serves as a feed index."),
		array('InputfieldText', 'description', "Feed description", "Optional default description for a feed."),
		array('InputfieldURL', 'xsl', "Link to XSL stylesheet", "Optional URL/link to an XSL stylesheet."),
		array('InputfieldURL', 'css', "Link to CSS stylesheet", "Optional URL/link to a CSS stylesheet."),
		array('InputfieldText', 'copyright', "Feed copyright", "Optional default copyright statement for a feed."),
	);

	foreach($simpleInputs as $definition) {
		list($inputType, $setting, $label, $description) = $definition;
		/** @var InputfieldText|InputfieldURL $input */
		$input = $modules->get($inputType);
		$input->attr('name', $setting);
		$input->attr('value', $data[$setting]);
		$input->label = $label;
		$input->description = $description;
		$input->columnWidth = 50;
		$fieldset->add($input);
	}

	// the remaining inputs are created now, but added only after their options are known

	/** @var InputfieldSelect $dateSelect */
	$dateSelect = $modules->get('InputfieldSelect');
	$dateSelect->attr('name', 'itemDateField');
	$dateSelect->attr('value', $data['itemDateField']);
	$dateSelect->label = "Feed item date field";
	$dateSelect->description = "The default field to use as an individual feed item's date.";
	foreach(array('created', 'modified', 'published') as $nativeDate) {
		$dateSelect->addOption($nativeDate);
	}
	$dateSelect->columnWidth = 50;

	/** @var InputfieldSelect $titleSelect */
	$titleSelect = $modules->get('InputfieldSelect');
	$titleSelect->attr('name', 'itemTitleField');
	$titleSelect->attr('value', $data['itemTitleField']);
	$titleSelect->label = "Feed item title field";
	$titleSelect->description = "The default field to use as an individual feed item's title.";
	$titleSelect->columnWidth = 50;

	/** @var InputfieldSelect $descriptionSelect */
	$descriptionSelect = $modules->get('InputfieldSelect');
	$descriptionSelect->attr('name', 'itemDescriptionField');
	$descriptionSelect->attr('value', $data['itemDescriptionField']);
	$descriptionSelect->label = "Feed item description field";
	$descriptionSelect->columnWidth = 50;
	$descriptionSelect->description = "The default field to use as an individual feed item's description (typically a summary or body field). Note that HTML will be stripped out.";

	/** @var InputfieldInteger $lengthInput */
	$lengthInput = $modules->get('InputfieldInteger');
	$lengthInput->attr('name', 'itemDescriptionLength');
	$lengthInput->attr('value', (int) $data['itemDescriptionLength']);
	$lengthInput->label = "Maximum characters for item description field";
	$lengthInput->columnWidth = 50;
	$lengthInput->description = "The item description will be truncated to be no longer than the max length. When greater than 0, HTML tags will be removed or encoded.";
	$lengthInput->notes = "Specify `0` for no max length AND to allow HTML in the description.";

	/** @var InputfieldSelect $contentSelect */
	$contentSelect = $modules->get('InputfieldSelect');
	$contentSelect->attr('name', 'itemContentField');
	$contentSelect->attr('value', $data['itemContentField']);
	$contentSelect->label = "HTML content/body field";
	$contentSelect->description = "Optional field that contains the entire article/bodycopy in HTML. Select only if you intend to include the entire content in the RSS feed, otherwise use just the description field.";
	$contentSelect->columnWidth = 50;

	/** @var InputfieldInteger $ttlInput */
	$ttlInput = $modules->get('InputfieldInteger');
	$ttlInput->attr('name', 'ttl');
	$ttlInput->attr('value', (int) $data['ttl']);
	$ttlInput->label = "Feed TTL";
	$ttlInput->description = "TTL stands for \"time to live\" in minutes. It indicates how long a channel can be cached before refreshing from the source. Default is 60.";
	$ttlInput->columnWidth = 50;

	// offer each field in the selects that suit its type; the textarea check
	// comes before the text check, as in the original branch order
	foreach($this->wire()->fields as $field) {
		$type = $field->type;
		if($type instanceof FieldtypeTextarea) {
			$descriptionSelect->addOption($field->name);
			$contentSelect->addOption($field->name);
		} else if($type instanceof FieldtypeText) {
			$titleSelect->addOption($field->name);
			$descriptionSelect->addOption($field->name);
		} else if($type instanceof FieldtypeDatetime) {
			$dateSelect->addOption($field->name);
		}
	}

	$remaining = array($titleSelect, $dateSelect, $descriptionSelect, $lengthInput, $contentSelect, $ttlInput);
	foreach($remaining as $input) {
		$fieldset->add($input);
	}

	return $form;
}
}