2011-08-28 17:29:01 +02:00
< ? php
2012-08-27 18:04:00 +02:00
/* Copyright (C) 2011-2012 Laurent Destailleur  <eldy@users.sourceforge.net>
2011-08-28 17:29:01 +02:00
*
2021-05-11 23:49:33 +02:00
* This program is free software ; you can redistribute it and / or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation ; either version 3 of the License , or
* ( at your option ) any later version .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
*/
2011-08-28 17:29:01 +02:00
/**
* \file htdocs/core/class/rssparser.class.php
* \ingroup core
2012-08-27 18:04:00 +02:00
* \brief File of class to parse RSS feeds
*/
/**
 * Class to parse RSS files
 *
 * Loads an RSS or Atom feed (from a cache file or from its URL), parses it either with
 * SimpleXML (when $conf->global->EXTERNALRSS_USE_SIMPLEXML is set) or with the event based
 * xml_parser callbacks defined below, and exposes the feed metadata and items through getters.
 */
class RssParser
{
    /**
     * @var DoliDB Database handler.
     */
    public $db;

    /**
     * @var string Error code (or message)
     */
    public $error = '';

    private $_format = '';
    private $_urlRSS;
    private $_language;
    private $_generator;
    private $_copyright;
    private $_lastbuilddate;
    private $_imageurl;
    private $_link;
    private $_title;
    private $_description;
    private $_lastfetchdate; // Last successful fetch
    private $_rssarray = array();

    // For parsing with xmlparser.
    // These properties were previously created dynamically by the callbacks (deprecated since PHP 8.2
    // and read before being set). They stay public because they were publicly reachable before.

    /**
     * @var string[] Parser stack (names of the currently opened elements, innermost first)
     */
    public $stack = array();

    /**
     * @var string|false Namespace prefix of the element being read, false when none
     */
    public $current_namespace = false;

    /**
     * @var string Value of the "version" attribute of the root element ('1.0' for RDF)
     */
    public $feed_version = '';

    /**
     * @var array Items/entries collected by the xml_parser callbacks
     */
    public $items = array();

    /**
     * @var array Item/entry currently being collected
     */
    public $current_item = array();

    /**
     * @var array Channel (feed level) fields collected by the xml_parser callbacks
     */
    public $channel = array();

    /**
     * @var array Fields of the RSS <textinput> element
     */
    public $textinput = array();

    /**
     * @var array Fields of the RSS <image> element
     */
    public $image = array();

    /**
     * @var bool True while inside an <item>/<entry>
     */
    public $initem = false;

    /**
     * @var bool True while inside a <channel>/<feed>
     */
    public $inchannel = false;

    /**
     * @var bool True while inside an RSS <textinput>
     */
    public $intextinput = false;

    /**
     * @var bool True while inside an RSS <image>
     */
    public $inimage = false;

    /**
     * @var string|false Name of the Atom content construct being read, false when none
     */
    public $incontent = false;

    private $_CONTENT_CONSTRUCTS = array('content', 'summary', 'info', 'title', 'tagline', 'copyright');

    /**
     * Constructor
     *
     * @param   DoliDB  $db     Database handler
     */
    public function __construct($db)
    {
        $this->db = $db;
    }

    /**
     * Return the detected feed format
     *
     * @return string   'rss', 'atom' or '' when nothing was parsed yet
     */
    public function getFormat()
    {
        return $this->_format;
    }

    /**
     * Return the URL given to the last call of parser()
     *
     * @return string|null
     */
    public function getUrlRss()
    {
        return $this->_urlRSS;
    }

    /**
     * Return the language declared by the feed
     *
     * @return string|null
     */
    public function getLanguage()
    {
        return $this->_language;
    }

    /**
     * Return the generator declared by the feed
     *
     * @return string|null
     */
    public function getGenerator()
    {
        return $this->_generator;
    }

    /**
     * Return the copyright declared by the feed
     *
     * @return string|null
     */
    public function getCopyright()
    {
        return $this->_copyright;
    }

    /**
     * Return the last build date declared by the feed (raw string, not parsed)
     *
     * @return string|null
     */
    public function getLastBuildDate()
    {
        return $this->_lastbuilddate;
    }

    /**
     * Return the URL of the feed image
     *
     * @return string|null
     */
    public function getImageUrl()
    {
        return $this->_imageurl;
    }

    /**
     * Return the link declared by the feed
     *
     * @return string|null
     */
    public function getLink()
    {
        return $this->_link;
    }

    /**
     * Return the title of the feed
     *
     * @return string|null
     */
    public function getTitle()
    {
        return $this->_title;
    }

    /**
     * Return the description of the feed
     *
     * @return string|null
     */
    public function getDescription()
    {
        return $this->_description;
    }

    /**
     * Return the date of the last successful fetch.
     * This is the modification time of the cache file when the cache was used, or the value of
     * dol_now() when the cache file was written.
     *
     * @return int|string|null
     */
    public function getLastFetchDate()
    {
        return $this->_lastfetchdate;
    }

    /**
     * Return the items found by the last call of parser()
     *
     * @return array    List of array('link', 'title', 'description', 'pubDate', 'category', 'id', 'author')
     */
    public function getItems()
    {
        return $this->_rssarray;
    }

    /**
     * Parse rss URL
     *
     * @param   string  $urlRSS         Url to parse
     * @param   int     $maxNb          Max nb of records to get (0 for no limit)
     * @param   int     $cachedelay     0=No cache, nb of seconds we accept cache files (cachedir must also be defined)
     * @param   string  $cachedir       Directory where to save cache file (For example $conf->externalrss->dir_temp)
     * @return  int                     <0 if KO, >0 if OK
     */
    public function parser($urlRSS, $maxNb = 0, $cachedelay = 60, $cachedir = '')
    {
        global $conf;

        include_once DOL_DOCUMENT_ROOT.'/core/lib/files.lib.php';
        include_once DOL_DOCUMENT_ROOT.'/core/lib/geturl.lib.php';

        $rss = '';
        $str = ''; // This will contain content of feed

        // Check parameters
        if (!dol_is_url($urlRSS)) {
            $this->error = "ErrorBadUrl";
            return -1;
        }

        // Forget everything collected by a previous call so that an instance can be reused
        $this->resetParsedData();

        $this->_urlRSS = $urlRSS;
        $usesimplexml = !empty($conf->global->EXTERNALRSS_USE_SIMPLEXML);
        $newpathofdestfile = $cachedir.'/'.dol_hash($this->_urlRSS, 3); // Force md5 hash (does not contains special chars)
        $newmask = '0644';

        //dol_syslog("RssPArser::parser parse url=".$urlRSS." => cache file=".$newpathofdestfile);
        $nowgmt = dol_now();

        // Search into cache
        $foundintocache = 0;
        if ($cachedelay > 0 && $cachedir) {
            $filedate = dol_filemtime($newpathofdestfile);
            if ($filedate >= ($nowgmt - $cachedelay)) {
                //dol_syslog("RssParser::parser cache file ".$newpathofdestfile." is not older than now - cachedelay (".$nowgmt." - ".$cachedelay.") so we use it.");
                $foundintocache = 1;
                $this->_lastfetchdate = $filedate;
            } else {
                dol_syslog(get_class($this)."::parser cache file ".$newpathofdestfile." is not found or older than now - cachedelay (".$nowgmt." - ".$cachedelay.") so we can't use it.");
            }
        }

        // Load file into $str
        if ($foundintocache) {    // Cache file found and is not too old
            $str = file_get_contents($newpathofdestfile);
        } else {
            try {
                $result = getURLContent($this->_urlRSS, 'GET', '', 1, array(), array('http', 'https'), 0);

                if (!empty($result['content'])) {
                    $str = $result['content'];
                } elseif (!empty($result['curl_error_msg'])) {
                    $this->error = 'Error retrieving URL '.$this->_urlRSS.' - '.$result['curl_error_msg'];
                    return -1;
                }
            } catch (Exception $e) {
                $this->error = 'Error retrieving URL '.$this->_urlRSS.' - '.$e->getMessage();
                return -2;
            }
        }

        // An empty content is not a feed: do not parse it (and do not save it into the cache)
        if ($str !== false && $str !== '') {
            // Convert $str into xml
            if ($usesimplexml) {
                //print 'xx'.LIBXML_NOCDATA;
                libxml_use_internal_errors(false);
                $rss = simplexml_load_string($str, "SimpleXMLElement", LIBXML_NOCDATA);
            } else {
                if (!function_exists('xml_parser_create')) {
                    $this->error = 'Function xml_parser_create are not supported by your PHP';
                    return -1;
                }

                $xmlparser = xml_parser_create('');
                if (!is_resource($xmlparser) && !is_object($xmlparser)) {
                    $this->error = "ErrorFailedToCreateParser";
                    return -1;
                }

                // Callable arrays bind the handlers to this object without xml_set_object()
                xml_set_element_handler($xmlparser, array($this, 'feed_start_element'), array($this, 'feed_end_element'));
                xml_set_character_data_handler($xmlparser, array($this, 'feed_cdata'));

                $status = xml_parse($xmlparser, $str);
                if (!$status) {
                    // Keep what was read before the error (same result as before) but leave a trace
                    dol_syslog(get_class($this)."::parser XML error: ".xml_error_string(xml_get_error_code($xmlparser)), LOG_WARNING);
                }
                if (is_resource($xmlparser)) {
                    // Only needed when the parser is a resource (PHP < 8)
                    xml_parser_free($xmlparser);
                }
                $rss = $this;
                //var_dump($status.' '.$rss->_format);exit;
            }
        }

        // If $rss not loaded
        if (!$rss) {
            $this->error = 'ErrorFailedToLoadRSSFile';
            return -1;
        }

        // Save file into cache
        if (empty($foundintocache) && $cachedir) {
            dol_syslog(get_class($this)."::parser cache file ".$newpathofdestfile." is saved onto disk.");
            if (!dol_is_dir($cachedir)) {
                dol_mkdir($cachedir);
            }
            $fp = fopen($newpathofdestfile, 'w');
            if ($fp) {
                fwrite($fp, $str);
                fclose($fp);
                if (!empty($conf->global->MAIN_UMASK)) {
                    $newmask = $conf->global->MAIN_UMASK;
                }
                @chmod($newpathofdestfile, octdec($newmask));

                $this->_lastfetchdate = $nowgmt;
            } else {
                print 'Error, failed to open file '.$newpathofdestfile.' for write';
            }
        }

        unset($str); // Free memory

        // Detect the format. The xml_parser callbacks set it from the root element; with SimpleXML
        // (or when the root element was not recognized) a feed without <channel> is taken as Atom.
        if ($usesimplexml) {
            $this->_format = empty($rss->channel) ? 'atom' : 'rss';
        } elseif (empty($this->_format)) {
            $this->_format = empty($this->channel) ? 'atom' : 'rss';
        }
        $format = $this->_format;

        // Save description entries
        if ($format == 'rss') {
            $items = $this->loadRssChannel($rss, $usesimplexml);
        } else {
            $items = $this->loadAtomFeed($rss, $usesimplexml);
        }

        // Loop on each record ($items is an array, or a SimpleXMLElement for RSS read with SimpleXML)
        $i = 0;
        if (is_array($items) || $items instanceof Traversable) {
            foreach ($items as $item) {
                if ($maxNb > 0 && $i >= $maxNb) {
                    break; // We get all records we want
                }

                $itemCategory = array();
                if ($format == 'rss') {
                    if ($usesimplexml) {
                        $itemLink = (string) $item->link;
                        $itemTitle = (string) $item->title;
                        $itemDescription = (string) $item->description;
                        $itemPubDate = (string) $item->pubDate;
                        $itemId = '';
                        $itemAuthor = '';

                        // Loop on each category
                        foreach ($item->category as $cat) {
                            $itemCategory[] = (string) $cat;
                        }
                    } else {
                        if (!is_array($item)) {
                            continue;
                        }
                        // The callbacks concatenate repeated elements into one string, so the
                        // categories cannot be split reliably here: the list is left empty.
                        $itemLink = $this->getItemField($item, 'link');
                        $itemTitle = $this->getItemField($item, 'title');
                        $itemDescription = $this->getItemField($item, 'description');
                        $itemPubDate = $this->getItemField($item, 'pubdate');
                        $itemId = $this->getItemField($item, 'guid');
                        $itemAuthor = $this->getItemField($item, 'author');
                    }
                } else {
                    if (!is_array($item)) {
                        continue;
                    }
                    $itemLink = $this->getAtomItemLink($item);
                    $itemTitle = $this->getItemField($item, 'title');
                    $itemDescription = $this->getAtomItemDescription($item);
                    // 'created' is the Atom 0.3 name, Atom 1.0 uses 'published' and 'updated'
                    $itemPubDate = $this->getItemField($item, 'created');
                    if ($itemPubDate === '') {
                        $itemPubDate = $this->getItemField($item, 'published');
                    }
                    if ($itemPubDate === '') {
                        $itemPubDate = $this->getItemField($item, 'updated');
                    }
                    $itemId = $this->getItemField($item, 'id');
                    $itemAuthor = $this->getItemField($item, 'author');
                    if (empty($itemAuthor)) {
                        $itemAuthor = $this->getItemField($item, 'author_name');
                    }
                    if (empty($itemAuthor) && isset($item['author']) && is_array($item['author'])) {
                        $itemAuthor = $this->getItemField($item['author'], 'name');
                    }
                }

                // Add record to result array
                $this->_rssarray[$i] = array(
                    'link' => $itemLink,
                    'title' => $itemTitle,
                    'description' => $itemDescription,
                    'pubDate' => $itemPubDate,
                    'category' => $itemCategory,
                    'id' => $itemId,
                    'author' => $itemAuthor
                );
                //var_dump($this->_rssarray);

                $i++;
            }
        }

        return 1;
    }

    /**
     * Forget the feed data collected by a previous call of parser()
     *
     * @return void
     */
    private function resetParsedData()
    {
        $this->_format = '';
        $this->_language = null;
        $this->_generator = null;
        $this->_copyright = null;
        $this->_lastbuilddate = null;
        $this->_imageurl = null;
        $this->_link = null;
        $this->_title = null;
        $this->_description = null;
        $this->_lastfetchdate = null;
        $this->_rssarray = array();

        $this->stack = array();
        $this->current_namespace = false;
        $this->feed_version = '';
        $this->items = array();
        $this->current_item = array();
        $this->channel = array();
        $this->textinput = array();
        $this->image = array();
        $this->initem = false;
        $this->inchannel = false;
        $this->intextinput = false;
        $this->inimage = false;
        $this->incontent = false;
    }

    /**
     * Copy the channel data of a RSS feed into this object
     *
     * @param   SimpleXMLElement|RssParser  $rss            Parsed feed (only used with SimpleXML)
     * @param   bool                        $usesimplexml   True if feed was parsed with SimpleXML
     * @return  SimpleXMLElement|array                      Items of the feed
     */
    private function loadRssChannel($rss, $usesimplexml)
    {
        if ($usesimplexml) {
            $channel = $rss->channel;
            if (!empty($channel->language)) {
                $this->_language = (string) $channel->language;
            }
            if (!empty($channel->generator)) {
                $this->_generator = (string) $channel->generator;
            }
            if (!empty($channel->copyright)) {
                $this->_copyright = (string) $channel->copyright;
            }
            // XML names are case sensitive and the RSS element is <lastBuildDate>
            if (!empty($channel->lastBuildDate)) {
                $this->_lastbuilddate = (string) $channel->lastBuildDate;
            } elseif (!empty($channel->lastbuilddate)) {
                $this->_lastbuilddate = (string) $channel->lastbuilddate;
            }
            if (!empty($channel->image->url[0])) {
                $this->_imageurl = (string) $channel->image->url[0];
            }
            if (!empty($channel->link)) {
                $this->_link = (string) $channel->link;
            }
            if (!empty($channel->title)) {
                $this->_title = (string) $channel->title;
            }
            if (!empty($channel->description)) {
                $this->_description = (string) $channel->description;
            }

            return $channel->item; // With simplexml
        }

        //var_dump($this->channel);
        if (!empty($this->channel['language'])) {
            $this->_language = (string) $this->channel['language'];
        }
        if (!empty($this->channel['generator'])) {
            $this->_generator = (string) $this->channel['generator'];
        }
        if (!empty($this->channel['copyright'])) {
            $this->_copyright = (string) $this->channel['copyright'];
        }
        if (!empty($this->channel['lastbuilddate'])) {
            $this->_lastbuilddate = (string) $this->channel['lastbuilddate'];
        }
        if (!empty($this->image['url'])) {
            $this->_imageurl = (string) $this->image['url'];
        }
        if (!empty($this->channel['link'])) {
            $this->_link = (string) $this->channel['link'];
        }
        if (!empty($this->channel['title'])) {
            $this->_title = (string) $this->channel['title'];
        }
        if (!empty($this->channel['description'])) {
            $this->_description = (string) $this->channel['description'];
        }

        return $this->items; // With xmlparse
    }

    /**
     * Copy the feed level data of an Atom feed into this object
     *
     * @param   SimpleXMLElement|RssParser  $rss            Parsed feed (only used with SimpleXML)
     * @param   bool                        $usesimplexml   True if feed was parsed with SimpleXML
     * @return  array                                       Entries of the feed
     */
    private function loadAtomFeed($rss, $usesimplexml)
    {
        if ($usesimplexml) {
            if (!empty($rss->generator)) {
                $this->_generator = (string) $rss->generator;
            }
            // 'modified' is the Atom 0.3 name, Atom 1.0 uses 'updated'
            if (!empty($rss->modified)) {
                $this->_lastbuilddate = (string) $rss->modified;
            } elseif (!empty($rss->updated)) {
                $this->_lastbuilddate = (string) $rss->updated;
            }
            // href is an attribute of <link>, not a child element
            if (!empty($rss->link['href'])) {
                $this->_link = (string) $rss->link['href'];
            }
            if (!empty($rss->title)) {
                $this->_title = (string) $rss->title;
            }
            if (!empty($rss->description)) {
                $this->_description = (string) $rss->description;
            }

            $tmprss = xml2php($rss);
            $items = array();
            if (is_array($tmprss) && isset($tmprss['entry']) && is_array($tmprss['entry'])) {
                $items = $tmprss['entry'];
                if (!isset($items[0])) {
                    // A single <entry> is returned as the entry itself, not as a list of entries
                    $items = array($items);
                }
            }

            return $items;
        }

        //if (!empty($this->channel['rss_language'])) $this->_language = (string) $this->channel['rss_language'];
        if (!empty($this->channel['generator'])) {
            $this->_generator = (string) $this->channel['generator'];
        }
        //if (!empty($this->channel['rss_copyright'])) $this->_copyright = (string) $this->channel['rss_copyright'];
        if (!empty($this->channel['modified'])) {
            $this->_lastbuilddate = (string) $this->channel['modified'];
        } elseif (!empty($this->channel['updated'])) {
            $this->_lastbuilddate = (string) $this->channel['updated'];
        }
        //if (!empty($this->image['rss_url'])) $this->_imageurl = (string) $this->image['rss_url'];
        if (!empty($this->channel['link'])) {
            $this->_link = (string) $this->channel['link'];
        }
        if (!empty($this->channel['title'])) {
            $this->_title = (string) $this->channel['title'];
        }
        //if (!empty($this->channel['rss_description'])) $this->_description = (string) $this->channel['rss_description'];

        if (!empty($this->channel)) {
            $this->_imageurl = $this->getAtomImageUrl($this->channel);
        }

        return $this->items; // With xmlparse
    }

    /**
     * Return a field of a parsed item as a string, with no warning if the field is missing
     *
     * @param   mixed   $item   A parsed item (anything that is not an array gives '')
     * @param   string  $key    Name of the field
     * @return  string          Value of the field, '' if not found or not convertible to a string
     */
    private function getItemField($item, $key)
    {
        if (!is_array($item) || !isset($item[$key])) {
            return '';
        }

        $value = $item[$key];
        if (is_array($value)) {
            // xml2php returns a list when an element is repeated: keep the first value
            $value = isset($value[0]) ? $value[0] : '';
        }

        return is_scalar($value) ? (string) $value : '';
    }

    /**
     * Return the link of an item of an ATOM feed.
     * The link is a string when the feed was read with the xml_parser callbacks. It is an array of
     * attributes, or a list of such arrays, when the entry comes from xml2php().
     *
     * @param   array   $item   A parsed item of a ATOM feed
     * @return  string          The URL, '' if not found
     */
    private function getAtomItemLink(array $item)
    {
        if (!isset($item['link'])) {
            return '';
        }

        $link = $item['link'];
        if (!is_array($link)) {
            return (string) $link;
        }
        if (isset($link['href'])) {
            return (string) $link['href'];
        }

        // Several links: prefer the one without rel or with rel="alternate", like feed_start_element
        $firsthref = '';
        foreach ($link as $candidate) {
            if (!is_array($candidate) || !isset($candidate['href'])) {
                continue;
            }
            if (!isset($candidate['rel']) || $candidate['rel'] == 'alternate') {
                return (string) $candidate['href'];
            }
            if ($firsthref === '') {
                $firsthref = (string) $candidate['href'];
            }
        }

        return $firsthref;
    }

    // phpcs:disable PEAR.NamingConventions.ValidFunctionName.ScopeNotCamelCaps
    /**
     * Triggered when opened tag is found
     *
     * The attributes are received by value: the XML extension does not pass them by reference, and
     * a by-reference parameter raises a warning on each call with recent PHP versions.
     *
     * @param   resource|XMLParser  $p          Parser (not used)
     * @param   string              $element    Tag
     * @param   array               $attrs      Attributes of tags
     * @return  void
     */
    public function feed_start_element($p, $element, $attrs)
    {
        // phpcs:enable
        $el = $element = strtolower($element);
        $attrs = array_change_key_case($attrs, CASE_LOWER);

        // check for a namespace, and split if found
        $ns = false;
        if (strpos($element, ':')) {
            list($ns, $el) = explode(':', $element, 2);
        }
        if ($ns and $ns != 'rdf') {
            $this->current_namespace = $ns;
        }

        // if feed type isn't set, then this is first element of feed identify feed from root element
        if (empty($this->_format)) {
            $version = isset($attrs['version']) ? $attrs['version'] : ''; // Atom 1.0 has no version attribute
            if ($el == 'rdf') {
                $this->_format = 'rss';
                $this->feed_version = '1.0';
            } elseif ($el == 'rss') {
                $this->_format = 'rss';
                $this->feed_version = $version;
            } elseif ($el == 'feed') {
                $this->_format = 'atom';
                $this->feed_version = $version;
                $this->inchannel = true;
            }
            return;
        }

        if ($el == 'channel') {
            $this->inchannel = true;
        } elseif ($el == 'item' || $el == 'entry') {
            $this->initem = true;
            if (isset($attrs['rdf:about'])) {
                $this->current_item['about'] = $attrs['rdf:about'];
            }
        } elseif ($this->_format == 'rss' && $this->current_namespace == '' && $el == 'textinput') {
            // if we're in the default namespace of an RSS feed,
            // record textinput or image fields
            $this->intextinput = true;
        } elseif ($this->_format == 'rss' && $this->current_namespace == '' && $el == 'image') {
            $this->inimage = true;
        } elseif ($this->_format == 'atom' && in_array($el, $this->_CONTENT_CONSTRUCTS)) {
            // handle atom content constructs
            // avoid clashing w/ RSS mod_content
            if ($el == 'content') {
                $el = 'atom_content';
            }

            $this->incontent = $el;
        } elseif ($this->_format == 'atom' && $this->incontent) {
            // if inside an Atom content construct (e.g. content or summary) field treat tags as text
            // if tags are inlined, then flatten
            $attrs_str = $this->attributesToString($attrs);

            $this->append_content("<$element $attrs_str>");

            array_unshift($this->stack, $el);
        } elseif ($this->_format == 'atom' && $el == 'link') {
            // Atom support many links per containing element.
            // Magpie treats link elements of type rel='alternate'
            // as being equivalent to RSS's simple link element.
            if (isset($attrs['rel']) && $attrs['rel'] == 'alternate') {
                $link_el = 'link';
            } elseif (!isset($attrs['rel'])) {
                $link_el = 'link';
            } else {
                $link_el = 'link_'.$attrs['rel'];
            }

            if (isset($attrs['href'])) {
                $this->append($link_el, $attrs['href']);
            }
        } else {
            // set stack[0] to current element
            array_unshift($this->stack, $el);
        }
    }

    /**
     * Build the string name="value" name2="value2" ... for the attributes of a tag.
     * The global function map_attrs() is used when it exists (it is not defined in this file),
     * else each attribute is formatted as name="value".
     *
     * @param   array   $attrs      Attributes of a tag (name => value)
     * @return  string              Attributes separated by a space
     */
    private function attributesToString(array $attrs)
    {
        $usemapattrs = function_exists('map_attrs');

        $pairs = array();
        foreach ($attrs as $name => $value) {
            $pairs[] = $usemapattrs ? map_attrs($name, $value) : $name.'="'.$value.'"';
        }

        return join(' ', $pairs);
    }

    // phpcs:disable PEAR.NamingConventions.ValidFunctionName.ScopeNotCamelCaps
    /**
     * Triggered when CDATA is found
     *
     * @param   resource|XMLParser  $p      Parser (not used)
     * @param   string              $text   Text found
     * @return  void
     */
    public function feed_cdata($p, $text)
    {
        // phpcs:enable
        if ($this->_format == 'atom' and $this->incontent) {
            $this->append_content($text);
        } else {
            // The field name is the path of the opened elements, for example author_name
            $current_el = join('_', array_reverse($this->stack));
            $this->append($current_el, $text);
        }
    }

    // phpcs:disable PEAR.NamingConventions.ValidFunctionName.ScopeNotCamelCaps
    /**
     * Triggered when closed tag is found
     *
     * @param   resource|XMLParser  $p      Parser (not used)
     * @param   string              $el     Tag
     * @return  void
     */
    public function feed_end_element($p, $el)
    {
        // phpcs:enable
        $el = strtolower($el);

        if ($el == 'item' or $el == 'entry') {
            $this->items[] = $this->current_item;
            $this->current_item = array();
            $this->initem = false;
        } elseif ($this->_format == 'rss' and $this->current_namespace == '' and $el == 'textinput') {
            $this->intextinput = false;
        } elseif ($this->_format == 'rss' and $this->current_namespace == '' and $el == 'image') {
            $this->inimage = false;
        } elseif ($this->_format == 'atom' and in_array($el, $this->_CONTENT_CONSTRUCTS)) {
            $this->incontent = false;
        } elseif ($el == 'channel' or $el == 'feed') {
            $this->inchannel = false;
        } elseif ($this->_format == 'atom' and $this->incontent) {
            // balance tags properly
            // note: i don't think this is actually neccessary
            if (isset($this->stack[0]) && $this->stack[0] == $el) {
                $this->append_content("</$el>");
            } else {
                $this->append_content("<$el />");
            }

            array_shift($this->stack);
        } else {
            array_shift($this->stack);
        }

        $this->current_namespace = false;
    }

    /**
     * To concat 2 string with no warning if an operand is not defined
     *
     * @param   string  $str1       Str1 (modified: receives the concatenated string)
     * @param   string  $str2       Str2
     * @return  string              String concatenated (same value as $str1 after the call)
     */
    public function concat(&$str1, $str2 = "")
    {
        if (!isset($str1)) {
            $str1 = "";
        }
        $str1 .= $str2;

        return $str1;
    }

    // phpcs:disable PEAR.NamingConventions.ValidFunctionName.ScopeNotCamelCaps
    /**
     * Append a text to the Atom content construct being read ($this->incontent), in the current
     * item if we are inside an item, else in the channel
     *
     * @param   string  $text       Text
     * @return  void
     */
    public function append_content($text)
    {
        // phpcs:enable
        if ($this->initem) {
            $this->concat($this->current_item[$this->incontent], $text);
        } elseif ($this->inchannel) {
            $this->concat($this->channel[$this->incontent], $text);
        }
    }

    /**
     * smart append - field and namespace aware
     *
     * @param   string  $el         El
     * @param   string  $text       Text
     * @return  void
     */
    public function append($el, $text)
    {
        if (!$el) {
            return;
        }
        if ($this->current_namespace) {
            if ($this->initem) {
                $this->concat($this->current_item[$this->current_namespace][$el], $text);
            } elseif ($this->inchannel) {
                $this->concat($this->channel[$this->current_namespace][$el], $text);
            } elseif ($this->intextinput) {
                $this->concat($this->textinput[$this->current_namespace][$el], $text);
            } elseif ($this->inimage) {
                $this->concat($this->image[$this->current_namespace][$el], $text);
            }
        } else {
            if ($this->initem) {
                $this->concat($this->current_item[$el], $text);
            } elseif ($this->intextinput) {
                $this->concat($this->textinput[$el], $text);
            } elseif ($this->inimage) {
                $this->concat($this->image[$el], $text);
            } elseif ($this->inchannel) {
                $this->concat($this->channel[$el], $text);
            }
        }
    }

    /**
     * Return a description/summary for one item from a ATOM feed
     *
     * @param   array   $item       A parsed item of a ATOM feed
     * @param   int     $maxlength  (optional) The maximum length for the description
     * @return  string              A summary description
     */
    private function getAtomItemDescription(array $item, $maxlength = 500)
    {
        $result = "";

        if (isset($item['summary'])) {
            $result = $item['summary'];
        } elseif (isset($item['atom_content'])) {
            $result = $item['atom_content'];
        } elseif (isset($item['content'])) {
            // Name of the element when the entry was not read by the xml_parser callbacks
            $result = $item['content'];
        }
        if (!is_string($result)) {
            // strip_tags() only accepts a string
            $result = is_scalar($result) ? (string) $result : '';
        }

        // remove all HTML elements that can possible break the maximum size of a tooltip,
        // like headings, image, video etc. and allow only simple style elements
        $result = strip_tags($result, "<br><p><ul><ol><li>");

        $result = str_replace("\n", "", $result);

        // Count and cut on characters when mbstring is available, so a multibyte character is never split
        if (function_exists('mb_strlen') && function_exists('mb_substr')) {
            if (mb_strlen($result, 'UTF-8') > $maxlength) {
                $result = mb_substr($result, 0, $maxlength, 'UTF-8');
                $result .= "...";
            }
        } elseif (strlen($result) > $maxlength) {
            $result = substr($result, 0, $maxlength);
            $result .= "...";
        }

        return $result;
    }

    /**
     * Return a URL to a image of the given ATOM feed
     *
     * @param   array   $feed       The ATOM feed that possible contain a link to a logo or icon
     * @return  string              A URL to a image from a ATOM feed when found, otherwise a empty string
     */
    private function getAtomImageUrl(array $feed)
    {
        foreach (array('logo', 'icon') as $key) {
            if (isset($feed[$key])) {
                return $feed[$key];
            }
        }

        foreach (array('logo', 'icon', 'wordmark') as $key) {
            if (isset($feed['webfeeds:'.$key])) {
                return $feed['webfeeds:'.$key];
            }
            // append() stores a namespaced element under [namespace][element]
            if (isset($feed['webfeeds'][$key])) {
                return $feed['webfeeds'][$key];
            }
        }

        return "";
    }
}
/**
 * Function to convert an XML object into an array
 *
 * Each child element becomes an entry of the returned array, indexed by the element name.
 * When the same name is found several times under the same parent, the entry becomes a list
 * (0..n) of the converted children. Attributes of a child are added as keys of its array.
 * For an element with no children, the text is returned; if such an element has both a text
 * and attributes, the text is kept and the attributes are ignored.
 *
 * @param   SimpleXMLElement    $xml        Xml
 * @return  array|string                    Array of children, or text content if the element has no children
 */
function xml2php($xml)
{
    $fils = 0;
    $array = array();
    $listkeys = array(); // Names already converted into a list of children

    foreach ($xml->children() as $key => $value) {
        $child = xml2php($value);

        //To deal with the attributes
        foreach ($value->attributes() as $ak => $av) {
            if (is_string($child)) {
                if (trim($child) !== '') {
                    // A string can't receive named keys: keep the text
                    break;
                }
                $child = array();
            }
            $child[$ak] = (string) $av;
        }

        //Let see if the new child is not in the array
        if (array_key_exists($key, $array)) {
            if (empty($listkeys[$key])) {
                //If this element is already in the array we will create an indexed array
                $array[$key] = array($array[$key]);
                $listkeys[$key] = true;
            }
            //Add an element in an existing array
            $array[$key][] = $child;
        } else {
            //Add a simple element
            $array[$key] = $child;
        }

        $fils++;
    }

    if ($fils == 0) {
        return (string) $xml;
    }

    return $array;
}