2010-02-15 23:39:13 +01:00
< ? php
2017-04-29 01:01:12 +02:00
2010-02-15 23:39:13 +01:00
require 'Segment.php' ;
2017-04-29 01:01:12 +02:00
2021-07-05 23:07:56 +02:00
/**
 * Exception type thrown by the Odf templating class for configuration,
 * file-access and parsing errors.
 */
class OdfException extends Exception
{
    // Intentionally empty: the class only provides a distinct type to catch.
}
2023-10-01 17:24:25 +02:00
/**
 * Exception type declared for the "segment not found" case
 * (named in the @throws tag of Odf::setSegment()).
 *
 * Note that it extends Exception directly, not OdfException, so a
 * catch block for OdfException does not catch it.
 */
class OdfExceptionSegmentNotFound extends Exception
{
    // Intentionally empty: the class only provides a distinct type to catch.
}
2010-02-15 23:39:13 +01:00
/**
 * Templating class for odt files.
 * Originally written for PHP 5.2; the current code uses PHP 5.4 features
 * (function array dereferencing, ENT_XML1), so PHP 5.4 or later is required.
 * You need the Zip extension or the PclZip library.
 *
 * @copyright  2008 - Julien Pauli - Cyril PIERRE de GEYER - Anaska (http://www.anaska.com)
 * @copyright  2010-2015 - Laurent Destailleur - eldy@users.sourceforge.net
 * @copyright  2010 - Vikas Mahajan - http://vikasmahajan.wordpress.com
 * @copyright  2012 - Stephen Larroque - lrq3000@gmail.com
 * @license    https://www.gnu.org/copyleft/gpl.html  GPL License
 * @version    1.5.0
 */
class Odf
{
2010-03-13 17:05:36 +01:00
/**
 * @var array Settings used by this instance. The constructor only accepts
 *            overrides for the keys listed here; other keys are ignored.
 */
protected $config = array(
    'ZIP_PROXY' => 'PclZipProxy', // PclZipProxy, PhpZipProxy
    'DELIMITER_LEFT' => '{',
    'DELIMITER_RIGHT' => '}',
    'PATH_TO_TMP' => '/tmp',
);

/**
 * @var PclZipProxy|PhpZipProxy Zip proxy instantiated by the constructor from the ZIP_PROXY setting
 */
protected $file;

/**
 * @var string To store content of content.xml file
 */
protected $contentXml;

/**
 * @var string To store content of meta.xml file
 */
protected $metaXml;

/**
 * @var string To store content of styles.xml file
 */
protected $stylesXml;

/**
 * @var string To store content of META-INF/manifest.xml file
 */
protected $manifestXml;

/**
 * @var string Path of the working copy of the template (the constructor copies the source file there)
 */
protected $tmpfile;

/**
 * @var string Working directory of this instance, built by the constructor under PATH_TO_TMP
 */
protected $tmpdir = '';

/**
 * @var array Pictures registered by setImage(): image path => file name
 */
protected $images = array();

/**
 * @var array Template variables: tag including its delimiters => replacement value
 */
protected $vars = array();

/**
 * @var array Declared segments indexed by segment name (see setSegment())
 */
protected $segments = array();

/**
 * @var string NOTE(review): presumably the document creator written to meta.xml; usage is not visible in this part of the file
 */
public $creator;

/**
 * @var string NOTE(review): presumably the document title; usage is not visible in this part of the file
 */
public $title;

/**
 * @var string NOTE(review): presumably the document subject; usage is not visible in this part of the file
 */
public $subject;

/**
 * @var array NOTE(review): presumably user-defined metadata fields; usage is not visible in this part of the file
 */
public $userdefined = array();

// Centimetres per pixel (2.54 / 96); setImage() multiplies pixel sizes by it.
const PIXEL_TO_CM = 0.026458333;

// Matches one tag: either self-closing (" />") or an opening tag with its matching closing tag.
// Group 1 = tag name, group 2 = last attribute matched, group 3 = inner content.
const FIND_TAGS_REGEX = '/<([A-Za-z0-9]+)(?:\s([A-Za-z]+(?:\-[A-Za-z]+)?(?:=(?:".*?")|(?:[0-9]+))))*(?:(?:\s\/>)|(?:>(((?!<\1(\s.*)?>).)*)<\/\1>))/s';

// Same structure as FIND_TAGS_REGEX, but tag names are letters only and the "s" modifier is absent.
const FIND_ENCODED_TAGS_REGEX = '/<([A-Za-z]+)(?:\s([A-Za-z]+(?:\-[A-Za-z]+)?(?:=(?:".*?")|(?:[0-9]+))))*(?:(?:\s\/>)|(?:>(((?!<\1(\s.*)?>).)*)<\/\1>))/';
2022-11-16 11:12:34 +01:00
2017-10-10 18:54:30 +02:00
2013-04-20 11:03:23 +02:00
/**
 * Class constructor.
 *
 * Merges the given configuration, creates a private working directory, opens the
 * template through the configured zip proxy, loads content.xml, META-INF/manifest.xml,
 * meta.xml and styles.xml into memory, copies the template to the working file and
 * finally moves the row segment tags (see _moveRowSegments()).
 *
 * @param string $filename The name of the odt file
 * @param array  $config   Array of config data (only keys already present in $this->config are used)
 * @throws OdfException If the configuration is not an array, the temporary directory is missing or
 *                      cannot be created, the zip proxy class cannot be loaded, the file cannot be
 *                      opened, one of the required XML entries cannot be read, or the working copy
 *                      cannot be written
 */
public function __construct($filename, $config = array())
{
    clearstatcache();

    if (!is_array($config)) {
        throw new OdfException('Configuration data must be provided as array');
    }
    // Unknown configuration keys are ignored on purpose.
    foreach ($config as $configKey => $configValue) {
        if (array_key_exists($configKey, $this->config)) {
            $this->config[$configKey] = $configValue;
        }
    }

    $md5uniqid = md5(uniqid());
    if ($this->config['PATH_TO_TMP']) {
        // Remove one trailing "/" so the separator is not doubled below.
        $this->tmpdir = preg_replace('|[\/]$|', '', $this->config['PATH_TO_TMP']);
    }
    $this->tmpdir .= ($this->tmpdir ? '/' : '') . $md5uniqid;
    // We keep .odt extension to allow OpenOffice usage during debug.
    $this->tmpfile = $this->tmpdir . '/' . $md5uniqid . '.odt';

    // A working directory is required for some zip proxy like PclZipProxy
    if (in_array($this->config['ZIP_PROXY'], array('PclZipProxy'), true) && !is_dir($this->config['PATH_TO_TMP'])) {
        throw new OdfException('Temporary directory ' . $this->config['PATH_TO_TMP'] . ' must exists');
    }

    // Create the working directory (the original comment notes that the destructor deletes it).
    // The final is_dir() check tolerates another process creating it in the meantime.
    if (!file_exists($this->tmpdir) && !mkdir($this->tmpdir) && !is_dir($this->tmpdir)) {
        throw new OdfException('Temporary directory ' . $this->tmpdir . ' cannot be created');
    }

    // Load zip proxy.
    // NOTE(review): ZIP_PROXY is used to build an include path and a class name, so it must only
    // come from trusted configuration.
    $zipHandler = $this->config['ZIP_PROXY'];
    if (!defined('PCLZIP_TEMPORARY_DIR')) {
        define('PCLZIP_TEMPORARY_DIR', $this->tmpdir);
    }
    include_once 'zip/' . $zipHandler . '.php';
    if (!class_exists($this->config['ZIP_PROXY'])) {
        throw new OdfException($this->config['ZIP_PROXY'] . ' class not found - check your php settings');
    }
    $this->file = new $zipHandler($this->tmpdir);

    if ($this->file->open($filename) !== true) {
        throw new OdfException(" Error while Opening the file ' $filename ' - Check your odt filename ");
    }

    // Required archive entries, in the order they are read: property => (entry name, error message).
    $requiredEntries = array(
        'contentXml' => array(
            'content.xml',
            " Nothing to parse - Check that the content.xml file is correctly formed in source file ' $filename ' ",
        ),
        'manifestXml' => array(
            'META-INF/manifest.xml',
            " Something is wrong with META-INF/manifest.xml in source file ' $filename ' ",
        ),
        'metaXml' => array(
            'meta.xml',
            " Nothing to parse - Check that the meta.xml file is correctly formed in source file ' $filename ' ",
        ),
        'stylesXml' => array(
            'styles.xml',
            " Nothing to parse - Check that the styles.xml file is correctly formed in source file ' $filename ' ",
        ),
    );
    foreach ($requiredEntries as $property => $entry) {
        $this->$property = $this->file->getFromName($entry[0]);
        if ($this->$property === false) {
            // Do not leave the archive open when giving up.
            $this->file->close();
            throw new OdfException($entry[1]);
        }
    }
    $this->file->close();

    // Work on a private copy of the template; without it there is nothing to write to later.
    if (!copy($filename, $this->tmpfile)) {
        throw new OdfException('Error while copying ' . $filename . ' to ' . $this->tmpfile);
    }

    // Now file has been loaded, we must move the [!-- BEGIN and [!-- END tags outside the
    // <table:table-row tag and clean bad lines tags.
    $this->_moveRowSegments();
}
/**
 * Assign a template variable into ->vars.
 * For example, key is object_date, the tag searched in the template is {object_date}
 * (with the default delimiters) and value is '2021-01-01'.
 *
 * The variable is only stored when its tag appears in content.xml or styles.xml;
 * otherwise the call does nothing.
 *
 * @param string $key     Name of the variable within the template (without delimiters)
 * @param string $value   Replacement value
 * @param bool   $encode  If true, special XML characters are encoded
 * @param string $charset Charset
 * @throws OdfException
 * @return Odf            This instance, to allow chaining
 */
public function setVars($key, $value, $encode = true, $charset = 'ISO-8859')
{
    $tag = $this->config['DELIMITER_LEFT'] . $key . $this->config['DELIMITER_RIGHT'];

    // TODO Warning string may be:
    // <text:span text:style-name="T13">{</text:span><text:span text:style-name="T12">aaa</text:span><text:span text:style-name="T13">}</text:span>
    // instead of {aaa} so we should enhance this function.
    $tagIsUsed = strpos($this->contentXml, $tag) !== false || strpos($this->stylesXml, $tag) !== false;

    // A missing tag is the normal case (a document uses only a few of the available keys),
    // so it is not reported as an error.
    if ($tagIsUsed) {
        $this->vars[$tag] = $this->convertVarToOdf($value, $encode, $charset);
    }

    return $this;
}
/**
 * Replaces html tags found into the $value with ODT compatible tags and return the converted compatible string.
 *
 * When the value contains html tags, the styles and font declarations needed by the
 * converted markup are also appended to $this->contentXml (only those not already present).
 * When it does not, the value is encoded and its line breaks become <text:line-break/>.
 *
 * @param string $value   Replacement value
 * @param bool   $encode  If true, special XML characters are encoded
 * @param string $charset Charset
 * @return string         String in ODT syntax format
 */
public function convertVarToOdf($value, $encode = true, $charset = 'ISO-8859')
{
    $value = html_entity_decode($value, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML401);
    // Fix breaklines: give every <br>, <br/> or <br /> the same spelling.
    $value = preg_replace('/<br\s*\/?>/', " <br /> ", $value);

    // No html tag at all: plain text path.
    if ($this->_hasHtmlTag($value) !== true) {
        $plainText = $this->encode_chars($value, $encode, $charset);
        return preg_replace('/(\r\n|\r|\n)/i', " <text:line-break/> ", $plainText);
    }

    // Remove html tags except the ones into the list in second parameter
    $value = strip_tags($value, '<br><strong><b><i><em><u><s><sub><sup><span>');

    // Default styles for strong/b, i/em, u, s, sub & sup
    $automaticStyles = array(
        '<style:style style:name="boldText" style:family="text"><style:text-properties fo:font-weight="bold" style:font-weight-asian="bold" style:font-weight-complex="bold" /></style:style>',
        '<style:style style:name="italicText" style:family="text"><style:text-properties fo:font-style="italic" style:font-style-asian="italic" style:font-style-complex="italic" /></style:style>',
        '<style:style style:name="underlineText" style:family="text"><style:text-properties style:text-underline-style="solid" style:text-underline-width="auto" style:text-underline-color="font-color" /></style:style>',
        '<style:style style:name="strikethroughText" style:family="text"><style:text-properties style:text-line-through-style="solid" style:text-line-through-type="single" /></style:style>',
        '<style:style style:name="subText" style:family="text"><style:text-properties style:text-position="sub 58%" /></style:style>',
        '<style:style style:name="supText" style:family="text"><style:text-properties style:text-position="super 58%" /></style:style>'
    );

    // Both arrays are filled by reference while the tags are converted.
    $customStyles = array();
    $fontDeclarations = array();
    $parsedTags = $this->_getDataFromHtml($value);
    $convertedValue = $this->_replaceHtmlWithOdtTag($parsedTags, $customStyles, $fontDeclarations, $encode, $charset);

    foreach ($customStyles as $styleId => $styleBody) {
        $automaticStyles[] = '<style:style style:name="customStyle' . $styleId . '" style:family="text">' . $styleBody . '</style:style>';
    }

    // Add the styles that the content xml does not contain yet
    $missingStyles = '';
    foreach ($automaticStyles as $styleXml) {
        if (strpos($this->contentXml, $styleXml) !== false) {
            continue;
        }
        $missingStyles .= $styleXml;
    }
    $this->contentXml = str_replace('</office:automatic-styles>', $missingStyles . '</office:automatic-styles>', $this->contentXml);

    // Add the font declarations that the content xml does not contain yet
    // (checked after the styles above were inserted, as before)
    $missingFonts = '';
    foreach ($fontDeclarations as $fontName) {
        if (strpos($this->contentXml, 'style:name="' . $fontName . '"') !== false) {
            continue;
        }
        $missingFonts .= '<style:font-face style:name="' . $fontName . '" svg:font-family="\'' . $fontName . '\'" />';
    }
    $this->contentXml = str_replace('</office:font-face-decls>', $missingFonts . '</office:font-face-decls>', $this->contentXml);

    return $convertedValue;
}
2020-03-04 08:54:49 +01:00
/**
 * Replaces html tags with odt tags and returns an odt string. Encodes and converts inner text.
 *
 * Handled tags: br (line break), strong/b, i/em, u, s, sub and sup (rendered with the default
 * automatic styles) and span (rendered with a custom style built from its font-family,
 * font-size and color declarations). Any other tag, and a span without a usable style,
 * is rendered as its content only so that no text is lost.
 *
 * @param array  $tags             An array with html tags generated by the _getDataFromHtml() function
 * @param array  $customStyles     (in/out) Style definitions that should be included inside the odt file, indexed by generated style id
 * @param array  $fontDeclarations (in/out) Font names that should be declared inside the odt file
 * @param bool   $encode           If true, special XML characters are encoded
 * @param string $charset          Charset. See encode_chars()
 * @return string
 */
private function _replaceHtmlWithOdtTag($tags, &$customStyles, &$fontDeclarations, $encode = false, $charset = '')
{
    if ($customStyles == null) {
        $customStyles = array();
    }
    if ($fontDeclarations == null) {
        $fontDeclarations = array();
    }

    // Html tag => name of the default automatic style (the styles are declared in convertVarToOdf()).
    $styleByTag = array(
        'strong' => 'boldText',
        'b' => 'boldText',
        'i' => 'italicText',
        'em' => 'italicText',
        'u' => 'underlineText',
        's' => 'strikethroughText',
        'sub' => 'subText',
        'sup' => 'supText',
    );

    $odtResult = '';
    foreach ((array) $tags as $tag) {
        // Plain text item
        if (isset($tag['text'])) {
            $odtResult .= $this->encode_chars($tag['text'], $encode, $charset);
            continue;
        }
        if (!isset($tag['name'])) {
            continue;
        }
        $name = $tag['name'];

        if ($name === 'br') {
            $odtResult .= '<text:line-break/>';
            continue;
        }

        if (isset($styleByTag[$name])) {
            $odtResult .= '<text:span text:style-name="' . $styleByTag[$name] . '">'
                . $this->_odtInnerContent($tag, $customStyles, $fontDeclarations, $encode, $charset)
                . '</text:span>';
            continue;
        }

        $odtStyles = '';
        if ($name === 'span' && isset($tag['attributes']['style'])) {
            $odtStyles = $this->_odtTextPropertiesFromCss((array) $tag['attributes']['style'], $fontDeclarations);
        }

        if ($odtStyles !== '') {
            // Generate a unique id for the style (using microtime and random because some CPUs are really fast...)
            $key = str_replace('.', '', (string) microtime(true)) . uniqid(mt_rand());
            // Register the style before rendering the content, so a parent style precedes its children's styles.
            $customStyles[$key] = $odtStyles;
            $odtResult .= '<text:span text:style-name="customStyle' . $key . '">'
                . $this->_odtInnerContent($tag, $customStyles, $fontDeclarations, $encode, $charset)
                . '</text:span>';
        } else {
            // Unstyled span or unknown tag: keep the content instead of dropping it.
            $odtResult .= $this->_odtInnerContent($tag, $customStyles, $fontDeclarations, $encode, $charset);
        }
    }

    return $odtResult;
}

/**
 * Renders the content of one parsed html tag: its children when it has some, else its inner text.
 * The charset is forwarded to nested tags so that all text is converted the same way.
 *
 * @param array  $tag              One tag item generated by the _getDataFromHtml() function
 * @param array  $customStyles     (in/out) See _replaceHtmlWithOdtTag()
 * @param array  $fontDeclarations (in/out) See _replaceHtmlWithOdtTag()
 * @param bool   $encode           If true, special XML characters are encoded
 * @param string $charset          Charset. See encode_chars()
 * @return string
 */
private function _odtInnerContent($tag, &$customStyles, &$fontDeclarations, $encode, $charset)
{
    if (!empty($tag['children'])) {
        return $this->_replaceHtmlWithOdtTag($tag['children'], $customStyles, $fontDeclarations, $encode, $charset);
    }

    $innerText = isset($tag['innerText']) ? $tag['innerText'] : '';

    return $this->encode_chars($innerText, $encode, $charset);
}

/**
 * Converts the style declarations of a span into <style:text-properties> elements.
 * Only font-family, font-size (px or pt) and color (#rgb or #rrggbb) are converted.
 *
 * @param array $styles           Style declarations: property name => value
 * @param array $fontDeclarations (in/out) Receives each font name used, once
 * @return string                 Concatenated elements, or '' when nothing could be converted
 */
private function _odtTextPropertiesFromCss($styles, &$fontDeclarations)
{
    $odtStyles = '';
    foreach ($styles as $styleName => $styleValue) {
        switch ($styleName) {
            case 'font-family':
                // Only the first font of a comma separated list is used.
                $fontName = $styleValue;
                if (strpos($fontName, ',') !== false) {
                    $fontName = explode(',', $fontName)[0];
                }
                if (!in_array($fontName, $fontDeclarations)) {
                    array_push($fontDeclarations, $fontName);
                }
                $odtStyles .= '<style:text-properties style:font-name="' . $fontName . '" />';
                break;
            case 'font-size':
                if (preg_match('/([0-9]+)\s?(px|pt)/', $styleValue, $matches)) {
                    $fontSize = intval($matches[1]);
                    if ($matches[2] == 'px') {
                        // Pixels are converted to points with a 0.75 factor.
                        $fontSize = round($fontSize * 0.75);
                    }
                    $odtStyles .= '<style:text-properties fo:font-size="' . $fontSize . 'pt" style:font-size-asian="' . $fontSize . 'pt" style:font-size-complex="' . $fontSize . 'pt" />';
                }
                break;
            case 'color':
                if (preg_match('/#[0-9A-Fa-f]{3}(?:[0-9A-Fa-f]{3})?/', $styleValue)) {
                    $odtStyles .= '<style:text-properties fo:color="' . $styleValue . '" />';
                }
                break;
        }
    }

    return $odtStyles;
}
/**
 * Correctly encode chars.
 *
 * When $charset is 'ISO-8859' the text is first converted from ISO-8859-1 to UTF-8 and
 * only then escaped. Escaping first would make htmlspecialchars() return an empty string
 * for any text holding bytes that are invalid in its working encoding (for example
 * accented ISO-8859-1 characters when that encoding is UTF-8). For text that
 * htmlspecialchars() accepted before, the result is unchanged because the escaped
 * characters are ASCII and are not altered by the conversion.
 *
 * @param string $text    The text to encode or not
 * @param bool   $encode  If true, special XML characters are encoded
 * @param string $charset Charset ('ISO-8859' triggers the conversion to UTF-8; any other value leaves the bytes as they are)
 * @return string         The converted text
 * @see self::convertVarToOdf()
 */
private function encode_chars($text, $encode = false, $charset = '')
{
    if ($charset == 'ISO-8859') {
        $utf8Text = mb_convert_encoding($text, 'UTF-8', 'ISO-8859-1');

        return $encode ? htmlspecialchars($utf8Text, ENT_QUOTES | ENT_XML1, 'UTF-8') : $utf8Text;
    }

    // No conversion requested: escape with the default encoding, as before.
    return $encode ? htmlspecialchars($text, ENT_QUOTES | ENT_XML1) : $text;
}
2023-10-31 13:01:35 +01:00
/**
 * Checks if the given text is a html string, that is if FIND_TAGS_REGEX matches in it.
 *
 * Returns a real boolean, like _hasHtmlTag(); a regex error counts as "no match".
 *
 * @param string $text The text to check
 * @return bool
 */
private function _isHtmlTag($text)
{
    return preg_match(self::FIND_TAGS_REGEX, $text) === 1;
}
2020-03-04 08:54:49 +01:00
2021-07-05 23:07:40 +02:00
/**
 * Checks if the given text includes a html string, that is if FIND_TAGS_REGEX
 * matches at least once in it.
 *
 * @param string $text The text to check
 * @return bool        False also when the regex engine reports an error
 */
private function _hasHtmlTag($text)
{
    $matchCount = preg_match_all(self::FIND_TAGS_REGEX, $text);

    if (!is_numeric($matchCount)) {
        return false;
    }

    return $matchCount > 0;
}
/**
 * Returns an array of html elements.
 *
 * The string is consumed from left to right. Text found before a tag becomes an item
 * array('text' => ...). Each tag becomes an item with the keys 'name', 'attributes',
 * 'innerText' (content without tags) and 'children' (same structure, built recursively,
 * or null when the content holds no tag).
 *
 * @param string $html A string with html tags
 * @return array
 */
private function _getDataFromHtml($html)
{
    $tags = array();
    $tempHtml = $html;

    while (strlen($tempHtml) > 0) {
        // Only the first tag is needed on each pass, so a single match is enough.
        $match = array();
        if (!preg_match(self::FIND_TAGS_REGEX, $tempHtml, $match)) {
            // No tag left: the remainder is plain text
            $tags[] = array('text' => $tempHtml);
            break;
        }

        $fullTag = $match[0];
        // preg_match() omits trailing groups that did not take part in the match (self-closing tags).
        $attributeSource = isset($match[2]) ? $match[2] : '';
        $innerHtml = isset($match[3]) ? $match[3] : '';

        $tagOffset = strpos($tempHtml, $fullTag);
        if ($tagOffset > 0) {
            // Push the text in front of the html tag to the result array and remove it from the string
            $tags[] = array('text' => substr($tempHtml, 0, $tagOffset));
            $tempHtml = substr($tempHtml, $tagOffset);
        }

        // Extract the attribute data from the html tag
        $explodedAttributes = array();
        preg_match_all('/([0-9A-Za-z]+(?:="[0-9A-Za-z\:\-\s\,\;\#]*")?)+/', $attributeSource, $explodedAttributes);

        $attributes = array();
        // array_filter() keeps the original keys, so iterate over the values instead of indexing 0..n-1.
        foreach (array_filter($explodedAttributes[0]) as $rawAttribute) {
            $attribute = trim($rawAttribute);

            // Attribute without value (like required)
            if (strpos($attribute, '=') === false) {
                $attributes[$attribute] = true;
                continue;
            }

            $splitAttribute = explode('=', $attribute);
            $attrName = trim($splitAttribute[0]);
            $attrValue = trim(str_replace('"', '', $splitAttribute[1]));

            // Any attribute other than style is stored as is
            if (strtolower($attrName) != 'style') {
                $attributes[$attrName] = $attrValue;
                continue;
            }

            // Split the style properties and store them in an array
            $attributes[$attrName] = array();
            foreach (explode(';', $attrValue) as $declaration) {
                $splitStyle = explode(':', $declaration);
                if (count($splitStyle) < 2) {
                    // Empty piece (for example after a trailing ";") or declaration without ":"
                    continue;
                }
                $attributes[$attrName][trim($splitStyle[0])] = trim($splitStyle[1]);
            }
        }

        // Push the html tag data to the result array
        $tags[] = array(
            'name' => $match[1],
            'attributes' => $attributes,
            'innerText' => strip_tags($innerHtml),
            'children' => $this->_hasHtmlTag($innerHtml) ? $this->_getDataFromHtml($innerHtml) : null,
        );

        // Remove the processed html tag from the html string
        $tempHtml = substr($tempHtml, strlen($fullTag));
    }

    return $tags;
}
2020-03-04 08:54:49 +01:00
2014-05-07 19:13:02 +02:00
/**
 * Function to convert a HTML string into the intermediate text form handled by preOdfToOdf().
 *
 * Entities are decoded first. If the result is detected as html, the existing line breaks
 * are removed, the <br> tags become line breaks and the remaining tags are stripped.
 * Decoding, detection and stripping are delegated to dol_html_entity_decode(),
 * dol_textishtml() and dol_string_nohtmltag(), which are defined outside this file.
 *
 * @param string $value String to convert
 * @return string       String converted
 */
public function htmlToUTFAndPreOdf($value)
{
    // We decode into utf8, entities
    $value = dol_html_entity_decode($value, ENT_QUOTES | ENT_HTML5);

    // Not html: nothing else to do
    if (!dol_textishtml($value)) {
        return $value;
    }

    // If string is "MYPODUCT - Desc <strong>bold</strong> with é accent<br />\n<br />\nUn texto en español ?"
    // Result after clean must be "MYPODUCT - Desc bold with é accent\n\nUn texto en español ?"
    // We want to ignore \n and we want all <br> to be \n
    $substitutions = array(
        array('/(\r\n|\r|\n)/i', ''),
        array('/<br>/i', " \n "),
        array('/<br\s+[^<>\/]*>/i', " \n "),
        array('/<br\s+[^<>\/]*\/>/i', " \n "),
    );
    // Applied one after the other, each one on the result of the previous one
    foreach ($substitutions as $substitution) {
        $value = preg_replace($substitution[0], $substitution[1], $value);
    }

    return dol_string_nohtmltag($value, 0);
}
/**
 * Function to convert the intermediate text form into an ODT string:
 * the line break sequence is replaced by the ODT line break element.
 *
 * @param string $value String to convert
 * @return string       String converted
 */
public function preOdfToOdf($value)
{
    $search = " \n ";
    $odtLineBreak = " <text:line-break/> ";

    return str_replace($search, $odtLineBreak, $value);
}
2013-04-20 11:03:23 +02:00
/**
 * Assign a template variable as a picture.
 *
 * The picture size is read from the file and converted from pixels to centimetres.
 * The variable receives a draw:frame element that references "Pictures/<file name>",
 * and the picture is recorded in $this->images (image path => file name).
 *
 * The file name is taken with basename(), so a path without any "/" also works.
 * The frame markup is emitted as well-formed XML (no blank after "<" or inside
 * qualified names, sizes interpolated as "<number>cm").
 *
 * @param string $key   name of the variable within the template
 * @param string $value path to the picture
 * @throws OdfException If the size of the picture cannot be read
 * @return Odf          This instance, to allow chaining
 */
public function setImage($key, $value)
{
    // Errors are silenced on purpose: a failure is reported through the exception below.
    $size = @getimagesize($value);
    if ($size === false) {
        throw new OdfException(" Invalid image ");
    }

    $file = basename($value);
    // Frame name = file name up to its first dot (leading dots skipped), without touching strtok() state.
    $nameParts = explode('.', ltrim($file, '.'));
    $filename = $nameParts[0];

    list($width, $height) = $size;
    $width *= self::PIXEL_TO_CM;
    $height *= self::PIXEL_TO_CM;

    $xml = <<<IMG
<draw:frame draw:style-name="fr1" draw:name="$filename" text:anchor-type="aschar" svg:width="{$width}cm" svg:height="{$height}cm" draw:z-index="3"><draw:image xlink:href="Pictures/$file" xlink:type="simple" xlink:show="embed" xlink:actuate="onLoad"/></draw:frame>
IMG;

    $this->images[$value] = $file;
    // The markup must not be escaped, hence $encode = false
    $this->setVars($key, $xml, false);

    return $this;
}
/**
 * Move segment tags for lines of tables.
 * This function is called automatically within the constructor, so this->contentXml is clean before any other thing.
 *
 * For each table row that contains a pair [!-- BEGIN row.xxx --] ... [!-- END row.xxx --],
 * both tags are removed from inside the row and the whole row is wrapped with
 * [!-- BEGIN xxx --] and [!-- END xxx --] instead.
 *
 * @return void
 */
private function _moveRowSegments()
{
    // Replace BEGIN<text:s/>xxx into BEGIN xxx
    $this->contentXml = preg_replace('/\[!--\sBEGIN<text:s[^>]>(row.[\S]*)\s--\]/sm', '[!-- BEGIN \\1 --]', $this->contentXml);
    // Replace END<text:s/>xxx into END xxx
    $this->contentXml = preg_replace('/\[!--\sEND<text:s[^>]>(row.[\S]*)\s--\]/sm', '[!-- END \\1 --]', $this->contentXml);

    // Pattern for any table row, and pattern for a row.* segment inside one row
    $rowPattern = " #<table:table-row[^>]*>(.*)</table:table-row>#smU ";
    $segmentPattern = '#\[!--\sBEGIN\s(row.[\S]*)\s--\](.*)\[!--\sEND\s\\1\s--\]#sm';

    // Search all possible rows in the document
    $rows = array();
    preg_match_all($rowPattern, $this->contentXml, $rows);

    foreach ($rows[0] as $rowXml) {
        // Skip the rows that hold no row.* segment
        $segmentMatch = array();
        if (!preg_match($segmentPattern, $rowXml, $segmentMatch)) {
            continue;
        }

        $rowSegmentName = $segmentMatch[1];
        $balise = str_replace('row.', '', $rowSegmentName);

        // Move segment tags around the row. The pairs are applied in this order.
        $search = array(
            '[!-- BEGIN ' . $rowSegmentName . ' --]',
            '[!-- END ' . $rowSegmentName . ' --]',
            '<table:table-row',
            '</table:table-row>',
        );
        $replacement = array(
            '',
            '',
            '[!-- BEGIN ' . $balise . ' --]<table:table-row',
            '</table:table-row>[!-- END ' . $balise . ' --]',
        );
        $wrappedRowXml = str_replace($search, $replacement, $rowXml);

        $this->contentXml = str_replace($rowXml, $wrappedRowXml, $this->contentXml);
    }
}
/**
 * Merge template variables.
 * Called at the beginning of the _save function.
 *
 * Works in place on the XML selected by $type, in three steps:
 * 1. every variable used in a [!-- IF xxx --] tag but not set gets an empty value;
 * 2. the conditional blocks (IF / ELSE / ENDIF) of each variable are resolved;
 * 3. the remaining tags are replaced by their values.
 *
 * @param string $type 'content', 'styles' or 'meta' (any other value does nothing)
 * @return void
 */
private function _parse($type = 'content')
{
    // $xml is a reference, so each assignment below updates the selected property.
    switch ($type) {
        case 'content':
            $xml = &$this->contentXml;
            break;
        case 'styles':
            $xml = &$this->stylesXml;
            break;
        case 'meta':
            $xml = &$this->metaXml;
            break;
        default:
            return;
    }

    // Search all tags found into condition to complete $this->vars, so we will proceed all tests even if not defined
    $conditionTags = array();
    preg_match_all('@\[!--\sIF\s([\[\]{}a-zA-Z0-9\.\,_]+)\s--\]@smU', $xml, $conditionTags, PREG_SET_ORDER);
    foreach ($conditionTags as $conditionTag) {
        $conditionName = $conditionTag[1];
        if (empty($conditionName) || isset($this->vars[$conditionName])) {
            continue;
        }
        // Not defined, so we set it to '', we just need an entry into this->vars for the next loop
        $this->vars[$conditionName] = '';
    }

    // Conditionals substitution
    // Note: must be done before static substitution, else the variable will be replaced by its value and the conditional won't work anymore
    foreach ($this->vars as $key => $value) {
        $quotedKey = preg_quote($key, '@');
        // Optional "ELSE ..." part followed by the ENDIF tag (U modifier = all quantifiers are non-greedy)
        $elseToEndif = '(\[!--\sELSE\s' . $quotedKey . '\s--\](.*))?\[!--\sENDIF\s' . $quotedKey . '\s--\]';

        // If value is true (not 0 nor false nor null nor empty string)
        if ($value) {
            // Remove the IF tag
            $xml = str_replace('[!-- IF ' . $key . ' --]', '', $xml);
            // Remove everything between the ELSE tag (if it exists) and the ENDIF tag
            $xml = preg_replace('@' . $elseToEndif . '@smU', '', $xml);
            continue;
        }

        // Else the value is false, then two cases: no ELSE and we're done, or there is at least
        // one place where there is an ELSE clause, then we replace it.
        // Find all conditional blocks for this variable: from IF to ELSE and to ENDIF
        $blockPattern = '@\[!--\sIF\s' . $quotedKey . '\s--\](.*)' . $elseToEndif . '@smU';
        $blocks = array();
        preg_match_all($blockPattern, $xml, $blocks, PREG_SET_ORDER);
        foreach ($blocks as $block) {
            // If there is an ELSE clause, the whole block is replaced by the content of that clause
            if (!empty($block[3])) {
                $xml = str_replace($block[0], $block[3], $xml);
            }
        }
        // Cleanup the other conditional blocks (those without ELSE clause are removed altogether)
        $xml = preg_replace($blockPattern, '', $xml);
    }

    // Static substitution
    $xml = str_replace(array_keys($this->vars), array_values($this->vars), $xml);
}
/**
 * Add the merged segment to the document.
 *
 * Every "[!-- BEGIN name --] ... [!-- END name --]" block of content.xml is
 * replaced by the XML returned by the segment's getXmlParsed().
 *
 * @param   Segment $segment    Segment, must have been declared with setSegment() first
 * @throws  OdfException        If the segment is unknown or the replacement fails
 * @return  Odf                 Current instance
 */
public function mergeSegment(Segment $segment)
{
    $name = $segment->getName();
    if (!array_key_exists($name, $this->segments)) {
        throw new OdfException($name . ' cannot be parsed, has it been set yet ?');
    }

    // The segment name is data, not a pattern: quote it so regex metacharacters are matched literally.
    $quotedName = preg_quote($name, '@');
    // $reg = '@<text:p[^>]*>\[!--\sBEGIN\s' . $string . '\s--\](.*)\[!--.+END\s' . $string . '\s--\]<\/text:p>@smU';
    $reg = '@\[!--\sBEGIN\s' . $quotedName . '\s--\](.*)\[!--.+END\s' . $quotedName . '\s--\]@smU'; // U modifier = all quantifiers are non-greedy

    // A callback inserts the merged XML verbatim. Given as a plain replacement string,
    // sequences such as "$1" or "\1" inside the document text would be read as backreferences.
    $parsedXml = $segment->getXmlParsed();
    $merged = preg_replace_callback(
        $reg,
        function () use ($parsedXml) {
            return $parsedXml;
        },
        (string) $this->contentXml
    );
    if ($merged === null) {
        // Keep the current content instead of overwriting it with null on a PCRE failure.
        throw new OdfException('Error while merging segment ' . $name . ' (PCRE error code ' . preg_last_error() . ')');
    }

    $this->contentXml = $merged;
    return $this;
}
/**
 * Build a printable dump of all the current template variables.
 *
 * @return string   print_r() rendering of the variables wrapped in a <pre> element
 */
public function printVars()
{
    $dump = print_r($this->vars, true);

    return '<pre>' . $dump . '</pre>';
}
/**
 * Return the XML content (content.xml) of the odt document
 * as it is at the moment.
 *
 * @return string   Current content XML, empty string when no content is loaded
 */
public function __toString()
{
    // __toString() must return a string: cast so an unset (null) content does not raise an error.
    return (string) $this->contentXml;
}
/**
 * Build a printable list of the loop segments declared with setSegment().
 *
 * @return string   Segment names separated by a space, wrapped in a <pre> element
 */
public function printDeclaredSegments()
{
    $names = array_keys($this->segments);
    $list = implode(' ', $names);

    return '<pre>' . $list . '</pre>';
}
/**
 * Declare a segment in order to use it in a loop.
 * Extract the segment and store it into $this->segments[]. Return it for next call.
 *
 * The segment is the text found between the tags "[!-- BEGIN name --]" and
 * "[!-- END name --]" of the content XML (searched after HTML entity decoding).
 * A segment that was already declared is returned as is.
 *
 * @param   string  $segment    Name of the segment
 * @throws  OdfExceptionSegmentNotFound     If the BEGIN/END tags of the segment are not found
 * @return  Segment
 */
public function setSegment($segment)
{
    if (array_key_exists($segment, $this->segments)) {
        return $this->segments[$segment];
    }

    // The name is data, not a pattern: quote it so regex metacharacters are matched literally.
    $quotedName = preg_quote($segment, '#');
    // $reg = "#\[!--\sBEGIN\s$segment\s--\]<\/text:p>(.*)<text:p\s.*>\[!--\sEND\s$segment\s--\]#sm";
    $reg = '#\[!--\sBEGIN\s' . $quotedName . '\s--\](.*)\[!--\sEND\s' . $quotedName . '\s--\]#sm';

    $m = array();
    // preg_match() returns 0 when nothing matches and false on error: both are "not found" here.
    if (!preg_match($reg, html_entity_decode((string) $this->contentXml), $m)) {
        throw new OdfExceptionSegmentNotFound("'" . $segment . "' segment not found in the document. The tag [!-- BEGIN xxx --] or [!-- END xxx --] is not present into content file.");
    }

    $this->segments[$segment] = new Segment($segment, $m[1], $this);
    return $this->segments[$segment];
}
/**
 * Save the odt file on the disk.
 *
 * The document is always written to the temporary working file; when $file is a
 * string, the result is then copied to that path.
 *
 * @param   string  $file   Name of the desired file (null = only update the temporary file)
 * @throws  OdfException    If $file exists but is not a writable regular file, or if the copy fails
 * @return  void
 */
public function saveToDisk($file = null)
{
    $hasTarget = ($file !== null && is_string($file));

    // Refuse early when the target exists but cannot be overwritten.
    if ($hasTarget && file_exists($file) && !(is_file($file) && is_writable($file))) {
        throw new OdfException('Permission denied : can\'t create ' . $file);
    }

    $this->_save();

    // copy() only returns false on failure: report it instead of leaving the caller without the file.
    if ($hasTarget && !copy($this->tmpfile, $file)) {
        throw new OdfException('Error during file export: can\'t copy ' . $this->tmpfile . ' to ' . $file);
    }
}
/**
 * Write output file onto disk.
 *
 * Parses the content, styles and meta parts, refreshes the meta data, then stores
 * content.xml, meta.xml, styles.xml, the registered images and the manifest into
 * the archive opened on the temporary file.
 *
 * @throws  OdfException    If one of the XML parts can't be added to the archive
 * @return  void
 */
private function _save()
{
    $this->file->open($this->tmpfile); // tmpfile is odt template

    foreach (array('content', 'styles', 'meta') as $part) {
        $this->_parse($part);
    }

    $this->setMetaData();
    //print $this->metaXml;exit;

    // Archive entry, XML to store, label used in the error message
    $xmlParts = array(
        array('content.xml', $this->contentXml, 'content'),
        array('meta.xml', $this->metaXml, 'meta'),
        array('styles.xml', $this->stylesXml, 'styles'),
    );
    foreach ($xmlParts as $xmlPart) {
        list($entryName, $entryXml, $label) = $xmlPart;
        if (!$this->file->addFromString($entryName, $entryXml)) {
            throw new OdfException('Error during file export addFromString ' . $label);
        }
    }

    foreach ($this->images as $sourcePath => $pictureName) {
        // Add the image inside the ODT document
        $this->file->addFile($sourcePath, 'Pictures/' . $pictureName);
        // Add the image to the Manifest (which maintains a list of images, necessary to avoid "Corrupt ODT file. Repair?" when opening the file with LibreOffice)
        $this->addImageToManifest($pictureName);
    }

    $manifestStored = $this->file->addFromString('META-INF/manifest.xml', $this->manifestXml);
    if (!$manifestStored) {
        throw new OdfException('Error during file export: manifest.xml');
    }

    $this->file->close();
}
2015-09-05 00:43:13 +02:00
/**
 * Update Meta information (meta.xml content).
 * Example of generated date: <dc:date>2013-03-16T14:06:25</dc:date>
 *
 * Refreshes the dc:date, dc:creator, dc:title and dc:subject elements already present
 * in the meta XML, then rewrites one meta:user-defined element per entry of
 * $this->userdefined just before the closing </office:meta> tag.
 *
 * @return  void
 */
public function setMetaData()
{
    if (empty($this->creator)) {
        $this->creator = '';
    }

    // Replace all matches of $pattern in $xml by $replacement, inserted verbatim.
    // A callback is used because a plain replacement string would interpret "$1" or "\1"
    // found in a title or a creator name as a backreference.
    $substitute = function ($pattern, $replacement, $xml) {
        $result = preg_replace_callback(
            $pattern,
            function () use ($replacement) {
                return $replacement;
            },
            (string) $xml
        );
        // On a PCRE failure keep the XML unchanged rather than losing it.
        return ($result === null) ? $xml : $result;
    };

    $fields = array(
        'dc:date' => gmdate('Y-m-d\TH:i:s'),
        'dc:creator' => htmlspecialchars((string) $this->creator),
        'dc:title' => htmlspecialchars(isset($this->title) ? (string) $this->title : ''),
        'dc:subject' => htmlspecialchars(isset($this->subject) ? (string) $this->subject : ''),
    );
    foreach ($fields as $tag => $value) {
        // Non-greedy match: stop at the first closing tag even when the whole file is on one line.
        $this->metaXml = $substitute('/<' . $tag . '>.*?<\/' . $tag . '>/', '<' . $tag . '>' . $value . '</' . $tag . '>', $this->metaXml);
    }

    if (!empty($this->userdefined)) {
        foreach ($this->userdefined as $key => $val) {
            $name = htmlspecialchars((string) $key);
            $quotedName = preg_quote($name, '/');

            // Remove any previous definition of this entry (empty element, then element with a value).
            // Real '/' delimiters are required: with the pattern starting by '<', PCRE would use '<' and '>'
            // as delimiters and leave an empty "<>" in the XML.
            $this->metaXml = $substitute('/<meta:user-defined meta:name="' . $quotedName . '"\/>/', '', $this->metaXml);
            // Non-greedy so the other user-defined entries on the same line are kept.
            $this->metaXml = $substitute('/<meta:user-defined meta:name="' . $quotedName . '">.*?<\/meta:user-defined>/', '', $this->metaXml);

            $this->metaXml = str_replace('</office:meta>', '<meta:user-defined meta:name="' . $name . '">' . htmlspecialchars((string) $val) . '</meta:user-defined></office:meta>', (string) $this->metaXml);
        }
    }
}
2017-10-10 18:54:30 +02:00
2013-04-20 11:03:23 +02:00
/**
 * Update Manifest file according to added image files.
 *
 * Inserts a manifest:file-entry for "Pictures/<file>" just before the closing
 * </manifest:manifest> tag. Nothing is added when the entry is already declared.
 *
 * @param   string  $file   Image file to add into manifest content
 * @return  void
 */
public function addImageToManifest($file)
{
    // Get the file extension (empty string when the name has no extension)
    $ext = strtolower((string) pathinfo((string) $file, PATHINFO_EXTENSION));

    // Extensions whose registered image media subtype differs from the extension itself
    $subtypes = array('jpg' => 'jpeg', 'jpe' => 'jpeg', 'tif' => 'tiff', 'svg' => 'svg+xml');
    $subtype = isset($subtypes[$ext]) ? $subtypes[$ext] : $ext;

    $fullPathAttribute = 'manifest:full-path="Pictures/' . htmlspecialchars((string) $file) . '"';

    // The manifest is updated on each save: do not declare the same picture twice.
    if (strpos((string) $this->manifestXml, $fullPathAttribute) !== false) {
        return;
    }

    // Create the correct image XML entry to add to the manifest (this is necessary because ODT format requires that we keep a list of the images in the manifest.xml)
    $add = ' <manifest:file-entry manifest:media-type="image/' . $subtype . '" ' . $fullPathAttribute . '/>' . "\n";

    // Append the image to the manifest: the closing tag is replaced by the image XML entry + the closing tag (nothing is overwritten)
    $this->manifestXml = str_replace('</manifest:manifest>', $add . '</manifest:manifest>', (string) $this->manifestXml);
}
/**
 * Export the file as attached file by HTTP.
 *
 * The document is saved into the temporary working file, then sent to the output
 * with the ODT content type and an "attachment" disposition.
 *
 * @param   string  $name   (optional) File name proposed for the download. A random name is generated when empty.
 * @throws  OdfException    If the HTTP headers were already sent
 * @return  void
 */
public function exportAsAttachedFile($name = "")
{
    $this->_save();

    $filename = '';
    $linenum = 0;
    if (headers_sent($filename, $linenum)) {
        throw new OdfException("headers already sent ($filename at $linenum)");
    }

    if ($name == "") {
        $name = md5(uniqid()) . ".odt";
    }

    // The name ends inside a quoted header parameter: keep only the file part and
    // drop the characters that would break the header.
    $safeName = str_replace(array('"', "\r", "\n", "\0"), '', basename((string) $name));

    header('Content-type: application/vnd.oasis.opendocument.text');
    header('Content-Disposition: attachment; filename="' . $safeName . '"');

    // The file has just been rewritten: make sure the size is not read from the stat cache.
    clearstatcache(true, $this->tmpfile);
    $size = filesize($this->tmpfile);
    if ($size !== false) {
        header('Content-Length: ' . $size);
    }

    readfile($this->tmpfile);
}
/**
 * Convert the ODT file to PDF and export the file as attached file by HTTP
 * Note: you need to have JODConverter and OpenOffice or LibreOffice installed and executable on the same system as where this php script will be executed. You also need to chmod +x odt2pdf.sh
 *
 * The ODT is first written to $name with saveToDisk(). The conversion command depends on option MAIN_ODT_AS_PDF:
 * 'libreoffice' (soffice --headless), a value containing 'unoconv', or else the deprecated odt2pdf.sh script.
 * The command is run with exec(), or with popen() when MAIN_EXEC_USE_POPEN is set.
 * In a web context the PDF is sent to the output only when MAIN_DISABLE_PDF_AUTOUPDATE is set.
 * The source ODT is deleted when MAIN_ODT_AS_PDF_DEL_SOURCE is set.
 *
 * @param   string  $name   Name of ODT file to generate before generating PDF
 * @throws  OdfException    If headers were already sent or if the conversion command fails
 * @return  void
 */
public function exportAsAttachedPDF($name = "")
{
    global $conf;

    if ($name == "") {
        $name = "temp" . md5(uniqid());
    }

    dol_syslog(get_class($this) . '::exportAsAttachedPDF $name=' . $name, LOG_DEBUG);
    $this->saveToDisk($name);

    $execmethod = (empty($conf->global->MAIN_EXEC_USE_POPEN) ? 1 : 2); // 1 or 2
    // Method 1 sometimes hang the server.

    // Export to PDF using LibreOffice
    if (getDolGlobalString('MAIN_ODT_AS_PDF') == 'libreoffice') {
        dol_mkdir($conf->user->dir_temp); // We must be sure the directory exists and is writable

        // We delete and recreate a subdir because the soffice may have changed permissions on it
        $countdeleted = 0;
        dol_delete_dir_recursive($conf->user->dir_temp . '/odtaspdf', 0, 0, 0, $countdeleted, 0, 1);
        dol_mkdir($conf->user->dir_temp . '/odtaspdf');

        // Install prerequisites: apt install soffice libreoffice-common libreoffice-writer
        // using windows libreoffice that must be in path
        // using linux/mac libreoffice that must be in path
        // Note PHP Config "fastcgi.impersonate=0" must set to 0 - Default is 1
        $command = 'soffice --headless -env:UserInstallation=file:' . (getDolGlobalString('MAIN_ODT_ADD_SLASH_FOR_WINDOWS') ? '///' : '') . '\'' . $conf->user->dir_temp . '/odtaspdf\' --convert-to pdf --outdir ' . escapeshellarg(dirname($name)) . " " . escapeshellarg($name);
    } elseif (preg_match('/unoconv/', getDolGlobalString('MAIN_ODT_AS_PDF'))) {
        // If issue with unoconv, see https://github.com/dagwieers/unoconv/issues/87

        // MAIN_ODT_AS_PDF should be "sudo -u unoconv /usr/bin/unoconv" and userunoconv must have sudo to be root by adding file /etc/sudoers.d/unoconv with content www-data ALL=(unoconv) NOPASSWD: /usr/bin/unoconv .

        // Try this with www-data user: /usr/bin/unoconv -vvvv -f pdf /tmp/document-example.odt
        // It must return:
        //Verbosity set to level 4
        //Using office base path: /usr/lib/libreoffice
        //Using office binary path: /usr/lib/libreoffice/program
        //DEBUG: Connection type: socket,host=127.0.0.1,port=2002;urp;StarOffice.ComponentContext
        //DEBUG: Existing listener not found.
        //DEBUG: Launching our own listener using /usr/lib/libreoffice/program/soffice.bin.
        //LibreOffice listener successfully started. (pid=9287)
        //Input file: /tmp/document-example.odt
        //unoconv: file `/tmp/document-example.odt' does not exist.
        //unoconv: RuntimeException during import phase:
        //Office probably died. Unsupported URL <file:///tmp/document-example.odt>: "type detection failed"
        //DEBUG: Terminating LibreOffice instance.
        //DEBUG: Waiting for LibreOffice instance to exit

        // If it fails:
        // - set shell of user to bash instead of nologin.
        // - set permission to read/write to user on home directory /var/www so user can create the libreoffice , dconf and .cache dir and files then set permission back

        // The file name is one argument: escapeshellarg() quotes it so a path containing spaces is not split.
        $command = getDolGlobalString('MAIN_ODT_AS_PDF') . ' ' . escapeshellarg($name);
        //$command = '/usr/bin/unoconv -vvv '.escapeshellarg($name);
    } else {
        // deprecated old method using odt2pdf.sh (native, jodconverter, ...)
        $tmpname = preg_replace('/\.odt/i', '', $name);
        $converter = (is_numeric(getDolGlobalString('MAIN_ODT_AS_PDF')) ? 'jodconverter' : getDolGlobalString('MAIN_ODT_AS_PDF'));

        if (getDolGlobalString('MAIN_DOL_SCRIPTS_ROOT')) {
            $command = getDolGlobalString('MAIN_DOL_SCRIPTS_ROOT') . '/scripts/odt2pdf/odt2pdf.sh ' . escapeshellarg($tmpname) . ' ' . $converter;
        } else {
            dol_syslog(get_class($this) . '::exportAsAttachedPDF is used but the constant MAIN_DOL_SCRIPTS_ROOT with path to script directory was not defined.', LOG_WARNING);
            $command = '../../scripts/odt2pdf/odt2pdf.sh ' . escapeshellarg($tmpname) . ' ' . $converter;
        }
    }

    //$dirname=dirname($name);
    //$command = DOL_DOCUMENT_ROOT.'/includes/odtphp/odt2pdf.sh '.$name.' '.$dirname;

    dol_syslog(get_class($this) . '::exportAsAttachedPDF $execmethod=' . $execmethod . ' Run command=' . $command, LOG_DEBUG);
    // TODO Use:
    // $outputfile = DOL_DATA_ROOT.'/odt2pdf.log';
    // $result = $utils->executeCLI($command, $outputfile); and replace test on $execmethod.
    // $retval will be $result['result']
    // $errorstring will be $result['output']
    $retval = 0;
    $output_arr = array();

    if ($execmethod == 1) {
        exec($command, $output_arr, $retval);
    }
    if ($execmethod == 2) {
        $outputfile = DOL_DATA_ROOT . '/odt2pdf.log';

        $handle = fopen($outputfile, 'w');
        if ($handle) {
            dol_syslog(get_class($this) . " Run command " . $command, LOG_DEBUG);
            fwrite($handle, $command . "\n");
            $handlein = popen($command, 'r');
            if ($handlein) {
                while (!feof($handlein)) {
                    $read = fgets($handlein);
                    if ($read === false) {
                        // End of output (or read error): nothing to record
                        break;
                    }
                    fwrite($handle, $read);
                    $output_arr[] = $read;
                }
                // The exit status is only logged: this method keeps reporting through $retval as before.
                $status = pclose($handlein);
                dol_syslog(get_class($this) . '::exportAsAttachedPDF pclose status=' . $status, LOG_DEBUG);
            } else {
                // The command could not be started: report a failure instead of looping on an invalid handle.
                $retval = -1;
                $output_arr[] = 'Failed to run command with popen';
            }
            fclose($handle);
        }
        dolChmod($outputfile);
    }

    if ($retval == 0) {
        dol_syslog(get_class($this) . '::exportAsAttachedPDF $ret_val=' . $retval, LOG_DEBUG);
        $filename = '';
        $linenum = 0;

        if (php_sapi_name() != 'cli') { // If we are in a web context (not into CLI context)
            if (headers_sent($filename, $linenum)) {
                throw new OdfException("headers already sent ($filename at $linenum)");
            }

            if (getDolGlobalString('MAIN_DISABLE_PDF_AUTOUPDATE')) {
                $name = preg_replace('/\.od(x|t)/i', '', $name);
                header('Content-type: application/pdf');
                header('Content-Disposition: attachment; filename="' . basename($name) . '.pdf"');
                readfile($name . ".pdf");
            }
        }

        if (getDolGlobalString('MAIN_ODT_AS_PDF_DEL_SOURCE')) {
            unlink($name);
        }
    } else {
        dol_syslog(get_class($this) . '::exportAsAttachedPDF $ret_val=' . $retval, LOG_DEBUG);
        dol_syslog(get_class($this) . '::exportAsAttachedPDF $output_arr=' . var_export($output_arr, true), LOG_DEBUG);

        if ($retval == 126) {
            throw new OdfException('Permission execute convert script : ' . $command);
        } else {
            $errorstring = '';
            foreach ($output_arr as $line) {
                $errorstring .= $line . "<br>";
            }
            throw new OdfException('ODT to PDF convert fail (option MAIN_ODT_AS_PDF is ' . getDolGlobalString('MAIN_ODT_AS_PDF') . ', command was ' . $command . ', retval=' . $retval . ') : ' . $errorstring);
        }
    }
}
/**
 * Read one entry of the configuration.
 *
 * @param   string          $configKey  Config key
 * @return  string|false                Value stored for the key, false when the key is not defined
 */
public function getConfig($configKey)
{
    $isDefined = array_key_exists($configKey, $this->config);

    return $isDefined ? $this->config[$configKey] : false;
}
/**
 * Give access to the temporary working file.
 *
 * @return string   Path to the temporary working file
 */
public function getTmpfile()
{
    $workingFile = $this->tmpfile;

    return $workingFile;
}
/**
 * Delete the temporary file and the temporary working directory
 * when the object is destroyed.
 */
public function __destruct()
{
    // tmpfile may never have been set (object destroyed before the template was loaded):
    // only test a real path, file_exists() does not accept null.
    if (is_string($this->tmpfile) && $this->tmpfile !== '' && file_exists($this->tmpfile)) {
        unlink($this->tmpfile);
    }

    // Only a directory can be emptied and removed.
    if (is_string($this->tmpdir) && $this->tmpdir !== '' && is_dir($this->tmpdir)) {
        $this->_rrmdir($this->tmpdir);
        rmdir($this->tmpdir);
    }
}
/**
 * Empty a directory recursively: files are deleted, sub-directories are emptied
 * then removed. The directory itself is kept.
 *
 * @param   string  $dir    The temporary working directory
 * @return  void
 */
private function _rrmdir($dir)
{
    $handle = opendir($dir);
    if (!$handle) {
        return;
    }

    while (($entry = readdir($handle)) !== false) {
        if ($entry == '.' || $entry == '..') {
            continue;
        }

        $path = $dir . '/' . $entry;
        if (!is_dir($path)) {
            unlink($path);
            continue;
        }

        $this->_rrmdir($path);
        rmdir($path);
    }

    closedir($handle);
}
2014-11-01 21:00:28 +01:00
/**
 * Return the value present on odt in [valuename][/valuename]
 * and remove the whole tag (with its value) from the content.
 *
 * @param   string  $valuename  Name of the tag in the template
 * @return  string              The value inside the tag, empty string when the tag is not found
 */
public function getvalue($valuename)
{
    // The tag name is data, not a pattern: quote it so regex metacharacters are matched literally.
    $quotedName = preg_quote($valuename, '/');
    $searchreg = '/\[' . $quotedName . '\](.*)\[\/' . $quotedName . '\]/';

    $content = (string) $this->contentXml;
    $matches = array();
    preg_match($searchreg, $content, $matches);

    $cleaned = preg_replace($searchreg, "", $content);
    if ($cleaned !== null) {
        // On a PCRE failure the content is kept unchanged rather than replaced by null.
        $this->contentXml = $cleaned;
    }

    if ($matches) {
        return $matches[1];
    }
    return "";
}
2023-12-11 12:36:54 +01:00
}