diff --git a/htdocs/core/lib/functions.lib.php b/htdocs/core/lib/functions.lib.php index 26030e25e38..98b14060c32 100644 --- a/htdocs/core/lib/functions.lib.php +++ b/htdocs/core/lib/functions.lib.php @@ -8118,13 +8118,12 @@ function dol_htmlwithnojs($stringtoencode, $nouseofiframesandbox = 0, $check = ' // like '

Foo

bar

' that wrongly ends up, without the trick, with '

Foo

bar

' // like 'abc' that wrongly ends up, without the trick, with '

abc

' - // TODO Must accept emoji with MAIN_RESTRICTHTML_ONLY_VALID_HTML... - if (dol_textishtml($out)) { $out = '
'.$out.'
'; } else { $out = '
'.dol_nl2br($out).'
'; } + $dom->loadHTML($out, LIBXML_HTML_NODEFDTD | LIBXML_ERR_NONE | LIBXML_HTML_NOIMPLIED | LIBXML_NONET | LIBXML_NOWARNING | LIBXML_NOERROR | LIBXML_NOXMLDECL); $out = trim($dom->saveHTML()); @@ -8195,7 +8194,7 @@ function dol_htmlwithnojs($stringtoencode, $nouseofiframesandbox = 0, $check = ' }, $out ); - + var_dump($out); // Now we remove all remaining HTML entities starting with a number. We don't want such entities. $out = preg_replace('/&#x?[0-9]+/i', '', $out); // For example if we have javascript with an entities without the ; to hide the 'a' of 'javascript'. diff --git a/htdocs/core/lib/functions2.lib.php b/htdocs/core/lib/functions2.lib.php index 648ec5f5f57..a4eb5ed814c 100644 --- a/htdocs/core/lib/functions2.lib.php +++ b/htdocs/core/lib/functions2.lib.php @@ -2982,3 +2982,62 @@ function removeGlobalParenthesis($string) return $string; } + + +/** + * Return array of Emojis + * + * @return array Array of Emojis in hexadecimal + */ +function getArrayOfEmoji() +{ + $arrayofcommonemoji = array( + 'misc' => array('2600', '26FF'), // Miscellaneous Symbols + 'ding' => array('2700', '27BF'), // Dingbats + '????' => array('9989', '9989'), // Variation Selectors + 'vars' => array('FE00', 'FE0F'), // Variation Selectors + 'pict' => array('1F300', '1F5FF'), // Miscellaneous Symbols and Pictographs + 'emot' => array('1F600', '1F64F'), // Emoticons + 'tran' => array('1F680', '1F6FF'), // Transport and Map Symbols + 'flag' => array('1F1E0', '1F1FF'), // Flags (note: may be 1F1E6 instead of 1F1E0) + 'supp' => array('1F900', '1F9FF'), // Supplemental Symbols and Pictographs + ); + + return $arrayofcommonemoji; +} + +/** + * Remove EMoji from email content + * + * @param string $text String to sanitize + * @param int $allowedemoji Mode to allow emoji + * @return string Sanitized string + */ +function removeEmoji($text, $allowedemoji = 1) +{ + // $allowedemoji can be + // 0=no emoji, 1=exclude the main known emojis (default), 2=keep only the main known (not implemented), 3=accept all + // Note that to accept emoji in database, you must use utf8mb4, utf8mb3 is not enough. + + $arrayofcommonemoji = getArrayOfEmoji(); + + if ($allowedemoji == 0) { + // For a large removal: + $text = preg_replace('/[\x{2600}-\x{FFFF}]/u', '', $text); + $text = preg_replace('/[\x{10000}-\x{10FFFF}]/u', '', $text); + } + + // Delete emoji chars with a regex + // See https://www.unicode.org/emoji/charts/full-emoji-list.html + if ($allowedemoji == 1) { + foreach ($arrayofcommonemoji as $key => $valarray) { + $text = preg_replace('/[\x{'.$valarray[0].'}-\x{'.$valarray[1].'}]/u', '', $text); + } + } + + if ($allowedemoji == 2) { + // TODO Not yet implemented + } + + return $text; +} diff --git a/htdocs/emailcollector/class/emailcollector.class.php b/htdocs/emailcollector/class/emailcollector.class.php index 8a44636eaf8..ba4db5d1b91 100644 --- a/htdocs/emailcollector/class/emailcollector.class.php +++ b/htdocs/emailcollector/class/emailcollector.class.php @@ -28,6 +28,7 @@ include_once DOL_DOCUMENT_ROOT .'/emailcollector/lib/emailcollector.lib.php'; require_once DOL_DOCUMENT_ROOT .'/core/class/commonobject.class.php'; require_once DOL_DOCUMENT_ROOT .'/core/lib/files.lib.php'; +require_once DOL_DOCUMENT_ROOT .'/core/lib/functions2.lib.php'; require_once DOL_DOCUMENT_ROOT .'/comm/propal/class/propal.class.php'; // Customer Proposal require_once DOL_DOCUMENT_ROOT .'/commande/class/commande.class.php'; // Sale Order @@ -1785,7 +1786,7 @@ class EmailCollector extends CommonObject dol_syslog("msgid=".$overview['message_id']." date=".dol_print_date($overview['date'], 'dayrfc', 'gmt')." from=".$overview['from']." to=".$overview['to']." subject=".$overview['subject']); // Removed emojis - $overview['subject'] = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $overview['subject']); + $overview['subject'] = removeEmoji($overview['subject'], getDolGlobalInt('MAIN_EMAIL_COLLECTOR_ACCEPT_EMOJIS', 1)); } else { dol_syslog("msgid=".$overview[0]->message_id." date=".dol_print_date($overview[0]->udate, 'dayrfc', 'gmt')." from=".$overview[0]->from." to=".$overview[0]->to." subject=".$overview[0]->subject); @@ -1794,7 +1795,7 @@ class EmailCollector extends CommonObject $overview[0]->from = $this->decodeSMTPSubject($overview[0]->from); // Removed emojis - $overview[0]->subject = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $overview[0]->subject); + $overview[0]->subject = removeEmoji($overview[0]->subject, getDolGlobalInt('MAIN_EMAIL_COLLECTOR_ACCEPT_EMOJIS', 1)); } // GET IMAP email structure/content global $htmlmsg, $plainmsg, $charset, $attachments; @@ -1825,8 +1826,7 @@ class EmailCollector extends CommonObject // Removed emojis if (utf8_valid($messagetext)) { - //$messagetext = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $messagetext); - $messagetext = $this->removeEmoji($messagetext); + $messagetext = removeEmoji($messagetext, getDolGlobalInt('MAIN_EMAIL_COLLECTOR_ACCEPT_EMOJIS', 1)); } else { $operationslog .= '
Discarded - Email body is not valid utf8'; dol_syslog(" Discarded - Email body is not valid utf8"); @@ -3714,26 +3714,6 @@ class EmailCollector extends CommonObject return $subject; } - /** - * Remove EMoji from email content - * - * @param string $text String to sanitize - * @return string Sanitized string - */ - protected function removeEmoji($text) - { - // Supprimer les caractères emoji en utilisant une expression régulière - $text = preg_replace('/[\x{1F600}-\x{1F64F}]/u', '', $text); - $text = preg_replace('/[\x{1F300}-\x{1F5FF}]/u', '', $text); - $text = preg_replace('/[\x{1F680}-\x{1F6FF}]/u', '', $text); - $text = preg_replace('/[\x{2600}-\x{26FF}]/u', '', $text); - $text = preg_replace('/[\x{2700}-\x{27BF}]/u', '', $text); - $text = preg_replace('/[\x{1F900}-\x{1F9FF}]/u', '', $text); - $text = preg_replace('/[\x{1F1E0}-\x{1F1FF}]/u', '', $text); - - return $text; - } - /** * saveAttachment * diff --git a/htdocs/main.inc.php b/htdocs/main.inc.php index 71294d9e894..344516cae1d 100644 --- a/htdocs/main.inc.php +++ b/htdocs/main.inc.php @@ -58,16 +58,18 @@ if (!empty($_SERVER['MAIN_SHOW_TUNING_INFO'])) { * Return the real char for a numeric entities. * WARNING: This function is required by testSqlAndScriptInject() and the GETPOST 'restricthtml'. Regex calling must be similar. * - * @param string $matches String of numeric entity - * @return string New value + * @param array $matches Array with a decimal numeric entity into key 0, value without the &# into the key 1 + * @return string New value */ function realCharForNumericEntities($matches) { $newstringnumentity = preg_replace('/;$/', '', $matches[1]); //print ' $newstringnumentity='.$newstringnumentity; - if (preg_match('/^x/i', $newstringnumentity)) { + if (preg_match('/^x/i', $newstringnumentity)) { // if numeric is hexadecimal $newstringnumentity = hexdec(preg_replace('/^x/i', '', $newstringnumentity)); + } else { + $newstringnumentity = (int) $newstringnumentity; } // The numeric value we don't want as entities because they encode ascii char, and why using html entities on ascii except for haking ? @@ -75,6 +77,16 @@ function realCharForNumericEntities($matches) return chr((int) $newstringnumentity); } + // The numeric value we want in UTF8 instead of entities because it is emoji + include_once DOL_DOCUMENT_ROOT.'/core/lib/functions2.lib.php'; + $arrayofemojis = getArrayOfEmoji(); + foreach ($arrayofemojis as $valarray) { + if ($newstringnumentity >= hexdec($valarray[0]) && $newstringnumentity <= hexdec($valarray[1])) { + // This is a known emoji + return html_entity_decode($matches[0], ENT_COMPAT | ENT_HTML5, 'UTF-8'); + } + } + return '&#'.$matches[1]; // Value will be unchanged because regex was /&#( )/ } diff --git a/test/phpunit/Functions2LibTest.php b/test/phpunit/Functions2LibTest.php index 725216c6cd6..02fab7f28e8 100644 --- a/test/phpunit/Functions2LibTest.php +++ b/test/phpunit/Functions2LibTest.php @@ -1,7 +1,7 @@ * Copyright (C) 2023 Alexandre Janniaux - * Copyright (C) 2024 MDW + * Copyright (C) 2024 MDW * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by @@ -92,6 +92,8 @@ class Functions2LibTest extends CommonClassTest */ public function testIsValidMailDomain() { + print __METHOD__."\n"; + $mail = 'bidon@invalid.invalid'; $result = isValidMailDomain($mail); $this->assertEquals(0, $result, 'Email isValidMailDomain('.$mail.') should return 0 (not valid) but returned '.$result); @@ -108,6 +110,8 @@ class Functions2LibTest extends CommonClassTest */ public function testIsValidUrl() { + print __METHOD__."\n"; + //Simple check $result = isValidUrl('http://google.com'); $this->assertEquals(1, $result); @@ -283,6 +287,30 @@ class Functions2LibTest extends CommonClassTest { $time = strtotime($time_str); $str = date(DATE_ATOM, $time).PHP_EOL; + print __METHOD__." time=".$time."\n"; $this->assertEquals($expected_week, numero_semaine($time), "Computed week incorrect for $str"); } + + + /** + * Test testRemoveEmoji + * + * @return void + */ + public function testRemoveEmoji() + { + print __METHOD__."\n"; + + $text = 'abc ✅ def'; + $result = removeEmoji($text, 0); + $this->assertEquals('abc def', $result, 'testRemoveEmoji 0'); + + $text = 'abc ✅ def'; + $result = removeEmoji($text, 1); + $this->assertEquals('abc def', $result, 'testRemoveEmoji 1'); + + $text = 'abc ✅ def'; + $result = removeEmoji($text, 2); + $this->assertEquals($text, $result, 'testRemoveEmoji 2'); + } } diff --git a/test/phpunit/SecurityTest.php b/test/phpunit/SecurityTest.php index 928faf56d03..e1d339e27cd 100644 --- a/test/phpunit/SecurityTest.php +++ b/test/phpunit/SecurityTest.php @@ -1110,44 +1110,6 @@ class SecurityTest extends CommonClassTest $this->assertStringContainsString('Bad string syntax to evaluate', $result); } - /** - * testDolHtmlWithNoJs() - * - * @return int - */ - public function testDolHtmlWithNoJs() - { - global $conf; - - $sav1 = $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML; - $sav2 = $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY; - - // Test with an emoji - $test = 'abc ✅ def'; - - $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = 0; - $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = 1; - $result = dol_htmlwithnojs($test); - $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = $sav1; - $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = $sav2; - - print __METHOD__." result for dol_htmlwithnojs and MAIN_RESTRICTHTML_ONLY_VALID_HTML=0 with emoji = ".$result."\n"; - $this->assertEquals($test, $result, 'dol_htmlwithnojs failed with an emoji when MAIN_RESTRICTHTML_ONLY_VALID_HTML=0'); - - /* - $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = 1; - $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = 1; - $result = dol_htmlwithnojs($test); - $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = $sav1; - $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = $sav2; - - print __METHOD__." result for dol_htmlwithnojs and MAIN_RESTRICTHTML_ONLY_VALID_HTML=1 with emoji = ".$result."\n"; - $this->assertEquals($test, $result, 'dol_htmlwithnojs failed with an emoji when MAIN_RESTRICTHTML_ONLY_VALID_HTML=1'); - */ - - return 0; - } - /** * testDolPrintHTML. * This method include calls to dol_htmlwithnojs() @@ -1246,4 +1208,70 @@ class SecurityTest extends CommonClassTest print __METHOD__." login=".$login."\n"; $this->assertEquals('', $login, 'Error'); // Expected '' because should failed because login 'auto' does not exists } + + + /** + * testRealCharforNumericEntities() + * + * @return int + */ + public function testRealCharforNumericEntities() + { + global $conf; + + // Test that testRealCharforNumericEntities return an ascii char when code is inside Ascii range + $arraytmp = array(0 => 'a', 1 => '97;'); + $result = realCharForNumericEntities($arraytmp); + $this->assertEquals('a', $result); + + // Test that testRealCharforNumericEntities return an emoji utf8 char when code is inside Emoji range + $arraytmp = array(0 => '✅', 1 => '9989;'); // Encoded as decimal + $result = realCharForNumericEntities($arraytmp); + $this->assertEquals('✅', $result); + + $arraytmp = array(0 => '✅', 1 => 'x2705;'); // Encoded as hexadecimal + $result = realCharForNumericEntities($arraytmp); + $this->assertEquals('✅', $result); + + return 0; + } + + + /** + * testDolHtmlWithNoJs() + * + * @return int + */ + public function testDolHtmlWithNoJs() + { + global $conf; + + $sav1 = $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML; + $sav2 = $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY; + + // Test with an emoji + $test = 'abc ✅ def'; + + $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = 0; + $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = 1; + $result = dol_htmlwithnojs($test); + $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = $sav1; + $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = $sav2; + + print __METHOD__." result for dol_htmlwithnojs and MAIN_RESTRICTHTML_ONLY_VALID_HTML=0 with emoji = ".$result."\n"; + $this->assertEquals($test, $result, 'dol_htmlwithnojs failed with an emoji when MAIN_RESTRICTHTML_ONLY_VALID_HTML=0'); + + + $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = 1; + $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = 0; + $result = dol_htmlwithnojs($test); + $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = $sav1; + $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = $sav2; + + print __METHOD__." result for dol_htmlwithnojs and MAIN_RESTRICTHTML_ONLY_VALID_HTML=1 with emoji = ".$result."\n"; + $this->assertEquals($test, $result, 'dol_htmlwithnojs failed with an emoji when MAIN_RESTRICTHTML_ONLY_VALID_HTML=1'); + + + return 0; + } }