diff --git a/htdocs/core/lib/functions.lib.php b/htdocs/core/lib/functions.lib.php
index 26030e25e38..98b14060c32 100644
--- a/htdocs/core/lib/functions.lib.php
+++ b/htdocs/core/lib/functions.lib.php
@@ -8118,13 +8118,12 @@ function dol_htmlwithnojs($stringtoencode, $nouseofiframesandbox = 0, $check = '
// like '
Foo
bar
' that wrongly ends up, without the trick, with 'Foo
bar
'
// like 'abc' that wrongly ends up, without the trick, with 'abc
'
- // TODO Must accept emoji with MAIN_RESTRICTHTML_ONLY_VALID_HTML...
-
if (dol_textishtml($out)) {
$out = ''.$out.'
';
} else {
$out = ''.dol_nl2br($out).'
';
}
+
$dom->loadHTML($out, LIBXML_HTML_NODEFDTD | LIBXML_ERR_NONE | LIBXML_HTML_NOIMPLIED | LIBXML_NONET | LIBXML_NOWARNING | LIBXML_NOERROR | LIBXML_NOXMLDECL);
$out = trim($dom->saveHTML());
@@ -8195,7 +8194,7 @@ function dol_htmlwithnojs($stringtoencode, $nouseofiframesandbox = 0, $check = '
},
$out
);
-
+
// Now we remove all remaining HTML entities starting with a number. We don't want such entities.
$out = preg_replace('/?[0-9]+/i', '', $out); // For example if we have javascript with an entities without the ; to hide the 'a' of 'javascript'.
diff --git a/htdocs/core/lib/functions2.lib.php b/htdocs/core/lib/functions2.lib.php
index 648ec5f5f57..a4eb5ed814c 100644
--- a/htdocs/core/lib/functions2.lib.php
+++ b/htdocs/core/lib/functions2.lib.php
@@ -2982,3 +2982,62 @@ function removeGlobalParenthesis($string)
return $string;
}
+
+
+/**
+ * Return the Unicode ranges treated as common emojis
+ *
+ * @return array Array of array(first, last) code points written as hexadecimal strings, indexed by a short label
+ */
+function getArrayOfEmoji()
+{
+ $arrayofcommonemoji = array(
+ 'misc' => array('2600', '26FF'), // Miscellaneous Symbols
+ 'ding' => array('2700', '27BF'), // Dingbats
+ // No entry for '9989': it is the decimal form of U+2705 (already inside Dingbats); read as hexadecimal it would match the CJK ideograph U+9989
+ 'vars' => array('FE00', 'FE0F'), // Variation Selectors
+ 'pict' => array('1F300', '1F5FF'), // Miscellaneous Symbols and Pictographs
+ 'emot' => array('1F600', '1F64F'), // Emoticons
+ 'tran' => array('1F680', '1F6FF'), // Transport and Map Symbols
+ 'flag' => array('1F1E0', '1F1FF'), // Flags (note: may be 1F1E6 instead of 1F1E0)
+ 'supp' => array('1F900', '1F9FF'), // Supplemental Symbols and Pictographs
+ );
+
+ return $arrayofcommonemoji;
+}
+
+/**
+ * Strip emoji characters from a text (for example an email subject or body)
+ *
+ * @param string $text String to sanitize
+ * @param int $allowedemoji Mode to allow emoji:
+ *   0=no emoji,
+ *   1=exclude the main known emojis (default),
+ *   2=keep only the main known (not implemented),
+ *   3=accept all
+ * @return string Sanitized string
+ */
+function removeEmoji($text, $allowedemoji = 1)
+{
+ // Note that to accept emoji in database, you must use utf8mb4, utf8mb3 is not enough.
+
+ if ($allowedemoji == 0) {
+ // For a large removal: everything from U+2600 to U+FFFF, then everything from U+10000 to U+10FFFF
+ $widepatterns = array('/[\x{2600}-\x{FFFF}]/u', '/[\x{10000}-\x{10FFFF}]/u');
+ foreach ($widepatterns as $pattern) {
+ $text = preg_replace($pattern, '', $text);
+ }
+ } elseif ($allowedemoji == 1) {
+ // Delete only the chars inside the ranges listed by getArrayOfEmoji()
+ // See https://www.unicode.org/emoji/charts/full-emoji-list.html
+ foreach (getArrayOfEmoji() as $range) {
+ list($firstcode, $lastcode) = $range;
+ $text = preg_replace('/[\x{'.$firstcode.'}-\x{'.$lastcode.'}]/u', '', $text);
+ }
+ }
+
+ // Other modes: 2 is not yet implemented and 3 accepts all, so the text is left untouched
+ // TODO Implement mode 2
+
+ return $text;
+}
diff --git a/htdocs/emailcollector/class/emailcollector.class.php b/htdocs/emailcollector/class/emailcollector.class.php
index 8a44636eaf8..ba4db5d1b91 100644
--- a/htdocs/emailcollector/class/emailcollector.class.php
+++ b/htdocs/emailcollector/class/emailcollector.class.php
@@ -28,6 +28,7 @@ include_once DOL_DOCUMENT_ROOT .'/emailcollector/lib/emailcollector.lib.php';
require_once DOL_DOCUMENT_ROOT .'/core/class/commonobject.class.php';
require_once DOL_DOCUMENT_ROOT .'/core/lib/files.lib.php';
+require_once DOL_DOCUMENT_ROOT .'/core/lib/functions2.lib.php';
require_once DOL_DOCUMENT_ROOT .'/comm/propal/class/propal.class.php'; // Customer Proposal
require_once DOL_DOCUMENT_ROOT .'/commande/class/commande.class.php'; // Sale Order
@@ -1785,7 +1786,7 @@ class EmailCollector extends CommonObject
dol_syslog("msgid=".$overview['message_id']." date=".dol_print_date($overview['date'], 'dayrfc', 'gmt')." from=".$overview['from']." to=".$overview['to']." subject=".$overview['subject']);
// Removed emojis
- $overview['subject'] = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $overview['subject']);
+ $overview['subject'] = removeEmoji($overview['subject'], getDolGlobalInt('MAIN_EMAIL_COLLECTOR_ACCEPT_EMOJIS', 1));
} else {
dol_syslog("msgid=".$overview[0]->message_id." date=".dol_print_date($overview[0]->udate, 'dayrfc', 'gmt')." from=".$overview[0]->from." to=".$overview[0]->to." subject=".$overview[0]->subject);
@@ -1794,7 +1795,7 @@ class EmailCollector extends CommonObject
$overview[0]->from = $this->decodeSMTPSubject($overview[0]->from);
// Removed emojis
- $overview[0]->subject = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $overview[0]->subject);
+ $overview[0]->subject = removeEmoji($overview[0]->subject, getDolGlobalInt('MAIN_EMAIL_COLLECTOR_ACCEPT_EMOJIS', 1));
}
// GET IMAP email structure/content
global $htmlmsg, $plainmsg, $charset, $attachments;
@@ -1825,8 +1826,7 @@ class EmailCollector extends CommonObject
// Removed emojis
if (utf8_valid($messagetext)) {
- //$messagetext = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $messagetext);
- $messagetext = $this->removeEmoji($messagetext);
+ $messagetext = removeEmoji($messagetext, getDolGlobalInt('MAIN_EMAIL_COLLECTOR_ACCEPT_EMOJIS', 1));
} else {
$operationslog .= '
Discarded - Email body is not valid utf8';
dol_syslog(" Discarded - Email body is not valid utf8");
@@ -3714,26 +3714,6 @@ class EmailCollector extends CommonObject
return $subject;
}
- /**
- * Remove EMoji from email content
- *
- * @param string $text String to sanitize
- * @return string Sanitized string
- */
- protected function removeEmoji($text)
- {
- // Supprimer les caractères emoji en utilisant une expression régulière
- $text = preg_replace('/[\x{1F600}-\x{1F64F}]/u', '', $text);
- $text = preg_replace('/[\x{1F300}-\x{1F5FF}]/u', '', $text);
- $text = preg_replace('/[\x{1F680}-\x{1F6FF}]/u', '', $text);
- $text = preg_replace('/[\x{2600}-\x{26FF}]/u', '', $text);
- $text = preg_replace('/[\x{2700}-\x{27BF}]/u', '', $text);
- $text = preg_replace('/[\x{1F900}-\x{1F9FF}]/u', '', $text);
- $text = preg_replace('/[\x{1F1E0}-\x{1F1FF}]/u', '', $text);
-
- return $text;
- }
-
/**
* saveAttachment
*
diff --git a/htdocs/main.inc.php b/htdocs/main.inc.php
index 71294d9e894..344516cae1d 100644
--- a/htdocs/main.inc.php
+++ b/htdocs/main.inc.php
@@ -58,16 +58,18 @@ if (!empty($_SERVER['MAIN_SHOW_TUNING_INFO'])) {
* Return the real char for a numeric entities.
* WARNING: This function is required by testSqlAndScriptInject() and the GETPOST 'restricthtml'. Regex calling must be similar.
*
- * @param string $matches String of numeric entity
- * @return string New value
+ * @param array $matches Array with the full numeric entity (decimal or hexadecimal) into key 0, value without the leading &# into the key 1
+ * @return string New value
*/
function realCharForNumericEntities($matches)
{
$newstringnumentity = preg_replace('/;$/', '', $matches[1]);
//print ' $newstringnumentity='.$newstringnumentity;
- if (preg_match('/^x/i', $newstringnumentity)) {
+ if (preg_match('/^x/i', $newstringnumentity)) { // if numeric is hexadecimal
$newstringnumentity = hexdec(preg_replace('/^x/i', '', $newstringnumentity));
+ } else {
+ $newstringnumentity = (int) $newstringnumentity;
}
// The numeric value we don't want as entities because they encode ascii char, and why using html entities on ascii except for haking ?
@@ -75,6 +77,16 @@ function realCharForNumericEntities($matches)
return chr((int) $newstringnumentity);
}
+ // The numeric value we want in UTF8 instead of entities because it is emoji
+ include_once DOL_DOCUMENT_ROOT.'/core/lib/functions2.lib.php';
+ $arrayofemojis = getArrayOfEmoji();
+ foreach ($arrayofemojis as $valarray) {
+ if ($newstringnumentity >= hexdec($valarray[0]) && $newstringnumentity <= hexdec($valarray[1])) {
+ // This is a known emoji
+ return html_entity_decode($matches[0], ENT_COMPAT | ENT_HTML5, 'UTF-8');
+ }
+ }
+
return ''.$matches[1]; // Value will be unchanged because regex was /( )/
}
diff --git a/test/phpunit/Functions2LibTest.php b/test/phpunit/Functions2LibTest.php
index 725216c6cd6..02fab7f28e8 100644
--- a/test/phpunit/Functions2LibTest.php
+++ b/test/phpunit/Functions2LibTest.php
@@ -1,7 +1,7 @@
* Copyright (C) 2023 Alexandre Janniaux
- * Copyright (C) 2024 MDW
+ * Copyright (C) 2024 MDW
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -92,6 +92,8 @@ class Functions2LibTest extends CommonClassTest
*/
public function testIsValidMailDomain()
{
+ print __METHOD__."\n";
+
$mail = 'bidon@invalid.invalid';
$result = isValidMailDomain($mail);
$this->assertEquals(0, $result, 'Email isValidMailDomain('.$mail.') should return 0 (not valid) but returned '.$result);
@@ -108,6 +110,8 @@ class Functions2LibTest extends CommonClassTest
*/
public function testIsValidUrl()
{
+ print __METHOD__."\n";
+
//Simple check
$result = isValidUrl('http://google.com');
$this->assertEquals(1, $result);
@@ -283,6 +287,30 @@ class Functions2LibTest extends CommonClassTest
{
$time = strtotime($time_str);
$str = date(DATE_ATOM, $time).PHP_EOL;
+ print __METHOD__." time=".$time."\n";
$this->assertEquals($expected_week, numero_semaine($time), "Computed week incorrect for $str");
}
+
+
+ /**
+ * Test removeEmoji() in modes 0, 1 and 2
+ *
+ * @return void
+ */
+ public function testRemoveEmoji()
+ {
+ print __METHOD__."\n";
+
+ // Only the emoji is removed: the space before it and the space after it both remain
+ $text = 'abc ✅ def';
+ $result = removeEmoji($text, 0);
+ $this->assertEquals('abc  def', $result, 'testRemoveEmoji 0');
+
+ $result = removeEmoji($text, 1);
+ $this->assertEquals('abc  def', $result, 'testRemoveEmoji 1');
+
+ // Mode 2 is not implemented yet, so the text must come back unchanged
+ $result = removeEmoji($text, 2);
+ $this->assertEquals($text, $result, 'testRemoveEmoji 2');
+ }
}
diff --git a/test/phpunit/SecurityTest.php b/test/phpunit/SecurityTest.php
index 928faf56d03..e1d339e27cd 100644
--- a/test/phpunit/SecurityTest.php
+++ b/test/phpunit/SecurityTest.php
@@ -1110,44 +1110,6 @@ class SecurityTest extends CommonClassTest
$this->assertStringContainsString('Bad string syntax to evaluate', $result);
}
- /**
- * testDolHtmlWithNoJs()
- *
- * @return int
- */
- public function testDolHtmlWithNoJs()
- {
- global $conf;
-
- $sav1 = $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML;
- $sav2 = $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY;
-
- // Test with an emoji
- $test = 'abc ✅ def';
-
- $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = 0;
- $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = 1;
- $result = dol_htmlwithnojs($test);
- $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = $sav1;
- $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = $sav2;
-
- print __METHOD__." result for dol_htmlwithnojs and MAIN_RESTRICTHTML_ONLY_VALID_HTML=0 with emoji = ".$result."\n";
- $this->assertEquals($test, $result, 'dol_htmlwithnojs failed with an emoji when MAIN_RESTRICTHTML_ONLY_VALID_HTML=0');
-
- /*
- $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = 1;
- $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = 1;
- $result = dol_htmlwithnojs($test);
- $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = $sav1;
- $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = $sav2;
-
- print __METHOD__." result for dol_htmlwithnojs and MAIN_RESTRICTHTML_ONLY_VALID_HTML=1 with emoji = ".$result."\n";
- $this->assertEquals($test, $result, 'dol_htmlwithnojs failed with an emoji when MAIN_RESTRICTHTML_ONLY_VALID_HTML=1');
- */
-
- return 0;
- }
-
/**
* testDolPrintHTML.
* This method include calls to dol_htmlwithnojs()
@@ -1246,4 +1208,70 @@ class SecurityTest extends CommonClassTest
print __METHOD__." login=".$login."\n";
$this->assertEquals('', $login, 'Error'); // Expected '' because should failed because login 'auto' does not exists
}
+
+
+ /**
+ * testRealCharforNumericEntities()
+ *
+ * @return int
+ */
+ public function testRealCharforNumericEntities()
+ {
+ // In each fixture, key 0 is the full numeric entity and key 1 is the same entity without its leading '&#'
+
+ // Test that realCharForNumericEntities returns an ascii char when code is inside Ascii range
+ $arraytmp = array(0 => '&#97;', 1 => '97;');
+ $result = realCharForNumericEntities($arraytmp);
+ $this->assertEquals('a', $result);
+
+ // Test that realCharForNumericEntities returns an emoji utf8 char when code is inside Emoji range
+ $arraytmp = array(0 => '&#9989;', 1 => '9989;'); // Encoded as decimal
+ $result = realCharForNumericEntities($arraytmp);
+ $this->assertEquals('✅', $result);
+
+ $arraytmp = array(0 => '&#x2705;', 1 => 'x2705;'); // Encoded as hexadecimal
+ $result = realCharForNumericEntities($arraytmp);
+ $this->assertEquals('✅', $result);
+
+ return 0;
+ }
+
+
+ /**
+ * testDolHtmlWithNoJs()
+ * Check that dol_htmlwithnojs() returns a string holding an emoji unchanged, for two settings of MAIN_RESTRICTHTML_ONLY_VALID_HTML
+ *
+ * @return int
+ */
+ public function testDolHtmlWithNoJs()
+ {
+ global $conf;
+
+ // Remember the current setup so it can be restored right after each call
+ $savedvalidhtml = $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML;
+ $savedtidy = $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY;
+
+ // Test with an emoji
+ $test = 'abc ✅ def';
+
+ // Each case is array(MAIN_RESTRICTHTML_ONLY_VALID_HTML, MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY)
+ $cases = array(
+ array(0, 1),
+ array(1, 0),
+ );
+
+ foreach ($cases as $case) {
+ $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = $case[0];
+ $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = $case[1];
+ $result = dol_htmlwithnojs($test);
+ $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = $savedvalidhtml;
+ $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = $savedtidy;
+
+ $label = 'MAIN_RESTRICTHTML_ONLY_VALID_HTML='.$case[0];
+ print __METHOD__." result for dol_htmlwithnojs and ".$label." with emoji = ".$result."\n";
+ $this->assertEquals($test, $result, 'dol_htmlwithnojs failed with an emoji when '.$label);
+ }
+
+ return 0;
+ }
}