mirror of
https://github.com/Dolibarr/dolibarr.git
synced 2025-02-20 13:46:52 +01:00
NEW Add function to manipulate emojis
This commit is contained in:
parent
eacae6206b
commit
1b2bad3f42
|
|
@ -8118,13 +8118,12 @@ function dol_htmlwithnojs($stringtoencode, $nouseofiframesandbox = 0, $check = '
|
|||
// like '<h1>Foo</h1><p>bar</p>' that wrongly ends up, without the trick, with '<h1>Foo<p>bar</p></h1>'
|
||||
// like 'abc' that wrongly ends up, without the trick, with '<p>abc</p>'
|
||||
|
||||
// TODO Must accept emoji with MAIN_RESTRICTHTML_ONLY_VALID_HTML...
|
||||
|
||||
if (dol_textishtml($out)) {
|
||||
$out = '<?xml encoding="UTF-8"><div class="tricktoremove">'.$out.'</div>';
|
||||
} else {
|
||||
$out = '<?xml encoding="UTF-8"><div class="tricktoremove">'.dol_nl2br($out).'</div>';
|
||||
}
|
||||
|
||||
$dom->loadHTML($out, LIBXML_HTML_NODEFDTD | LIBXML_ERR_NONE | LIBXML_HTML_NOIMPLIED | LIBXML_NONET | LIBXML_NOWARNING | LIBXML_NOERROR | LIBXML_NOXMLDECL);
|
||||
$out = trim($dom->saveHTML());
|
||||
|
||||
|
|
@ -8195,7 +8194,7 @@ function dol_htmlwithnojs($stringtoencode, $nouseofiframesandbox = 0, $check = '
|
|||
},
|
||||
$out
|
||||
);
|
||||
|
||||
var_dump($out);
|
||||
|
||||
// Now we remove all remaining HTML entities starting with a number. We don't want such entities.
|
||||
$out = preg_replace('/&#x?[0-9]+/i', '', $out); // For example if we have javascript with an entities without the ; to hide the 'a' of 'javascript'.
|
||||
|
|
|
|||
|
|
@ -2982,3 +2982,62 @@ function removeGlobalParenthesis($string)
|
|||
|
||||
return $string;
|
||||
}
|
||||
|
||||
|
||||
/**
 * Return the list of Unicode ranges considered as common emojis.
 *
 * Each entry maps a short label to a pair array(first, last) of code points.
 * Both bounds are inclusive and written as hexadecimal strings without prefix,
 * so they can be used directly inside a \x{...} regex escape or with hexdec().
 *
 * @return array<string,array{0:string,1:string}>	Array of emoji ranges in hexadecimal
 */
function getArrayOfEmoji()
{
	// Every bound must be hexadecimal. The check mark U+2705 (9989 in decimal) belongs to
	// the Dingbats range, so it must not get an entry of its own written in decimal:
	// '9989' read as hexadecimal is U+9989, which is a CJK ideograph and not an emoji.
	$arrayofcommonemoji = array(
		'misc' => array('2600', '26FF'),	// Miscellaneous Symbols
		'ding' => array('2700', '27BF'),	// Dingbats
		'vars' => array('FE00', 'FE0F'),	// Variation Selectors
		'pict' => array('1F300', '1F5FF'),	// Miscellaneous Symbols and Pictographs
		'emot' => array('1F600', '1F64F'),	// Emoticons
		'tran' => array('1F680', '1F6FF'),	// Transport and Map Symbols
		'flag' => array('1F1E0', '1F1FF'),	// Flags (note: may be 1F1E6 instead of 1F1E0)
		'supp' => array('1F900', '1F9FF'),	// Supplemental Symbols and Pictographs
	);

	return $arrayofcommonemoji;
}
|
||||
|
||||
/**
 * Remove emojis from a string (for example an email subject or body).
 *
 * Note that to accept emoji in database, you must use utf8mb4, utf8mb3 is not enough.
 *
 * @param 	string	$text			String to sanitize (expected to be valid UTF-8)
 * @param	int		$allowedemoji	Mode to allow emoji:
 * 									0=no emoji (large removal of every char from U+2600 to U+10FFFF),
 * 									1=remove the main known emojis (default),
 * 									2=keep only the main known (not implemented, nothing is removed),
 * 									3=accept all
 * @return 	string					Sanitized string. If a replacement fails (typically because $text
 * 									is not valid UTF-8), the string is returned as it was before the failure.
 */
function removeEmoji($text, $allowedemoji = 1)
{
	$patterns = array();

	if ($allowedemoji == 0) {
		// For a large removal: end of the Basic Multilingual Plane, then all supplementary planes
		$patterns[] = '/[\x{2600}-\x{FFFF}]/u';
		$patterns[] = '/[\x{10000}-\x{10FFFF}]/u';
	}

	// Delete emoji chars with a regex
	// See https://www.unicode.org/emoji/charts/full-emoji-list.html
	if ($allowedemoji == 1) {
		// The list of ranges is only needed in this mode
		foreach (getArrayOfEmoji() as $valarray) {
			$patterns[] = '/[\x{'.$valarray[0].'}-\x{'.$valarray[1].'}]/u';
		}
	}

	// Mode 2 is not yet implemented and mode 3 accepts everything: no pattern to apply.

	foreach ($patterns as $pattern) {
		$cleaned = preg_replace($pattern, '', $text);
		if ($cleaned === null) {
			// preg_replace() returns null on error (for example a subject that is not valid UTF-8
			// with the /u modifier). Keep the text we have instead of propagating null, which
			// would wipe the content and trigger deprecation notices on the next call.
			break;
		}
		$text = $cleaned;
	}

	return $text;
}
|
||||
|
|
|
|||
|
|
@ -28,6 +28,7 @@ include_once DOL_DOCUMENT_ROOT .'/emailcollector/lib/emailcollector.lib.php';
|
|||
|
||||
require_once DOL_DOCUMENT_ROOT .'/core/class/commonobject.class.php';
|
||||
require_once DOL_DOCUMENT_ROOT .'/core/lib/files.lib.php';
|
||||
require_once DOL_DOCUMENT_ROOT .'/core/lib/functions2.lib.php';
|
||||
|
||||
require_once DOL_DOCUMENT_ROOT .'/comm/propal/class/propal.class.php'; // Customer Proposal
|
||||
require_once DOL_DOCUMENT_ROOT .'/commande/class/commande.class.php'; // Sale Order
|
||||
|
|
@ -1785,7 +1786,7 @@ class EmailCollector extends CommonObject
|
|||
dol_syslog("msgid=".$overview['message_id']." date=".dol_print_date($overview['date'], 'dayrfc', 'gmt')." from=".$overview['from']." to=".$overview['to']." subject=".$overview['subject']);
|
||||
|
||||
// Removed emojis
|
||||
$overview['subject'] = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $overview['subject']);
|
||||
$overview['subject'] = removeEmoji($overview['subject'], getDolGlobalInt('MAIN_EMAIL_COLLECTOR_ACCEPT_EMOJIS', 1));
|
||||
} else {
|
||||
dol_syslog("msgid=".$overview[0]->message_id." date=".dol_print_date($overview[0]->udate, 'dayrfc', 'gmt')." from=".$overview[0]->from." to=".$overview[0]->to." subject=".$overview[0]->subject);
|
||||
|
||||
|
|
@ -1794,7 +1795,7 @@ class EmailCollector extends CommonObject
|
|||
$overview[0]->from = $this->decodeSMTPSubject($overview[0]->from);
|
||||
|
||||
// Removed emojis
|
||||
$overview[0]->subject = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $overview[0]->subject);
|
||||
$overview[0]->subject = removeEmoji($overview[0]->subject, getDolGlobalInt('MAIN_EMAIL_COLLECTOR_ACCEPT_EMOJIS', 1));
|
||||
}
|
||||
// GET IMAP email structure/content
|
||||
global $htmlmsg, $plainmsg, $charset, $attachments;
|
||||
|
|
@ -1825,8 +1826,7 @@ class EmailCollector extends CommonObject
|
|||
// Removed emojis
|
||||
|
||||
if (utf8_valid($messagetext)) {
|
||||
//$messagetext = preg_replace('/[\x{10000}-\x{10FFFF}]/u', "\xEF\xBF\xBD", $messagetext);
|
||||
$messagetext = $this->removeEmoji($messagetext);
|
||||
$messagetext = removeEmoji($messagetext, getDolGlobalInt('MAIN_EMAIL_COLLECTOR_ACCEPT_EMOJIS', 1));
|
||||
} else {
|
||||
$operationslog .= '<br>Discarded - Email body is not valid utf8';
|
||||
dol_syslog(" Discarded - Email body is not valid utf8");
|
||||
|
|
@ -3714,26 +3714,6 @@ class EmailCollector extends CommonObject
|
|||
return $subject;
|
||||
}
|
||||
|
||||
/**
 * Strip the most common emoji characters from an email content
 *
 * @param 	string	$text	String to sanitize
 * @return 	string			Sanitized string
 */
protected function removeEmoji($text)
{
	// Delete the emoji characters with regular expressions, one Unicode range per pattern.
	// Patterns are applied one after the other, in this order.
	$emojipatterns = array(
		'/[\x{1F600}-\x{1F64F}]/u',
		'/[\x{1F300}-\x{1F5FF}]/u',
		'/[\x{1F680}-\x{1F6FF}]/u',
		'/[\x{2600}-\x{26FF}]/u',
		'/[\x{2700}-\x{27BF}]/u',
		'/[\x{1F900}-\x{1F9FF}]/u',
		'/[\x{1F1E0}-\x{1F1FF}]/u',
	);

	foreach ($emojipatterns as $emojipattern) {
		$text = preg_replace($emojipattern, '', $text);
	}

	return $text;
}
|
||||
|
||||
/**
|
||||
* saveAttachment
|
||||
*
|
||||
|
|
|
|||
|
|
@ -58,16 +58,18 @@ if (!empty($_SERVER['MAIN_SHOW_TUNING_INFO'])) {
|
|||
* Return the real char for a numeric entities.
|
||||
* WARNING: This function is required by testSqlAndScriptInject() and the GETPOST 'restricthtml'. Regex calling must be similar.
|
||||
*
|
||||
* @param string $matches String of numeric entity
|
||||
* @return string New value
|
||||
* @param array<int:string> $matches Array with a decimal numeric entity into key 0, value without the &# into the key 1
|
||||
* @return string New value
|
||||
*/
|
||||
function realCharForNumericEntities($matches)
|
||||
{
|
||||
$newstringnumentity = preg_replace('/;$/', '', $matches[1]);
|
||||
//print ' $newstringnumentity='.$newstringnumentity;
|
||||
|
||||
if (preg_match('/^x/i', $newstringnumentity)) {
|
||||
if (preg_match('/^x/i', $newstringnumentity)) { // if numeric is hexadecimal
|
||||
$newstringnumentity = hexdec(preg_replace('/^x/i', '', $newstringnumentity));
|
||||
} else {
|
||||
$newstringnumentity = (int) $newstringnumentity;
|
||||
}
|
||||
|
||||
// The numeric values we don't want as entities because they encode ASCII chars: why use HTML entities on ASCII except for hacking?
|
||||
|
|
@ -75,6 +77,16 @@ function realCharForNumericEntities($matches)
|
|||
return chr((int) $newstringnumentity);
|
||||
}
|
||||
|
||||
// The numeric value we want in UTF8 instead of entities because it is emoji
|
||||
include_once DOL_DOCUMENT_ROOT.'/core/lib/functions2.lib.php';
|
||||
$arrayofemojis = getArrayOfEmoji();
|
||||
foreach ($arrayofemojis as $valarray) {
|
||||
if ($newstringnumentity >= hexdec($valarray[0]) && $newstringnumentity <= hexdec($valarray[1])) {
|
||||
// This is a known emoji
|
||||
return html_entity_decode($matches[0], ENT_COMPAT | ENT_HTML5, 'UTF-8');
|
||||
}
|
||||
}
|
||||
|
||||
return '&#'.$matches[1]; // Value will be unchanged because regex was /&#( )/
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -1,7 +1,7 @@
|
|||
<?php
|
||||
/* Copyright (C) 2010-2012 Laurent Destailleur <eldy@users.sourceforge.net>
|
||||
* Copyright (C) 2023 Alexandre Janniaux <alexandre.janniaux@gmail.com>
|
||||
* Copyright (C) 2024 MDW <mdeweerd@users.noreply.github.com>
|
||||
* Copyright (C) 2024 MDW <mdeweerd@users.noreply.github.com>
|
||||
*
|
||||
* This program is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
|
|
@ -92,6 +92,8 @@ class Functions2LibTest extends CommonClassTest
|
|||
*/
|
||||
public function testIsValidMailDomain()
|
||||
{
|
||||
print __METHOD__."\n";
|
||||
|
||||
$mail = 'bidon@invalid.invalid';
|
||||
$result = isValidMailDomain($mail);
|
||||
$this->assertEquals(0, $result, 'Email isValidMailDomain('.$mail.') should return 0 (not valid) but returned '.$result);
|
||||
|
|
@ -108,6 +110,8 @@ class Functions2LibTest extends CommonClassTest
|
|||
*/
|
||||
public function testIsValidUrl()
|
||||
{
|
||||
print __METHOD__."\n";
|
||||
|
||||
//Simple check
|
||||
$result = isValidUrl('http://google.com');
|
||||
$this->assertEquals(1, $result);
|
||||
|
|
@ -283,6 +287,30 @@ class Functions2LibTest extends CommonClassTest
|
|||
{
|
||||
$time = strtotime($time_str);
|
||||
$str = date(DATE_ATOM, $time).PHP_EOL;
|
||||
print __METHOD__." time=".$time."\n";
|
||||
$this->assertEquals($expected_week, numero_semaine($time), "Computed week incorrect for $str");
|
||||
}
|
||||
|
||||
|
||||
/**
 * Test removeEmoji() with the different values of $allowedemoji
 *
 * @return void
 */
public function testRemoveEmoji()
{
	print __METHOD__."\n";

	$text = 'abc ✅ def';
	// Only the emoji itself is removed: the space before and the space after it are both kept
	$expectedwithoutemoji = 'abc  def';

	// 0 = no emoji at all
	$result = removeEmoji($text, 0);
	$this->assertEquals($expectedwithoutemoji, $result, 'testRemoveEmoji 0');

	// 1 = remove the main known emojis
	$result = removeEmoji($text, 1);
	$this->assertEquals($expectedwithoutemoji, $result, 'testRemoveEmoji 1');

	// 2 = not implemented, the string must come back unchanged
	$result = removeEmoji($text, 2);
	$this->assertEquals($text, $result, 'testRemoveEmoji 2');

	// 3 = accept all, the string must come back unchanged
	$result = removeEmoji($text, 3);
	$this->assertEquals($text, $result, 'testRemoveEmoji 3');
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1110,44 +1110,6 @@ class SecurityTest extends CommonClassTest
|
|||
$this->assertStringContainsString('Bad string syntax to evaluate', $result);
|
||||
}
|
||||
|
||||
/**
 * testDolHtmlWithNoJs()
 *
 * Check that dol_htmlwithnojs() gives back a string containing an emoji unchanged.
 *
 * @return int
 */
public function testDolHtmlWithNoJs()
{
	global $conf;

	// Remember the current setup so it can be put back right after the call
	$savedonlyvalidhtml = $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML;
	$savedonlyvalidhtmltidy = $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY;

	// String with an emoji used as input and as expected output
	$stringwithemoji = 'abc ✅ def';

	// Scenario with MAIN_RESTRICTHTML_ONLY_VALID_HTML disabled
	$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = 0;
	$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = 1;
	$sanitized = dol_htmlwithnojs($stringwithemoji);
	$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = $savedonlyvalidhtml;
	$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = $savedonlyvalidhtmltidy;

	print __METHOD__." result for dol_htmlwithnojs and MAIN_RESTRICTHTML_ONLY_VALID_HTML=0 with emoji = ".$sanitized."\n";
	$this->assertEquals($stringwithemoji, $sanitized, 'dol_htmlwithnojs failed with an emoji when MAIN_RESTRICTHTML_ONLY_VALID_HTML=0');

	// Scenario with MAIN_RESTRICTHTML_ONLY_VALID_HTML enabled: currently disabled
	/*
	$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = 1;
	$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = 1;
	$sanitized = dol_htmlwithnojs($stringwithemoji);
	$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = $savedonlyvalidhtml;
	$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = $savedonlyvalidhtmltidy;

	print __METHOD__." result for dol_htmlwithnojs and MAIN_RESTRICTHTML_ONLY_VALID_HTML=1 with emoji = ".$sanitized."\n";
	$this->assertEquals($stringwithemoji, $sanitized, 'dol_htmlwithnojs failed with an emoji when MAIN_RESTRICTHTML_ONLY_VALID_HTML=1');
	*/

	return 0;
}
|
||||
|
||||
/**
|
||||
* testDolPrintHTML.
|
||||
* This method include calls to dol_htmlwithnojs()
|
||||
|
|
@ -1246,4 +1208,70 @@ class SecurityTest extends CommonClassTest
|
|||
print __METHOD__." login=".$login."\n";
|
||||
$this->assertEquals('', $login, 'Error'); // Expected '' because should failed because login 'auto' does not exists
|
||||
}
|
||||
|
||||
|
||||
/**
 * testRealCharforNumericEntities()
 *
 * The array given to realCharForNumericEntities() has the same shape as a regex match:
 * key 0 holds the full numeric entity, key 1 holds the entity without its leading '&#'.
 *
 * @return int
 */
public function testRealCharforNumericEntities()
{
	// Test that realCharForNumericEntities returns an ascii char when code is inside the Ascii range
	$arraytmp = array(0 => '&#97;', 1 => '97;');
	$result = realCharForNumericEntities($arraytmp);
	$this->assertEquals('a', $result);

	// Test that realCharForNumericEntities returns an emoji utf8 char when code is inside an Emoji range.
	// Key 0 must be the real entity: with an already decoded char the check would pass without decoding anything.
	$arraytmp = array(0 => '&#9989;', 1 => '9989;');	// Encoded as decimal
	$result = realCharForNumericEntities($arraytmp);
	$this->assertEquals('✅', $result);

	$arraytmp = array(0 => '&#x2705;', 1 => 'x2705;');	// Encoded as hexadecimal
	$result = realCharForNumericEntities($arraytmp);
	$this->assertEquals('✅', $result);

	return 0;
}
|
||||
|
||||
|
||||
/**
 * testDolHtmlWithNoJs()
 *
 * Check that dol_htmlwithnojs() gives back a string containing an emoji unchanged,
 * with MAIN_RESTRICTHTML_ONLY_VALID_HTML disabled and then enabled.
 *
 * @return int
 */
public function testDolHtmlWithNoJs()
{
	global $conf;

	// Remember the current setup so it can be put back right after each call
	$savedonlyvalidhtml = $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML;
	$savedonlyvalidhtmltidy = $conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY;

	// String with an emoji used as input and as expected output
	$stringwithemoji = 'abc ✅ def';

	// One entry per setup to check, run in this order
	$scenarios = array(
		array(
			'onlyvalidhtml' => 0,
			'tidy' => 1,
			'log' => " result for dol_htmlwithnojs and MAIN_RESTRICTHTML_ONLY_VALID_HTML=0 with emoji = ",
			'failmsg' => 'dol_htmlwithnojs failed with an emoji when MAIN_RESTRICTHTML_ONLY_VALID_HTML=0',
		),
		array(
			'onlyvalidhtml' => 1,
			'tidy' => 0,
			'log' => " result for dol_htmlwithnojs and MAIN_RESTRICTHTML_ONLY_VALID_HTML=1 with emoji = ",
			'failmsg' => 'dol_htmlwithnojs failed with an emoji when MAIN_RESTRICTHTML_ONLY_VALID_HTML=1',
		),
	);

	foreach ($scenarios as $scenario) {
		$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = $scenario['onlyvalidhtml'];
		$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = $scenario['tidy'];
		$sanitized = dol_htmlwithnojs($stringwithemoji);
		// Restore the setup before asserting, so a failure does not leak the test values
		$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = $savedonlyvalidhtml;
		$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = $savedonlyvalidhtmltidy;

		print __METHOD__.$scenario['log'].$sanitized."\n";
		$this->assertEquals($stringwithemoji, $sanitized, $scenario['failmsg']);
	}

	return 0;
}
|
||||
}
|
||||
|
|
|
|||
Loading…
Reference in New Issue
Block a user