SEC: Add option MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY

This commit is contained in:
Laurent Destailleur 2023-11-29 20:19:21 +01:00
parent 3a78357253
commit 43f9210ab4
4 changed files with 217 additions and 19 deletions

View File

@ -175,6 +175,7 @@ NEW: When an user unset the batch management of products, transformation of each
SEC: #25512 applicative anti bruteforce - security on too many login attempts (#25520)
SEC: Add action confirm_... as sensitive to need a CSRF token
SEC: Disable not used PHP streams
SEC: Add option MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY
For developers or integrators:

View File

@ -632,10 +632,16 @@ print '<br>';
print '<strong>MAIN_SECURITY_MAXFILESIZE_DOWNLOADED</strong> = '.getDolGlobalString('MAIN_SECURITY_MAXFILESIZE_DOWNLOADED', '<span class="opacitymedium">'.$langs->trans("Undefined").' &nbsp; ('.$langs->trans("Recommended").': 100000000)</span>')."<br>";
print '<br>';
print '<strong>MAIN_RESTRICTHTML_ONLY_VALID_HTML</strong> = '.getDolGlobalString('MAIN_RESTRICTHTML_ONLY_VALID_HTML', '<span class="opacitymedium">'.$langs->trans("Undefined").' &nbsp; ('.$langs->trans("Recommended").': 1)</span>')."<br>";
print '<strong>MAIN_RESTRICTHTML_ONLY_VALID_HTML</strong> = '.(getDolGlobalString('MAIN_RESTRICTHTML_ONLY_VALID_HTML') ? '1' : '<span class="opacitymedium">'.$langs->trans("Undefined").'</span>');
print ' &nbsp; <span class="opacitymedium">('.$langs->trans("Recommended").": 1)</span><br>";
print '<br>';
print '<strong>MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES</strong> = '.getDolGlobalString('MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES', '<span class="opacitymedium">'.$langs->trans("Undefined").' &nbsp; ('.$langs->trans("Recommended").': 1)</span>')."<br>";
print '<strong>MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY</strong> = '.(getDolGlobalString('MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY') ? '1' : '<span class="opacitymedium">'.$langs->trans("Undefined").'</span>');
print ' &nbsp; <span class="opacitymedium">('.$langs->trans("Recommended").': 1) &nbsp; - &nbsp; Module "tidy" must be enabled (currently: '.((extension_loaded('tidy') && class_exists("tidy")) ? 'Enabled' : 'Not available').")</span><br>";
print '<br>';
print '<strong>MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES</strong> = '.(getDolGlobalString('MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES') ? '1' : '<span class="opacitymedium">'.$langs->trans("Undefined").'</span>');
print ' &nbsp; <span class="opacitymedium">('.$langs->trans("Recommended").": 1)</span><br>";
print '<br>';
print '<strong>MAIN_DISALLOW_URL_INTO_DESCRIPTIONS</strong> = '.getDolGlobalString('MAIN_DISALLOW_URL_INTO_DESCRIPTIONS', '<span class="opacitymedium">'.$langs->trans("Undefined").' &nbsp; ('.$langs->trans("Recommended").': 1)</span>')."<br>";

View File

@ -1704,6 +1704,8 @@ function dol_escape_htmltag($stringtoescape, $keepb = 0, $keepn = 0, $noescapeta
{
if ($noescapetags == 'common') {
$noescapetags = 'html,body,a,b,em,hr,i,u,ul,li,br,div,img,font,p,span,strong,table,tr,td,th,tbody,h1,h2,h3,h4,h5,h6,h7,h8,h9';
// Add also html5 tags
$noescapetags .= ',header,footer,nav,section,menu,menuitem';
}
if ($cleanalsojavascript) {
$stringtoescape = dol_string_onlythesehtmltags($stringtoescape, 0, 0, $cleanalsojavascript, 0, array(), 0);
@ -7318,7 +7320,8 @@ function dol_string_onlythesehtmltags($stringtoclean, $cleanalsosomestyles = 1,
if (empty($allowed_tags)) {
$allowed_tags = array(
"html", "head", "meta", "body", "article", "a", "abbr", "b", "blockquote", "br", "cite", "div", "dl", "dd", "dt", "em", "font", "img", "ins", "hr", "i", "li",
"ol", "p", "q", "s", "section", "span", "strike", "strong", "title", "table", "tr", "th", "td", "u", "ul", "sup", "sub", "blockquote", "pre", "h1", "h2", "h3", "h4", "h5", "h6"
"ol", "p", "q", "s", "span", "strike", "strong", "title", "table", "tr", "th", "td", "u", "ul", "sup", "sub", "blockquote", "pre", "h1", "h2", "h3", "h4", "h5", "h6",
"header", "footer", "nav", "section", "menu", "menuitem" // html5 tags
);
}
$allowed_tags[] = "comment"; // this tags is added to manage comment <!--...--> that are replaced into <comment>...</comment>
@ -7605,6 +7608,42 @@ function dol_htmlwithnojs($stringtoencode, $nouseofiframesandbox = 0, $check = '
}
}
if (!empty($out) && getDolGlobalString('MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY') && $check != 'restricthtmlallowunvalid') {
try {
// Try cleaning using tidy
if (extension_loaded('tidy') && class_exists("tidy")) {
//print "aaa".$out."\n";
// See options at https://tidy.sourceforge.net/docs/quickref.html
$config = array(
'clean' => false,
'quote-marks' => false, // do not replace " that are used for real text content (not a string symbol for html attribute) into &quot;
'doctype' => 'strict',
'show-body-only' => true,
"indent-attributes" => false,
"vertical-space" => false,
'ident' => false,
"wrap" => 0
// HTML5 tags
//'new-blocklevel-tags' => 'article aside audio bdi canvas details dialog figcaption figure footer header hgroup main menu menuitem nav section source summary template track video',
//'new-blocklevel-tags' => 'footer header section menu menuitem'
//'new-empty-tags' => 'command embed keygen source track wbr',
//'new-inline-tags' => 'audio command datalist embed keygen mark menuitem meter output progress source time video wbr',
);
// Tidy
$tidy = new tidy();
$out = $tidy->repairString($out, $config, 'utf8');
//print "xxx".$out;exit;
}
} catch (Exception $e) {
// If error, invalid HTML string with no way to clean it
//print $e->getMessage();
$out = 'InvalidHTMLStringCantBeCleaned';
}
}
// Clean some html entities that are useless so text is cleaner
$out = preg_replace('/&(tab|newline);/i', ' ', $out);

View File

@ -176,6 +176,7 @@ class SecurityTest extends PHPUnit\Framework\TestCase
$this->assertEquals($tmplangs->defaultlang, 'malicioustextwithquote_MALICIOUSTEXTWITHQUOTE');
}
/**
* testSqlAndScriptInjectWithPHPUnit
*
@ -379,9 +380,11 @@ class SecurityTest extends PHPUnit\Framework\TestCase
// Force default mode
$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = 0;
$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = 0;
$conf->global->MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES = 0;
$_COOKIE["id"]=111;
$_POST["param0"]='A real string with <a href="rrr" title="aa&quot;bb">aaa</a> and " and \' and &amp; inside content';
$_GET["param1"]="222";
$_POST["param1"]="333";
$_GET["param2"]='a/b#e(pr)qq-rr\cc';
@ -413,20 +416,27 @@ class SecurityTest extends PHPUnit\Framework\TestCase
$_POST["param19"]='<a href="j&Tab;a&Tab;v&Tab;asc&NewLine;ri&Tab;pt:&lpar;alert(document.cookie)&rpar;">XSS</a>';
//$_POST["param19"]='<a href="javascript:alert(document.cookie)">XSS</a>';
$result=GETPOST('id', 'int'); // Must return nothing
print __METHOD__." result=".$result."\n";
$this->assertEquals($result, '');
$this->assertEquals('', $result);
$result=GETPOST("param1", 'int');
print __METHOD__." result=".$result."\n";
$this->assertEquals($result, 222, 'Test on param1 with no 3rd param');
$this->assertEquals(222, $result, 'Test on param1 with no 3rd param');
$result=GETPOST("param1", 'int', 2);
print __METHOD__." result=".$result."\n";
$this->assertEquals($result, 333, 'Test on param1 with 3rd param = 2');
$this->assertEquals(333, $result, 'Test on param1 with 3rd param = 2');
// Test with alpha
$result=GETPOST("param0", 'alpha'); // a simple format, so " completely removed
$resultexpected = 'A real string with aaa and and \' and & inside content';
print __METHOD__." result=".$result."\n";
$this->assertEquals($resultexpected, $result, 'Test on param0');
$result=GETPOST("param2", 'alpha');
print __METHOD__." result=".$result."\n";
$this->assertEquals($result, $_GET["param2"], 'Test on param2');
@ -472,7 +482,7 @@ class SecurityTest extends PHPUnit\Framework\TestCase
// Test with nohtml
$result=GETPOST("param6", 'nohtml');
print __METHOD__." result=".$result."\n";
print __METHOD__." result6=".$result."\n";
$this->assertEquals('">', $result);
// Test with alpha = alphanohtml. We must convert the html entities like &#110; and disable all entities
@ -525,16 +535,23 @@ class SecurityTest extends PHPUnit\Framework\TestCase
print __METHOD__." result=".$result."\n";
$this->assertEquals('n n > < XSS', $result, 'Test that html entities are decoded with alpha');
// Test with alphawithlgt
$result=GETPOST("param11", 'alphawithlgt');
print __METHOD__." result=".$result."\n";
$this->assertEquals(trim($_POST["param11"]), $result, 'Test an email string with alphawithlgt');
// Test with restricthtml: we must remove html open/close tag and content but not htmlentities (we can decode html entities for ascii chars like &#110;)
$result=GETPOST("param0", 'restricthtml');
$resultexpected = 'A real string with <a href="rrr" title="aa&quot;bb">aaa</a> and " and \' and &amp; inside content';
print __METHOD__." result=".$result."\n";
$this->assertEquals($resultexpected, $result, 'Test on param0');
$result=GETPOST("param6", 'restricthtml');
print __METHOD__." result param6=".$result."\n";
print __METHOD__." result for param6=".$result." - before=".$_POST["param6"]."\n";
$this->assertEquals('&quot;&gt;', $result);
$result=GETPOST("param7", 'restricthtml');
@ -570,19 +587,83 @@ class SecurityTest extends PHPUnit\Framework\TestCase
$this->assertEquals('<a href="&lpar;alert(document.cookie)&rpar;">XSS</a>', $result, 'Test 19');
// Test with restricthtml + MAIN_RESTRICTHTML_ONLY_VALID_HTML to test disabling of bad attributes
$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = 1;
// Test with restricthtml + MAIN_RESTRICTHTML_ONLY_VALID_HTML only to test disabling of bad attributes
$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = 1;
$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = 0;
//$_POST["param0"] = 'A real string with <a href="rrr" title="aabb">aaa</a> and " inside content';
$result=GETPOST("param0", 'restricthtml');
$resultexpected = 'A real string with <a href="rrr" title=\'aa"bb\'>aaa</a> and " and \' and &amp; inside content';
print __METHOD__." result for param0=".$result."\n";
$this->assertEquals($resultexpected, $result, 'Test on param0');
$result=GETPOST("param15", 'restricthtml'); // param15 = <img onerror<=alert(document.domain)> src=>0xbeefed that is a dangerous string
print __METHOD__." result=".$result."\n";
// $this->assertEquals('InvalidHTMLStringCantBeCleaned', $result, 'Test 15b'); // With some PHP and libxml version, we got this result when parsing invalid HTML, but ...
print __METHOD__." result for param15=".$result."\n";
//$this->assertEquals('InvalidHTMLStringCantBeCleaned', $result, 'Test 15b'); // With some PHP and libxml version, we got this result when parsing invalid HTML, but ...
//$this->assertEquals('<img onerror> src=&gt;0xbeefed', $result, 'Test 15b'); // ... on other PHP and libxml versions, we got a HTML that has been cleaned
$result=GETPOST("param6", 'restricthtml'); // param6 = "&gt;
print __METHOD__." result for param6=".$result." - before=".$_POST["param6"]."\n";
$this->assertEquals('"&gt;', $result);
$result=GETPOST("param7", 'restricthtml'); // param7 = "c:\this is a path~1\aaa&#110; &#x&#x31;&#x31;&#x30;;" abc<bad>def</bad>
print __METHOD__." result param7 = ".$result."\n";
$this->assertEquals('"c:\this is a path~1\aaan 110;" abcdef', $result);
// Test with restricthtml + MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY only to test disabling of bad attributes
if (extension_loaded('tidy') && class_exists("tidy")) {
$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = 0;
$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = 1;
$result=GETPOST("param0", 'restricthtml');
$resultexpected = 'A real string with <a href="rrr" title="aa&quot;bb">aaa</a> and " and \' and & inside content';
print __METHOD__." result for param0=".$result."\n";
$this->assertEquals($resultexpected, $result, 'Test on param0');
$result=GETPOST("param15", 'restricthtml'); // param15 = <img onerror<=alert(document.domain)> src=>0xbeefed that is a dangerous string
print __METHOD__." result=".$result."\n";
$result=GETPOST("param6", 'restricthtml');
print __METHOD__." result for param6=".$result." - before=".$_POST["param6"]."\n";
$this->assertEquals('"&gt;', $result);
$result=GETPOST("param7", 'restricthtml');
print __METHOD__." result param7 = ".$result."\n";
$this->assertEquals('"c:\this is a path~1\aaan &amp;#x110;" abcdef', $result);
}
// Test with restricthtml + MAIN_RESTRICTHTML_ONLY_VALID_HTML + MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY to test disabling of bad attributes
if (extension_loaded('tidy') && class_exists("tidy")) {
$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = 1;
$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = 1;
$result=GETPOST("param0", 'restricthtml');
$resultexpected = 'A real string with <a href="rrr" title=\'aa"bb\'>aaa</a> and " and \' and & inside content';
print __METHOD__." result for param0=".$result."\n";
$this->assertEquals($resultexpected, $result, 'Test on param0');
$result=GETPOST("param15", 'restricthtml'); // param15 = <img onerror<=alert(document.domain)> src=>0xbeefed that is a dangerous string
print __METHOD__." result=".$result."\n";
$result=GETPOST("param6", 'restricthtml');
print __METHOD__." result for param6=".$result." - before=".$_POST["param6"]."\n";
$this->assertEquals('"&gt;', $result);
$result=GETPOST("param7", 'restricthtml');
print __METHOD__." result param7 = ".$result."\n";
$this->assertEquals('"c:\this is a path~1\aaan 110;" abcdef', $result);
}
unset($conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML);
// Test with restricthtml + MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES to test disabling of bad attributes
unset($conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML);
unset($conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY);
$conf->global->MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES = 1;
$result=GETPOST("param15", 'restricthtml');
@ -664,7 +745,7 @@ class SecurityTest extends PHPUnit\Framework\TestCase
/**
* testEncodeDecode
*
* @return number
* @return int
*/
public function testEncodeDecode()
{
@ -686,7 +767,7 @@ class SecurityTest extends PHPUnit\Framework\TestCase
/**
* testDolHTMLEntityDecode
*
* @return number
* @return int
*/
public function testDolHTMLEntityDecode()
{
@ -704,7 +785,7 @@ class SecurityTest extends PHPUnit\Framework\TestCase
/**
* testDolStringOnlyTheseHtmlTags
*
* @return number
* @return int
*/
public function testDolStringOnlyTheseHtmlTags()
{
@ -734,7 +815,7 @@ class SecurityTest extends PHPUnit\Framework\TestCase
/**
* testDolStringOnlyTheseHtmlAttributes
*
* @return number
* @return int
*/
public function testDolStringOnlyTheseHtmlAttributes()
{
@ -753,7 +834,7 @@ class SecurityTest extends PHPUnit\Framework\TestCase
/**
* testGetRandomPassword
*
* @return number
* @return int
*/
public function testGetRandomPassword()
{
@ -804,7 +885,7 @@ class SecurityTest extends PHPUnit\Framework\TestCase
/**
* testGetURLContent
*
* @return number
* @return int
*/
public function testGetURLContent()
{
@ -1058,6 +1139,77 @@ class SecurityTest extends PHPUnit\Framework\TestCase
}
/**
 * testDolPrintHTML.
 * Checks the string returned by dolPrintHTML() for several inputs (entities only, unknown
 * tags, HTML5 tags, orphan table cell, plain text) with the HTML cleaning options enabled.
 * NOTE(review): dolPrintHTML() is expected to include calls to dol_htmlwithnojs() - confirm in functions.lib.php.
 *
 * The expected strings depend on whether the cleaning with tidy is used or not, so the option
 * MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY is always forced (to 1 or 0) instead of being left
 * to a value possibly inherited from a previous test or from the setup.
 *
 * @return int
 */
public function testDolPrintHTML()
{
	global $conf;

	// Set options for cleaning data
	$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = 1;
	// Enable option MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY if possible, and force it off if not,
	// so the expected results chosen below always match the module really available.
	$tidyavailable = (extension_loaded('tidy') && class_exists("tidy"));
	$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = ($tidyavailable ? 1 : 0);
	$conf->global->MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES = 1;

	// For a string that contains only HTML entities (no HTML tags): it must be returned unchanged
	$stringtotest = "&quot;&gt;";
	$stringfixed = "&quot;&gt;";
	$result = dolPrintHTML($stringtotest);
	print __METHOD__." result=".$result."\n";
	$this->assertEquals($stringfixed, $result, 'Error');

	// For a string that is already HTML with an unknown tag (<z>) and html5 tags (<header>, <footer>):
	// the unknown tag is removed but its content is kept, the html5 tags are kept.
	// When tidy is used, a new line is also added between the block elements.
	$stringtotest = "testA\n<h1>hhhh</h1><z>ddd</z><header>aaa</header><footer>bbb</footer>";
	if ($tidyavailable) {
		$stringfixed = "testA\n<h1>hhhh</h1>\nddd\n<header>aaa</header>\n<footer>bbb</footer>";
	} else {
		$stringfixed = "testA\n<h1>hhhh</h1>ddd<header>aaa</header><footer>bbb</footer>";
	}
	$result = dolPrintHTML($stringtotest);
	print __METHOD__." result=".$result."\n";
	$this->assertEquals($stringfixed, $result, 'Error');

	// For a string that is already HTML (contains HTML tags) but badly formatted (a <td> alone):
	// when tidy is used, the orphan cell is wrapped into a table moved at the end, otherwise the string is kept as is.
	$stringtotest = "testB\n<h1>hhh</h1>\n<td>td alone</td><h1>iii</h1>";
	if ($tidyavailable) {
		$stringfixed = "testB\n<h1>hhh</h1>\n<h1>iii</h1>\n<table>\n<tr>\n<td>td alone</td>\n</tr>\n</table>";
	} else {
		$stringfixed = "testB\n<h1>hhh</h1>\n<td>td alone</td><h1>iii</h1>";
	}
	$result = dolPrintHTML($stringtotest);
	print __METHOD__." result=".$result."\n";
	$this->assertEquals($stringfixed, $result, 'Error');

	// For a string with no HTML tags: the new line must be converted into <br> followed by the new line
	$stringtotest = "testC\ntest";
	$stringfixed = "testC<br>\ntest";
	$result = dolPrintHTML($stringtotest);
	print __METHOD__." result=".$result."\n";
	$this->assertEquals($stringfixed, $result, 'Error');

	return 0;
}
/**
* testCheckLoginPassEntity
*