FIX Better sanitizing for javascript. Fix <> bypass.

This commit is contained in:
Laurent Destailleur 2024-07-27 18:07:37 +02:00
parent 73e4aa3147
commit 8ac368ce17
4 changed files with 183 additions and 21 deletions

View File

@ -7859,14 +7859,14 @@ function dol_string_nohtmltag($stringtoclean, $removelinefeed = 1, $pagecodeto =
* Clean a string to keep only desirable HTML tags.
* WARNING: This also clean HTML comments (because they can be used to obfuscate tag name).
*
* @param string $stringtoclean String to clean
* @param int $cleanalsosomestyles Remove absolute/fixed positioning from inline styles
* @param int $removeclassattribute 1=Remove the class attribute from tags
* @param int $cleanalsojavascript Remove also occurrence of 'javascript:'.
* @param int $allowiframe Allow iframe tags.
* @param string[] $allowed_tags List of allowed tags to replace the default list
* @param int $allowlink Allow "link" tags.
* @return string String cleaned
* @param string $stringtoclean String to clean
* @param int $cleanalsosomestyles Remove absolute/fixed positioning from inline styles
* @param int $removeclassattribute 1=Remove the class attribute from tags
* @param int $cleanalsojavascript Remove also occurrence of 'javascript:'.
* @param int $allowiframe Allow iframe tags.
* @param string[] $allowed_tags List of allowed tags to replace the default list
* @param int $allowlink Allow "link" tags.
* @return string String cleaned
*
* @see dol_htmlwithnojs() dol_escape_htmltag() strip_tags() dol_string_nohtmltag() dol_string_neverthesehtmltags()
*/
@ -7904,9 +7904,10 @@ function dol_string_onlythesehtmltags($stringtoclean, $cleanalsosomestyles = 1,
$stringtoclean = preg_replace('/&colon;/i', ':', $stringtoclean);
$stringtoclean = preg_replace('/&#58;|&#0+58|&#x3A/i', '', $stringtoclean); // refused string ':' encoded (no reason to have a : encoded like this) to disable 'javascript:...'
// Remove all HTML tags
$temp = strip_tags($stringtoclean, $allowed_tags_string); // Warning: This remove also undesired </>, so may changes string obfuscated with </> that pass the injection detection into a harmfull string
if ($cleanalsosomestyles) { // Clean for remaining html tags
if ($cleanalsosomestyles) { // Clean for remaining html tags
$temp = preg_replace('/position\s*:\s*(absolute|fixed)\s*!\s*important/i', '', $temp); // Note: If hacker try to introduce css comment into string to bypass this regex, the string must also be encoded by the dol_htmlentitiesbr during output so it become harmless
}
if ($removeclassattribute) { // Clean for remaining html tags
@ -8159,6 +8160,7 @@ function dol_htmlwithnojs($stringtoencode, $nouseofiframesandbox = 0, $check = '
} else {
$out = $stringtoencode;
// First clean HTML content
do {
$oldstringtoclean = $out;
@ -8270,6 +8272,17 @@ function dol_htmlwithnojs($stringtoencode, $nouseofiframesandbox = 0, $check = '
// Restore entity &apos; into &#39; (restricthtml is for html content so we can use html entity)
$out = preg_replace('/&apos;/i', "&#39;", $out);
// Now remove js
// List of dom events is on https://www.w3schools.com/jsref/dom_obj_event.asp and https://developer.mozilla.org/en-US/docs/Web/Events
$out = preg_replace('/on(mouse|drag|key|load|touch|pointer|select|transition)[a-z]*\s*=/i', '', $out); // onmousexxx can be set on img or any html tag like <img title='...' onmouseover=alert(1)>
$out = preg_replace('/on(abort|after|animation|auxclick|before|blur|cancel|canplay|canplaythrough|change|click|close|contextmenu|cuechange|copy|cut)[a-z]*\s*=/i', '', $out);
$out = preg_replace('/on(dblclick|drop|durationchange|emptied|end|ended|error|focus|focusin|focusout|formdata|gotpointercapture|hashchange|input|invalid)[a-z]*\s*=/i', '', $out);
$out = preg_replace('/on(lostpointercapture|offline|online|pagehide|pageshow)[a-z]*\s*=/i', '', $out);
$out = preg_replace('/on(paste|pause|play|playing|progress|ratechange|reset|resize|scroll|search|seeked|seeking|show|stalled|start|submit|suspend)[a-z]*\s*=/i', '', $out);
$out = preg_replace('/on(timeupdate|toggle|unload|volumechange|waiting|wheel)[a-z]*\s*=/i', '', $out);
// More not into the previous list
$out = preg_replace('/on(repeat|begin|finish|beforeinput)[a-z]*\s*=/i', '', $out);
} while ($oldstringtoclean != $out);
// Check the limit of external links that are automatically executed in a Rich text content. We count:

View File

@ -211,6 +211,7 @@ function testSqlAndScriptInject($val, $type)
}
$inj += preg_match('/base\s+href/si', $val);
$inj += preg_match('/=data:/si', $val);
// List of dom events is on https://www.w3schools.com/jsref/dom_obj_event.asp and https://developer.mozilla.org/en-US/docs/Web/Events
$inj += preg_match('/on(mouse|drag|key|load|touch|pointer|select|transition)[a-z]*\s*=/i', $val); // onmousexxx can be set on img or any html tag like <img title='...' onmouseover=alert(1)>
$inj += preg_match('/on(abort|after|animation|auxclick|before|blur|cancel|canplay|canplaythrough|change|click|close|contextmenu|cuechange|copy|cut)[a-z]*\s*=/i', $val);
@ -219,11 +220,12 @@ function testSqlAndScriptInject($val, $type)
$inj += preg_match('/on(paste|pause|play|playing|progress|ratechange|reset|resize|scroll|search|seeked|seeking|show|stalled|start|submit|suspend)[a-z]*\s*=/i', $val);
$inj += preg_match('/on(timeupdate|toggle|unload|volumechange|waiting|wheel)[a-z]*\s*=/i', $val);
// More not into the previous list
$inj += preg_match('/on(repeat|begin|finish|beforeinput)[a-z]*\s*=/i', $val);
// We refuse html into html because some hacks try to obfuscate evil strings by inserting HTML into HTML. Example: <img on<a>error=alert(1) to bypass test on onerror
$tmpval = preg_replace('/<[^<]+>/', '', $val);
// We refuse html into html because some hacks try to obfuscate evil strings by inserting HTML into HTML.
// Example: <img on<a>error=alert(1) or <img onerror<>=alert(1) to bypass test on onerror=
$tmpval = preg_replace('/<[^<]*>/', '', $val);
// List of dom events is on https://www.w3schools.com/jsref/dom_obj_event.asp and https://developer.mozilla.org/en-US/docs/Web/Events
$inj += preg_match('/on(mouse|drag|key|load|touch|pointer|select|transition)[a-z]*\s*=/i', $tmpval); // onmousexxx can be set on img or any html tag like <img title='...' onmouseover=alert(1)>
$inj += preg_match('/on(abort|after|animation|auxclick|before|blur|cancel|canplay|canplaythrough|change|click|close|contextmenu|cuechange|copy|cut)[a-z]*\s*=/i', $tmpval);

View File

@ -0,0 +1,102 @@
<?php
/* Copyright (C) 2010 Laurent Destailleur <eldy@users.sourceforge.net>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <https://www.gnu.org/licenses/>.
* or see https://www.gnu.org/
*/
/**
* \file test/phpunit/ExampleTest.php
* \ingroup test
* \brief PHPUnit test to use as example or test. this one is not called by AllTest.php
* \remarks To run this script as CLI: phpunit filename.php
*/
global $conf,$user,$langs,$db;
//define('TEST_DB_FORCE_TYPE','mysql'); // This is to force using mysql driver
//require_once 'PHPUnit/Autoload.php';
if (! defined('NOREQUIRESOC')) {
define('NOREQUIRESOC', '1');
}
if (! defined('NOCSRFCHECK')) {
define('NOCSRFCHECK', '1');
}
if (! defined('NOTOKENRENEWAL')) {
define('NOTOKENRENEWAL', '1');
}
if (! defined('NOREQUIREMENU')) {
define('NOREQUIREMENU', '1'); // If there is no menu to show
}
if (! defined('NOREQUIREHTML')) {
define('NOREQUIREHTML', '1'); // If we don't need to load the html.form.class.php
}
if (! defined('NOREQUIREAJAX')) {
define('NOREQUIREAJAX', '1');
}
if (! defined("NOLOGIN")) {
define("NOLOGIN", '1'); // If this page is public (can be called outside logged session)
}
if (! defined("NOSESSION")) {
define("NOSESSION", '1');
}
require_once dirname(__FILE__).'/../../htdocs/main.inc.php'; // We force include of main.inc.php instead of master.inc.php even if we are in CLI mode because it contains a lot of security components we want to test.
require_once dirname(__FILE__).'/../../htdocs/core/lib/security.lib.php';
require_once dirname(__FILE__).'/../../htdocs/core/lib/security2.lib.php';
require_once dirname(__FILE__).'/CommonClassTest.class.php';
if (empty($user->id)) {
print "Load permissions for admin user nb 1\n";
$user->fetch(1);
$user->getrights();
}
$conf->global->MAIN_DISABLE_ALL_MAILS = 1;
/**
* Class for PHPUnit tests
*
* @backupGlobals disabled
* @backupStaticAttributes enabled
* @remarks backupGlobals must be disabled to have db,conf,user and lang not erased.
*/
class SecurityTest extends CommonClassTest
{
/**
* testExample
*
* @return string
*/
public function testExample()
{
global $conf,$user,$langs,$db;
$conf = $this->savconf;
$user = $this->savuser;
$langs = $this->savlangs;
$db = $this->savdb;
// Force default mode
$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = 0;
$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = 0;
$conf->global->MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES = 0;
$conf->global->MAIN_DISALLOW_URL_INTO_DESCRIPTIONS = 0;
/*
$result = testSqlAndScriptInject('<img onerror<>=alert(document.domain)', 0);
print __METHOD__." result=".$result."\n";
$this->assertEquals(1, $result, 'Test example a');
*/
}
}

View File

@ -280,6 +280,14 @@ class SecurityTest extends CommonClassTest
$result = testSqlAndScriptInject($test, 2);
//print "test=".$test." result=".$result."\n";
$this->assertGreaterThanOrEqual($expectedresult, $result, 'Error on testSqlAndScriptInject with a non valid UTF8 char');
$test = '<img onerror<>=alert(document.domain)';
$result = testSqlAndScriptInject($test, 0);
$this->assertEquals($expectedresult, $result, 'Error on testSqlAndScriptInject with an obfuscated string that bypass the WAF');
$test = '<img onerror<abc>=alert(document.domain)';
$result = testSqlAndScriptInject($test, 0);
$this->assertEquals($expectedresult, $result, 'Error on testSqlAndScriptInject with an obfuscated string that bypass the WAF');
}
/**
@ -328,8 +336,8 @@ class SecurityTest extends CommonClassTest
$_POST["param13b"] = '&#110; &#x6E; &gt; &lt; &quot; <a href=\"j&#x61vascript:alert(document.domain)\">XSS</a>';
$_POST["param13c"] = 'aaa:<:bbb';
$_POST["param14"] = "Text with ' encoded with the numeric html entity converted into text entity &#39; (like when submitted by CKEditor)";
$_POST["param15"] = "<img onerror<=alert(document.domain)> src=>0xbeefed";
//$_POST["param15b"]="<html><head><title>Example HTML</title></head><body><div><p>This is a paragraph.</div><ul><li>Item 1</li><li>Item 2</li></ol></body><html>";
$_POST["param15"] = "<img onxxxx<=alert(document.domain)> src=>0xbeefed";
$_POST["param15b"] = "<img onerror<=alert(document.domain)> src=>0xbeefed";
$_POST["param16"] = '<a style="z-index: 1000">abc</a>';
$_POST["param17"] = '<span style="background-image: url(logout.php)">abc</span>';
$_POST["param18"] = '<span style="background-image: url(...?...action=aaa)">abc</span>';
@ -506,14 +514,19 @@ class SecurityTest extends CommonClassTest
print __METHOD__." result=".$result."\n";
$this->assertEquals("Text with ' encoded with the numeric html entity converted into text entity &#39; (like when submitted by CKEditor)", $result, 'Test 14');
$result = GETPOST("param15", 'restricthtml'); // param15 = <img onerror<=alert(document.domain)> src=>0xbeefed that is a dangerous string
$result = GETPOST("param15", 'restricthtml'); // param15 = <img onxxxx<=alert(document.domain)> src=>0xbeefed that is a dangerous string
print __METHOD__." result=".$result."\n";
$this->assertEquals("<img onerror=alert(document.domain) src=>0xbeefed", $result, 'Test 15'); // The GETPOST return a harmull string
$this->assertEquals("<img onxxxx=alert(document.domain) src=>0xbeefed", $result, 'Test 15'); // The GETPOST return a harmull string
$result = GETPOST("param15b", 'restricthtml'); // param15b = <img onerror<=alert(document.domain)> src=>0xbeefed that is a dangerous string
print __METHOD__." result=".$result."\n";
$this->assertEquals("<img alert(document.domain) src=>0xbeefed", $result, 'Test 15b'); // The GETPOST return a harmull string
$result = GETPOST("param19", 'restricthtml');
print __METHOD__." result=".$result."\n";
$this->assertEquals('<a href="&lpar;alert(document.cookie)&rpar;">XSS</a>', $result, 'Test 19');
// Test with restricthtml + MAIN_RESTRICTHTML_ONLY_VALID_HTML only to test disabling of bad attributes
$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = 1;
@ -525,8 +538,8 @@ class SecurityTest extends CommonClassTest
print __METHOD__." result for param0=".$result."\n";
$this->assertEquals($resultexpected, $result, 'Test on param0');
$result = GETPOST("param15", 'restricthtml'); // param15 = <img onerror<=alert(document.domain)> src=>0xbeefed that is a dangerous string
print __METHOD__." result for param15=".$result."\n";
$result = GETPOST("param15b", 'restricthtml'); // param15b = <img onerror<=alert(document.domain)> src=>0xbeefed that is a dangerous string
print __METHOD__." result for param15b=".$result."\n";
//$this->assertEquals('InvalidHTMLStringCantBeCleaned', $result, 'Test 15b'); // With some PHP and libxml version, we got this result when parsing invalid HTML, but ...
//$this->assertEquals('<img onerror> src=&gt;0xbeefed', $result, 'Test 15b'); // ... on other PHP and libxml versions, we got a HTML that has been cleaned
@ -552,15 +565,19 @@ class SecurityTest extends CommonClassTest
print __METHOD__." result for param0=".$result."\n";
$this->assertEquals($resultexpected, $result, 'Test on param0');
$result = GETPOST("param15", 'restricthtml'); // param15 = <img onerror<=alert(document.domain)> src=>0xbeefed that is a dangerous string
print __METHOD__." result=".$result."\n";
$result = GETPOST("param15b", 'restricthtml'); // param15b = <img onerror<=alert(document.domain)> src=>0xbeefed that is a dangerous string
print __METHOD__." result for param15b=".$result."\n";
//$this->assertEquals('InvalidHTMLStringCantBeCleaned', $result, 'Test 15b'); // With some PHP and libxml version, we got this result when parsing invalid HTML, but ...
//$this->assertEquals('<img onerror> src=&gt;0xbeefed', $result, 'Test 15b'); // ... on other PHP and libxml versions, we got a HTML that has been cleaned
$result = GETPOST("param6", 'restricthtml');
print __METHOD__." result for param6=".$result." - before=".$_POST["param6"]."\n";
//$this->assertEquals('InvalidHTMLStringCantBeCleaned', $result, 'Test 15b'); // With some PHP and libxml version, we got this result when parsing invalid HTML, but ...
$this->assertEquals('"&gt;', $result);
$result = GETPOST("param7", 'restricthtml');
print __METHOD__." result param7 = ".$result."\n";
//$this->assertEquals('InvalidHTMLStringCantBeCleaned', $result, 'Test 15b'); // With some PHP and libxml version, we got this result when parsing invalid HTML, but ...
$this->assertEquals('"c:\this is a path~1\aaan &amp;#x110;" abcdef', $result);
}
@ -576,15 +593,19 @@ class SecurityTest extends CommonClassTest
print __METHOD__." result for param0=".$result."\n";
$this->assertEquals($resultexpected, $result, 'Test on param0');
$result = GETPOST("param15", 'restricthtml'); // param15 = <img onerror<=alert(document.domain)> src=>0xbeefed that is a dangerous string
$result = GETPOST("param15b", 'restricthtml'); // param15b = <img onerror<=alert(document.domain)> src=>0xbeefed that is a dangerous string
print __METHOD__." result=".$result."\n";
//$this->assertEquals('InvalidHTMLStringCantBeCleaned', $result, 'Test 15b'); // With some PHP and libxml version, we got this result when parsing invalid HTML, but ...
//$this->assertEquals('<img onerror> src=&gt;0xbeefed', $result, 'Test 15b'); // ... on other PHP and libxml versions, we got a HTML that has been cleaned
$result = GETPOST("param6", 'restricthtml');
print __METHOD__." result for param6=".$result." - before=".$_POST["param6"]."\n";
//$this->assertEquals('InvalidHTMLStringCantBeCleaned', $result, 'Test 15b'); // With some PHP and libxml version, we got this result when parsing invalid HTML, but ...
$this->assertEquals('"&gt;', $result);
$result = GETPOST("param7", 'restricthtml');
print __METHOD__." result param7 = ".$result."\n";
//$this->assertEquals('InvalidHTMLStringCantBeCleaned', $result, 'Test 15b'); // With some PHP and libxml version, we got this result when parsing invalid HTML, but ...
$this->assertEquals('"c:\this is a path~1\aaan 110;" abcdef', $result);
}
@ -1208,6 +1229,7 @@ class SecurityTest extends CommonClassTest
print __METHOD__." result=".$result."\n";
$this->assertEquals($stringtotest, $result, 'Error');
$conf->global->MAIN_RESTRICTHTML_REMOVE_ALSO_BAD_ATTRIBUTES = 0;
$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = 0;
// Enabled option MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY if possible
@ -1298,6 +1320,29 @@ class SecurityTest extends CommonClassTest
$this->assertEquals($test, $result, 'dol_htmlwithnojs failed with an emoji when MAIN_RESTRICTHTML_ONLY_VALID_HTML=1');
// For a string with js on attribute
// Without HTML_TIDY
$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = 0;
$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = 0;
$result = dol_htmlwithnojs('<img onerror=alert(document.domain) src=x>', 1, 'restricthtml');
$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = $sav1;
$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = $sav2;
print __METHOD__." result=".$result."\n";
$this->assertEquals('<img alert(document.domain) src=x>', $result, 'Test example');
// With HTML TIDY
if (extension_loaded('tidy') && class_exists("tidy")) {
$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = 0;
$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = 1;
$result = dol_htmlwithnojs('<img onerror=alert(document.domain) src=x>', 1, 'restricthtml');
$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML = $sav1;
$conf->global->MAIN_RESTRICTHTML_ONLY_VALID_HTML_TIDY = $sav2;
//$result = dol_string_onlythesehtmltags($aa, 0, 1, 1);
print __METHOD__." result=".$result."\n";
$this->assertEquals('<img src="x">', $result, 'Test example');
}
return 0;
}