diff --git a/wp-includes/html-api/class-wp-html-tag-processor.php b/wp-includes/html-api/class-wp-html-tag-processor.php
index 541b5baf0b..79d96dc8be 100644
--- a/wp-includes/html-api/class-wp-html-tag-processor.php
+++ b/wp-includes/html-api/class-wp-html-tag-processor.php
@@ -242,6 +242,8 @@
* unquoted values will appear in the output with double-quotes.
*
* @since 6.2.0
+ * @since 6.2.1 Fix: Support for various invalid comments; attribute updates are case-insensitive.
+ * @since 6.3.2 Fix: Skip HTML-like content inside rawtext elements such as STYLE.
*/
class WP_HTML_Tag_Processor {
/**
@@ -568,7 +570,14 @@ class WP_HTML_Tag_Processor {
* of the tag name as a pre-check avoids a string allocation when it's not needed.
*/
$t = $this->html[ $this->tag_name_starts_at ];
- if ( ! $this->is_closing_tag && ( 's' === $t || 'S' === $t || 't' === $t || 'T' === $t ) ) {
+ if (
+ ! $this->is_closing_tag &&
+ (
+ 'i' === $t || 'I' === $t ||
+ 'n' === $t || 'N' === $t ||
+ 's' === $t || 'S' === $t ||
+ 't' === $t || 'T' === $t
+ ) ) {
$tag_name = $this->get_tag();
if ( 'SCRIPT' === $tag_name && ! $this->skip_script_data() ) {
@@ -580,6 +589,25 @@ class WP_HTML_Tag_Processor {
) {
$this->bytes_already_parsed = strlen( $this->html );
return false;
+ } elseif (
+ (
+ 'IFRAME' === $tag_name ||
+ 'NOEMBED' === $tag_name ||
+ 'NOFRAMES' === $tag_name ||
+ 'NOSCRIPT' === $tag_name ||
+ 'STYLE' === $tag_name
+ ) &&
+ ! $this->skip_rawtext( $tag_name )
+ ) {
+ /*
+ * "XMP" should be here too but its rules are more complicated and require the
+ * complexity of the HTML Processor (it needs to close out any open P element,
+ * meaning it can't be skipped here or else the HTML Processor will lose its
+ * place). For now, it can be ignored as it's a rare HTML tag in practice and
+ * any normative HTML should be using PRE instead.
+ */
+ $this->bytes_already_parsed = strlen( $this->html );
+ return false;
}
}
} while ( $already_found < $this->sought_match_offset );
@@ -710,15 +738,33 @@ class WP_HTML_Tag_Processor {
return true;
}
+ /**
+ * Skips contents of generic rawtext elements.
+ *
+ * @since 6.3.2
+ *
+ * @see https://html.spec.whatwg.org/#generic-raw-text-element-parsing-algorithm
+ *
+ * @param string $tag_name The uppercase tag name which will close the RAWTEXT region.
+ * @return bool Whether an end to the RAWTEXT region was found before the end of the document.
+ */
+ private function skip_rawtext( $tag_name ) {
+ /*
+ * These two functions distinguish themselves on whether character references are
+ * decoded, and since functionality to read the inner markup isn't supported, it's
+ * not necessary to implement these two functions separately.
+ */
+ return $this->skip_rcdata( $tag_name );
+ }
/**
- * Skips contents of title and textarea tags.
+ * Skips contents of RCDATA elements, namely title and textarea tags.
*
* @since 6.2.0
*
* @see https://html.spec.whatwg.org/multipage/parsing.html#rcdata-state
*
- * @param string $tag_name The lowercase tag name which will close the RCDATA region.
+ * @param string $tag_name The uppercase tag name which will close the RCDATA region.
* @return bool Whether an end to the RCDATA region was found before the end of the document.
*/
private function skip_rcdata( $tag_name ) {
diff --git a/wp-includes/version.php b/wp-includes/version.php
index 91a967d4c5..934c6dc88c 100644
--- a/wp-includes/version.php
+++ b/wp-includes/version.php
@@ -16,7 +16,7 @@
*
* @global string $wp_version
*/
-$wp_version = '6.4-alpha-56562';
+$wp_version = '6.4-alpha-56563';
/**
* Holds the WordPress DB revision, increments when changes are made to the WordPress DB schema.