From d9cd72430fe5ee46ed4ecef10f083543eec33d92 Mon Sep 17 00:00:00 2001 From: Andrew Nacin Date: Thu, 20 Nov 2014 14:30:37 +0000 Subject: [PATCH] Anchor texturize to shortcodes to improve regex efficiency. For the 4.0 branch; see [30449] for trunk. props miqrogroove. see #29557 for segfault issues. Built from https://develop.svn.wordpress.org/branches/4.0@30450 git-svn-id: http://core.svn.wordpress.org/branches/4.0@30444 1a063a9b-81f0-0310-95a4-ce76da25c4cd --- wp-includes/formatting.php | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/wp-includes/formatting.php b/wp-includes/formatting.php index 1f18ab9d91..e79043bccc 100644 --- a/wp-includes/formatting.php +++ b/wp-includes/formatting.php @@ -28,7 +28,7 @@ * @return string The string replaced with html entities */ function wptexturize($text, $reset = false) { - global $wp_cockneyreplace; + global $wp_cockneyreplace, $shortcode_tags; static $static_characters, $static_replacements, $dynamic_characters, $dynamic_replacements, $default_no_texturize_tags, $default_no_texturize_shortcodes, $run_texturize = true; @@ -205,21 +205,34 @@ function wptexturize($text, $reset = false) { // Look for shortcodes and HTML elements. + $tagnames = array_keys( $shortcode_tags ); + $tagregexp = join( '|', array_map( 'preg_quote', $tagnames ) ); + $tagregexp = "(?:$tagregexp)(?![\\w-])"; // Excerpt of get_shortcode_regex(). + + $comment_regex = + '!' // Start of comment, after the <. + . '(?:' // Unroll the loop: Consume everything until --> is found. + . '-(?!->)' // Dash not followed by end of comment. + . '[^\-]*+' // Consume non-dashes. + . ')*+' // Loop possessively. + . '-->'; // End of comment. + $regex = '/(' // Capture the entire match. . '<' // Find start of element. . '(?(?=!--)' // Is this a comment? - . '.+?--\s*>' // Find end of comment + . $comment_regex // Find end of comment . '|' . '[^>]+>' // Find end of element . ')' . '|' . '\[' // Find start of shortcode. - . '\[?' // Shortcodes may begin with [[ + . '[\/\[]?' // Shortcodes may begin with [/ or [[ + . $tagregexp // Only match registered shortcodes, because performance. . '(?:' - . '[^\[\]<>]' // Shortcodes do not contain other shortcodes. + . '[^\[\]<>]+' // Shortcodes do not contain other shortcodes. Quantifier critical. . '|' - . '<[^>]+>' // HTML elements permitted. Prevents matching ] before >. - . ')++' + . '<[^\[\]>]*>' // HTML elements permitted. Prevents matching ] before >. + . ')*+' // Possessive critical. . '\]' // Find end of shortcode. . '\]?' // Shortcodes may end with ]] . ')/s'; @@ -241,12 +254,12 @@ function wptexturize($text, $reset = false) { continue; - } elseif ( '[' === $first && 1 === preg_match( '/^\[(?:[^\[\]<>]|<[^>]+>)++\]$/', $curl ) ) { + } elseif ( '[' === $first && 1 === preg_match( '/^\[\/?' . $tagregexp . '(?:[^\[\]<>]+|<[^\[\]>]*>)*+\]$/', $curl ) ) { // This is a shortcode delimiter. _wptexturize_pushpop_element( $curl, $no_texturize_shortcodes_stack, $no_texturize_shortcodes ); - } elseif ( '[' === $first && 1 === preg_match( '/^\[\[?(?:[^\[\]<>]|<[^>]+>)++\]\]?$/', $curl ) ) { + } elseif ( '[' === $first && 1 === preg_match( '/^\[[\/\[]?' . $tagregexp . '(?:[^\[\]<>]+|<[^\[\]>]*>)*+\]\]?$/', $curl ) ) { // This is an escaped shortcode delimiter. // Do not texturize.