From 512b9f80df24a5cc99c78dd7336b736453b3d823 Mon Sep 17 00:00:00 2001 From: Jeff Ong Date: Wed, 23 Oct 2024 13:06:06 -0400 Subject: [PATCH] Process inner html of blocks when escaping text content (#719) * WIP working to convert attributes and URLs. * Ensure media is added to local. * Use a list of tokens and sprintf to generate the formatted string. * Only format the string if tokens are present. * Update content with html test. Update remaining tests with inner markup. Try and fix tests. Format tests. * Remove whitespace. * Process all attributes Escape at the end. Provide better translation note. * Refactor token processing to its own class. * Update tests with string replacements and translation. * Check if % exists in the text and escape it. * Add a test case for a localizing text that includes a %. * Add new line at the end of translators note * Reformat numbers in translators note * Update test * Move new line to theme-locale * Handle self closing tags * Refactor how new line is added * Attempt to fix tests * Attempt to fix tests again --------- Co-authored-by: Grant Kinney <1699996+creativecoder@users.noreply.github.com> Co-authored-by: Sarah Norris Co-authored-by: Matias Benedetto --- includes/create-theme/theme-locale.php | 24 ++++ .../create-theme/theme-token-processor.php | 134 ++++++++++++++++++ tests/CbtThemeLocale/escapeTextContent.php | 7 +- .../escapeTextContentOfBlocks.php | 2 +- tests/test-theme-templates.php | 24 +++- 5 files changed, 182 insertions(+), 9 deletions(-) create mode 100644 includes/create-theme/theme-token-processor.php diff --git a/includes/create-theme/theme-locale.php b/includes/create-theme/theme-locale.php index 783d8715..7f105022 100644 --- a/includes/create-theme/theme-locale.php +++ b/includes/create-theme/theme-locale.php @@ -2,6 +2,9 @@ /* * Locale related functionality */ + +require_once __DIR__ . '/theme-token-processor.php'; + class CBT_Theme_Locale { /** @@ -28,6 +31,27 @@ private static function escape_text_content( $string ) { $string = addcslashes( $string, "'" ); + $p = new CBT_Token_Processor( $string ); + $p->process_tokens(); + $text = $p->get_text(); + $tokens = $p->get_tokens(); + $translators_note = $p->get_translators_note(); + + if ( ! empty( $tokens ) ) { + $php_tag = 'get( 'TextDomain' ) . "' ), " . implode( + ', ', + array_map( + function( $token ) { + return "'$token'"; + }, + $tokens + ) + ) . ' ); ?>'; + return $php_tag; + } + return "get( 'TextDomain' ) . "');?>"; } diff --git a/includes/create-theme/theme-token-processor.php b/includes/create-theme/theme-token-processor.php new file mode 100644 index 00000000..a2404e41 --- /dev/null +++ b/includes/create-theme/theme-token-processor.php @@ -0,0 +1,134 @@ +p = new WP_HTML_Tag_Processor( $string ); + } + + /** + * Processes the HTML tags in the string and updates tokens, text, and translators' note. + * + * @param $p The string to process. + * @return void + */ + public function process_tokens() { + while ( $this->p->next_token() ) { + $token_type = $this->p->get_token_type(); + $token_name = strtolower( $this->p->get_token_name() ); + $is_tag_closer = $this->p->is_tag_closer(); + $has_self_closer = $this->p->has_self_closing_flag(); + + if ( '#tag' === $token_type ) { + $this->increment++; + $this->text .= '%' . $this->increment . '$s'; + $token_label = $this->increment . '.'; + + if ( 1 !== $this->increment ) { + $this->translators_note .= ', '; + } + + if ( $is_tag_closer ) { + $this->tokens[] = ""; + $this->translators_note .= $token_label . " is the end of a '" . $token_name . "' HTML element"; + } else { + $token = '<' . $token_name; + $attributes = $this->p->get_attribute_names_with_prefix( '' ); + + foreach ( $attributes as $attr_name ) { + $attr_value = $this->p->get_attribute( $attr_name ); + $token .= $this->process_attribute( $attr_name, $attr_value ); + } + + $token .= '>'; + $this->tokens[] = $token; + + if ( $has_self_closer || 'br' === $token_name ) { + $this->translators_note .= $token_label . " is a '" . $token_name . "' HTML element"; + } else { + $this->translators_note .= $token_label . " is the start of a '" . $token_name . "' HTML element"; + } + } + } else { + // Escape text content. + $temp_text = $this->p->get_modifiable_text(); + + // If the text contains a %, we need to escape it. + if ( false !== strpos( $temp_text, '%' ) ) { + $temp_text = str_replace( '%', '%%', $temp_text ); + } + + $this->text .= $temp_text; + } + } + + if ( ! empty( $this->tokens ) ) { + $this->translators_note .= ' */ '; + } + } + + /** + * Processes individual tag attributes and escapes where necessary. + * + * @param string $attr_name The name of the attribute. + * @param string $attr_value The value of the attribute. + * @return string The processed attribute. + */ + private function process_attribute( $attr_name, $attr_value ) { + $token_part = ''; + if ( empty( $attr_value ) ) { + $token_part .= ' ' . $attr_name; + } elseif ( 'src' === $attr_name ) { + CBT_Theme_Media::add_media_to_local( array( $attr_value ) ); + $relative_src = CBT_Theme_Media::get_media_folder_path_from_url( $attr_value ) . basename( $attr_value ); + $attr_value = "' . esc_url( get_stylesheet_directory_uri() ) . '{$relative_src}"; + $token_part .= ' ' . $attr_name . '="' . $attr_value . '"'; + } elseif ( 'href' === $attr_name ) { + $attr_value = "' . esc_url( '$attr_value' ) . '"; + $token_part .= ' ' . $attr_name . '="' . $attr_value . '"'; + } else { + $token_part .= ' ' . $attr_name . '="' . $attr_value . '"'; + } + + return $token_part; + } + + /** + * Gets the processed text. + * + * @return string + */ + public function get_text() { + return $this->text; + } + + /** + * Gets the processed tokens. + * + * @return array + */ + public function get_tokens() { + return $this->tokens; + } + + /** + * Gets the generated translators' note. + * + * @return string + */ + public function get_translators_note() { + return $this->translators_note; + } +} diff --git a/tests/CbtThemeLocale/escapeTextContent.php b/tests/CbtThemeLocale/escapeTextContent.php index 32609243..82e6a08b 100644 --- a/tests/CbtThemeLocale/escapeTextContent.php +++ b/tests/CbtThemeLocale/escapeTextContent.php @@ -37,9 +37,10 @@ public function test_escape_text_content_with_double_quote() { } public function test_escape_text_content_with_html() { - $string = '

This is a test text with HTML.

'; - $escaped_string = $this->call_private_method( 'escape_text_content', array( $string ) ); - $this->assertEquals( "This is a test text with HTML.

', 'test-locale-theme');?>", $escaped_string ); + $string = '

This is a test text with HTML.

'; + $escaped_string = $this->call_private_method( 'escape_text_content', array( $string ) ); + $expected_output = '\', \'

\' ); ?>'; + $this->assertEquals( $expected_output, $escaped_string ); } public function test_escape_text_content_with_already_escaped_string() { diff --git a/tests/CbtThemeLocale/escapeTextContentOfBlocks.php b/tests/CbtThemeLocale/escapeTextContentOfBlocks.php index 2fee4adf..50e85891 100644 --- a/tests/CbtThemeLocale/escapeTextContentOfBlocks.php +++ b/tests/CbtThemeLocale/escapeTextContentOfBlocks.php @@ -108,7 +108,7 @@ public function data_test_escape_text_content_of_blocks() { ', 'expected_markup' => ' -
El polvo elemental que nos ignora
y que fue el rojo Adán y que es ahora
todos los hombres, y que no veremos.\', \'test-locale-theme\');?>
+
\', \'
\', \'
\' ); ?>
', ), diff --git a/tests/test-theme-templates.php b/tests/test-theme-templates.php index e2842800..15bdc8f0 100644 --- a/tests/test-theme-templates.php +++ b/tests/test-theme-templates.php @@ -159,12 +159,12 @@ public function test_properly_encode_lessthan_and_greaterthan() { public function test_properly_encode_html_markup() { $template = new stdClass(); - $template->content = ' -

Bold text has feelings <> TOO

- '; + $template->content = '

Bold text has feelings <> TOO

'; $escaped_template = CBT_Theme_Templates::escape_text_in_template( $template ); - $this->assertStringContainsString( "Bold text has feelings <> TOO', '');?>", $escaped_template->content ); + $expected_output = '

TOO\', \'\' ), \'\', \'\' ); ?>

'; + + $this->assertStringContainsString( $expected_output, $escaped_template->content ); } public function test_empty_alt_text_is_not_localized() { @@ -262,7 +262,21 @@ public function test_localize_verse() {
Here is some verse to localize
'; $new_template = CBT_Theme_Templates::escape_text_in_template( $template ); - $this->assertStringContainsString( "verse to localize', '');?>", $new_template->content ); + + $expected_output = ' +
\', \'\' ); ?>
+ '; + + $this->assertStringContainsString( $expected_output, $new_template->content ); + } + + public function test_localize_text_with_placeholders() { + $template = new stdClass(); + $template->content = ' +

This is bold text with a %s placeholder

+ '; + $new_template = CBT_Theme_Templates::escape_text_in_template( $template ); + $this->assertStringContainsString( '\', \'\' ); ?>', $new_template->content ); } public function test_localize_table() {