From 512b9f80df24a5cc99c78dd7336b736453b3d823 Mon Sep 17 00:00:00 2001
From: Jeff Ong
Date: Wed, 23 Oct 2024 13:06:06 -0400
Subject: [PATCH] Process inner html of blocks when escaping text content
(#719)
* WIP working to convert attributes and URLs.
* Ensure media is added to local.
* Use a list of tokens and sprintf to generate the formatted string.
* Only format the string if tokens are present.
* Update content with html test.
Update remaining tests with inner markup.
Try and fix tests.
Format tests.
* Remove whitespace.
* Process all attributes
Escape at the end.
Provide better translation note.
* Refactor token processing to its own class.
* Update tests with string replacements and translation.
* Check if % exists in the text and escape it.
* Add a test case for a localizing text that includes a %.
* Add new line at the end of translators note
* Reformat numbers in translators note
* Update test
* Move new line to theme-locale
* Handle self closing tags
* Refactor how new line is added
* Attempt to fix tests
* Attempt to fix tests again
---------
Co-authored-by: Grant Kinney <1699996+creativecoder@users.noreply.github.com>
Co-authored-by: Sarah Norris
Co-authored-by: Matias Benedetto
---
includes/create-theme/theme-locale.php | 24 ++++
.../create-theme/theme-token-processor.php | 134 ++++++++++++++++++
tests/CbtThemeLocale/escapeTextContent.php | 7 +-
.../escapeTextContentOfBlocks.php | 2 +-
tests/test-theme-templates.php | 24 +++-
5 files changed, 182 insertions(+), 9 deletions(-)
create mode 100644 includes/create-theme/theme-token-processor.php
diff --git a/includes/create-theme/theme-locale.php b/includes/create-theme/theme-locale.php
index 783d8715..7f105022 100644
--- a/includes/create-theme/theme-locale.php
+++ b/includes/create-theme/theme-locale.php
@@ -2,6 +2,9 @@
/*
* Locale related functionality
*/
+
+require_once __DIR__ . '/theme-token-processor.php';
+
class CBT_Theme_Locale {
/**
@@ -28,6 +31,27 @@ private static function escape_text_content( $string ) {
$string = addcslashes( $string, "'" );
+ $p = new CBT_Token_Processor( $string );
+ $p->process_tokens();
+ $text = $p->get_text();
+ $tokens = $p->get_tokens();
+ $translators_note = $p->get_translators_note();
+
+ if ( ! empty( $tokens ) ) {
+ $php_tag = 'get( 'TextDomain' ) . "' ), " . implode(
+ ', ',
+ array_map(
+ function( $token ) {
+ return "'$token'";
+ },
+ $tokens
+ )
+ ) . ' ); ?>';
+ return $php_tag;
+ }
+
return "get( 'TextDomain' ) . "');?>";
}
diff --git a/includes/create-theme/theme-token-processor.php b/includes/create-theme/theme-token-processor.php
new file mode 100644
index 00000000..a2404e41
--- /dev/null
+++ b/includes/create-theme/theme-token-processor.php
@@ -0,0 +1,134 @@
+p = new WP_HTML_Tag_Processor( $string );
+ }
+
+	/**
+	 * Walks every token of the wrapped HTML and builds the sprintf-ready text,
+	 * the list of tag tokens, and the translators' note.
+	 *
+	 * Each tag token is replaced in the text by a numbered placeholder
+	 * (%1$s, %2$s, ...), contributes one entry to the tokens list (the tag
+	 * rebuilt as a string) and one clause to the translators' note. Every other
+	 * token contributes its modifiable text, with literal percent signs doubled
+	 * so they are not read as sprintf conversions.
+	 *
+	 * This method takes no arguments; it reads the tag processor stored on the
+	 * instance.
+	 *
+	 * @return void
+	 */
+	public function process_tokens() {
+		while ( $this->p->next_token() ) {
+			$token_type      = $this->p->get_token_type();
+			$token_name      = strtolower( $this->p->get_token_name() );
+			$is_tag_closer   = $this->p->is_tag_closer();
+			$has_self_closer = $this->p->has_self_closing_flag();
+
+			if ( '#tag' === $token_type ) {
+				$this->increment++;
+				$this->text .= '%' . $this->increment . '$s';
+				$token_label = $this->increment . '.';
+
+				// Clauses of the note are comma separated; the first one has no separator.
+				if ( 1 !== $this->increment ) {
+					$this->translators_note .= ', ';
+				}
+
+				if ( $is_tag_closer ) {
+					// A closing tag must carry the leading "</" so it pairs with the
+					// "<name ...>" opener built in the branch below.
+					$this->tokens[]          = "</{$token_name}>";
+					$this->translators_note .= $token_label . " is the end of a '" . $token_name . "' HTML element";
+				} else {
+					$token      = '<' . $token_name;
+					$attributes = $this->p->get_attribute_names_with_prefix( '' );
+
+					foreach ( $attributes as $attr_name ) {
+						$attr_value = $this->p->get_attribute( $attr_name );
+						$token     .= $this->process_attribute( $attr_name, $attr_value );
+					}
+
+					$token         .= '>';
+					$this->tokens[] = $token;
+
+					// Self-closed tags and <br> have no matching closer, so the note
+					// describes them as a whole element rather than a "start".
+					if ( $has_self_closer || 'br' === $token_name ) {
+						$this->translators_note .= $token_label . " is a '" . $token_name . "' HTML element";
+					} else {
+						$this->translators_note .= $token_label . " is the start of a '" . $token_name . "' HTML element";
+					}
+				}
+			} else {
+				// NOTE(review): every non-tag token is treated as text here; that
+				// would include HTML comments if the input ever has any - confirm.
+				// Doubling "%" keeps literal percent signs intact once the text is
+				// used as a sprintf format; str_replace() is a no-op without a match.
+				$this->text .= str_replace( '%', '%%', $this->p->get_modifiable_text() );
+			}
+		}
+
+		// Close the translators' comment only when at least one placeholder was described.
+		if ( ! empty( $this->tokens ) ) {
+			$this->translators_note .= ' */ ';
+		}
+	}
+
+	/**
+	 * Rebuilds a single tag attribute as a string fragment for a token.
+	 *
+	 * - Valueless attributes are emitted as a bare name.
+	 * - `src` values are registered as local media and rewritten to a
+	 *   theme-relative URL expression.
+	 * - `href` values are wrapped in an esc_url() expression.
+	 * - Everything else is emitted as name="value".
+	 *
+	 * The `src` and `href` fragments deliberately contain `' . ... . '` so that
+	 * they break out of the single-quoted PHP string the token is later wrapped in.
+	 *
+	 * @param string           $attr_name  The name of the attribute.
+	 * @param string|true|null $attr_value The attribute value as returned by the tag
+	 *                                     processor; true denotes a boolean attribute.
+	 * @return string The processed attribute, prefixed with a single space.
+	 */
+	private function process_attribute( $attr_name, $attr_value ) {
+		// empty() alone would also swallow the string '0' (e.g. tabindex="0"), and
+		// would let a boolean true fall through to be printed as name="1".
+		$is_valueless = true === $attr_value || ( empty( $attr_value ) && '0' !== $attr_value );
+
+		if ( $is_valueless ) {
+			return ' ' . $attr_name;
+		}
+
+		if ( 'src' === $attr_name ) {
+			CBT_Theme_Media::add_media_to_local( array( $attr_value ) );
+			$relative_src = CBT_Theme_Media::get_media_folder_path_from_url( $attr_value ) . basename( $attr_value );
+			$attr_value   = "' . esc_url( get_stylesheet_directory_uri() ) . '{$relative_src}";
+		} elseif ( 'href' === $attr_name ) {
+			$attr_value = "' . esc_url( '$attr_value' ) . '";
+		}
+
+		return ' ' . $attr_name . '="' . $attr_value . '"';
+	}
+
+ /**
+ * Gets the text assembled by process_tokens().
+ *
+ * In that text every HTML tag has been swapped for a numbered placeholder
+ * (%1$s, %2$s, ...) and literal percent signs have been doubled, so the
+ * string can serve as a sprintf() format.
+ *
+ * @return string
+ */
+ public function get_text() {
+ return $this->text;
+ }
+
+ /**
+ * Gets the tag strings collected by process_tokens().
+ *
+ * One entry is appended per tag token, in the order the tags were
+ * encountered, which is the same order in which the numbered placeholders
+ * were written into the text.
+ *
+ * @return array
+ */
+ public function get_tokens() {
+ return $this->tokens;
+ }
+
+ /**
+ * Gets the translators' note built by process_tokens().
+ *
+ * The note gains one comma-separated clause per tag, naming the placeholder
+ * number and the HTML element it stands for; ' */ ' is appended once
+ * processing finishes if at least one tag was found.
+ *
+ * @return string
+ */
+ public function get_translators_note() {
+ return $this->translators_note;
+ }
+}
diff --git a/tests/CbtThemeLocale/escapeTextContent.php b/tests/CbtThemeLocale/escapeTextContent.php
index 32609243..82e6a08b 100644
--- a/tests/CbtThemeLocale/escapeTextContent.php
+++ b/tests/CbtThemeLocale/escapeTextContent.php
@@ -37,9 +37,10 @@ public function test_escape_text_content_with_double_quote() {
}
public function test_escape_text_content_with_html() {
- $string = 'This is a test text with HTML.
';
- $escaped_string = $this->call_private_method( 'escape_text_content', array( $string ) );
- $this->assertEquals( "This is a test text with HTML.
', 'test-locale-theme');?>", $escaped_string );
+ $string = 'This is a test text with HTML.
';
+ $escaped_string = $this->call_private_method( 'escape_text_content', array( $string ) );
+ $expected_output = '\', \'\' ); ?>';
+ $this->assertEquals( $expected_output, $escaped_string );
}
public function test_escape_text_content_with_already_escaped_string() {
diff --git a/tests/CbtThemeLocale/escapeTextContentOfBlocks.php b/tests/CbtThemeLocale/escapeTextContentOfBlocks.php
index 2fee4adf..50e85891 100644
--- a/tests/CbtThemeLocale/escapeTextContentOfBlocks.php
+++ b/tests/CbtThemeLocale/escapeTextContentOfBlocks.php
@@ -108,7 +108,7 @@ public function data_test_escape_text_content_of_blocks() {
',
'expected_markup' =>
'
- El polvo elemental que nos ignora
y que fue el rojo Adán y que es ahora
todos los hombres, y que no veremos.\', \'test-locale-theme\');?>
+ \', \'
\', \'
\' ); ?>
',
),
diff --git a/tests/test-theme-templates.php b/tests/test-theme-templates.php
index e2842800..15bdc8f0 100644
--- a/tests/test-theme-templates.php
+++ b/tests/test-theme-templates.php
@@ -159,12 +159,12 @@ public function test_properly_encode_lessthan_and_greaterthan() {
public function test_properly_encode_html_markup() {
$template = new stdClass();
- $template->content = '
- Bold text has feelings <> TOO
- ';
+ $template->content = 'Bold text has feelings <> TOO
';
$escaped_template = CBT_Theme_Templates::escape_text_in_template( $template );
- $this->assertStringContainsString( "Bold text has feelings <> TOO', '');?>", $escaped_template->content );
+ $expected_output = ' TOO\', \'\' ), \'\', \'\' ); ?>
';
+
+ $this->assertStringContainsString( $expected_output, $escaped_template->content );
}
public function test_empty_alt_text_is_not_localized() {
@@ -262,7 +262,21 @@ public function test_localize_verse() {
Here is some verse to localize
';
$new_template = CBT_Theme_Templates::escape_text_in_template( $template );
- $this->assertStringContainsString( "verse to localize', '');?>", $new_template->content );
+
+ $expected_output = '
+ \', \'\' ); ?>
+ ';
+
+ $this->assertStringContainsString( $expected_output, $new_template->content );
+ }
+
+ public function test_localize_text_with_placeholders() {
+ $template = new stdClass();
+ $template->content = '
+ This is bold text with a %s placeholder
+ ';
+ $new_template = CBT_Theme_Templates::escape_text_in_template( $template );
+ $this->assertStringContainsString( '\', \'\' ); ?>', $new_template->content );
}
public function test_localize_table() {