From 2b435abfbb78014b8c6ef0fab1e1576bcee5c76f Mon Sep 17 00:00:00 2001 From: James Hatmaker Date: Fri, 8 Oct 2021 10:39:10 -0600 Subject: [PATCH 1/7] Added a regex replace to remove multiple // in a URI --- src/SitemapParser.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/SitemapParser.php b/src/SitemapParser.php index 27ac19c3..57c35bbf 100644 --- a/src/SitemapParser.php +++ b/src/SitemapParser.php @@ -174,7 +174,8 @@ public function getQueue() { */ public function parse( $url, $url_content = null ) { $this->clean(); - $this->current_url = $this->urlEncode( $url ); + $replacePattern = '/(?current_url = $this->urlEncode( preg_replace($replacePattern, '/', $url )); if ( ! $this->urlValidate( $this->current_url ) ) { throw new WP2StaticException( 'Invalid URL' ); } From 0f7078912374d39d8d92d5d743b90e7e312ff6b0 Mon Sep 17 00:00:00 2001 From: James Hatmaker Date: Fri, 8 Oct 2021 11:14:21 -0600 Subject: [PATCH 2/7] Fixed lint messages --- src/SitemapParser.php | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/SitemapParser.php b/src/SitemapParser.php index 57c35bbf..f8790c5a 100644 --- a/src/SitemapParser.php +++ b/src/SitemapParser.php @@ -174,8 +174,8 @@ public function getQueue() { */ public function parse( $url, $url_content = null ) { $this->clean(); - $replacePattern = '/(?current_url = $this->urlEncode( preg_replace($replacePattern, '/', $url )); + $replace_pattern = '/(?current_url = $this->urlEncode( preg_replace( $replace_pattern, '/', $url ) ); if ( ! $this->urlValidate( $this->current_url ) ) { throw new WP2StaticException( 'Invalid URL' ); } From 5000ded91d7b6f5227830c8700f7f0429fe75199 Mon Sep 17 00:00:00 2001 From: James Hatmaker Date: Fri, 8 Oct 2021 11:57:37 -0600 Subject: [PATCH 3/7] Fix input string | null to just pass in url on null --- src/SitemapParser.php | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/SitemapParser.php b/src/SitemapParser.php index f8790c5a..de40dace 100644 --- a/src/SitemapParser.php +++ b/src/SitemapParser.php @@ -175,7 +175,7 @@ public function getQueue() { public function parse( $url, $url_content = null ) { $this->clean(); $replace_pattern = '/(?current_url = $this->urlEncode( preg_replace( $replace_pattern, '/', $url ) ); + $this->current_url = $this->urlEncode( preg_replace( $replace_pattern, '/', $url ) ?: $url ); if ( ! $this->urlValidate( $this->current_url ) ) { throw new WP2StaticException( 'Invalid URL' ); } From bda40547612aba1f5515dec42d9e4130b711d6c6 Mon Sep 17 00:00:00 2001 From: James Hatmaker Date: Fri, 8 Oct 2021 13:17:13 -0600 Subject: [PATCH 4/7] Attempt to match coding style --- src/SitemapParser.php | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/SitemapParser.php b/src/SitemapParser.php index de40dace..061359bc 100644 --- a/src/SitemapParser.php +++ b/src/SitemapParser.php @@ -175,7 +175,11 @@ public function getQueue() { public function parse( $url, $url_content = null ) { $this->clean(); $replace_pattern = '/(?current_url = $this->urlEncode( preg_replace( $replace_pattern, '/', $url ) ?: $url ); + $check_url = $url; + if ( $clean_url = preg_replace( $replace_pattern, '/', $url ) ) { + $check_url = $clean_url; + } + $this->current_url = $this->urlEncode( $check_url ); if ( ! $this->urlValidate( $this->current_url ) ) { throw new WP2StaticException( 'Invalid URL' ); } From b834d4bd5cceac864fb31330abcc9f33a4366648 Mon Sep 17 00:00:00 2001 From: James Hatmaker Date: Fri, 8 Oct 2021 14:47:14 -0600 Subject: [PATCH 5/7] and again. - sorry --- src/SitemapParser.php | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/SitemapParser.php b/src/SitemapParser.php index 061359bc..d08da512 100644 --- a/src/SitemapParser.php +++ b/src/SitemapParser.php @@ -173,10 +173,9 @@ public function getQueue() { * @throws WP2StaticException */ public function parse( $url, $url_content = null ) { - $this->clean(); - $replace_pattern = '/(?clean(); + if ( $clean_url = preg_replace( '/(?current_url = $this->urlEncode( $check_url ); From 9fbe18017532dd34b658ca882426a0865c48f872 Mon Sep 17 00:00:00 2001 From: James Hatmaker Date: Fri, 8 Oct 2021 16:09:14 -0600 Subject: [PATCH 6/7] Cleaned up code --- src/SitemapParser.php | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/SitemapParser.php b/src/SitemapParser.php index d08da512..0c5e4150 100644 --- a/src/SitemapParser.php +++ b/src/SitemapParser.php @@ -174,8 +174,9 @@ public function getQueue() { */ public function parse( $url, $url_content = null ) { $check_url = $url; + $clean_url = preg_replace( '/(?clean(); - if ( $clean_url = preg_replace( '/(?current_url = $this->urlEncode( $check_url ); From cc158ecfe85c9b2552397383d1f06b81a1ff8046 Mon Sep 17 00:00:00 2001 From: James Hatmaker Date: Fri, 8 Oct 2021 18:52:37 -0600 Subject: [PATCH 7/7] Fix array_map namespace callback issue --- src/SitemapParser.php | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/src/SitemapParser.php b/src/SitemapParser.php index 0c5e4150..2812a67b 100644 --- a/src/SitemapParser.php +++ b/src/SitemapParser.php @@ -247,6 +247,16 @@ protected function getContent() { } } + /** + * callable trim function + * + * @param string $string + * @return string + */ + protected function trim( $string ) { + return trim( $string ); + } + /** * Search for sitemaps in the robots.txt content * @@ -257,7 +267,7 @@ protected function parseRobotstxt( $robotstxt ) { // Split lines into array $lines = array_filter( array_map( - 'trim', + [ $this, 'trim' ], (array) preg_split( '/\r\n|\n|\r/', $robotstxt ) ) ); @@ -274,7 +284,7 @@ protected function parseRobotstxt( $robotstxt ) { $line = $line[0]; // Split by directive and rule - $pair = array_map( 'trim', (array) preg_split( '/:/', $line, 2 ) ); + $pair = array_map( [ $this, 'trim' ], (array) preg_split( '/:/', $line, 2 ) ); // Check if the line contains a sitemap if ( strtolower( $pair[0] ) !== self::XML_TAG_SITEMAP || @@ -379,7 +389,12 @@ protected function parseString( $string ) { // Strings are not part of any documented sitemap standard return false; } - $array = array_filter( array_map( 'trim', (array) preg_split( '/\r\n|\n|\r/', $string ) ) ); + $array = array_filter( + array_map( + [ $this, 'trim' ], + (array) preg_split( '/\r\n|\n|\r/', $string ) + ) + ); foreach ( $array as $line ) { if ( $this->isSitemapURL( $line ) ) { $this->addArray( self::XML_TAG_SITEMAP, [ 'loc' => $line ] );