From 28463f2a450b0fcef6c849772a49b6af91c0a213 Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Mon, 25 Nov 2024 14:07:53 -0500 Subject: [PATCH 1/5] fix: drop redundant final h to recognize rectangles --- pdfminer/converter.py | 5 +++++ tests/test_converter.py | 12 ++++++++++++ 2 files changed, 17 insertions(+) diff --git a/pdfminer/converter.py b/pdfminer/converter.py index 7563c7af..8f985b19 100644 --- a/pdfminer/converter.py +++ b/pdfminer/converter.py @@ -162,6 +162,11 @@ def paint_path( for o, p in zip(operators, transformed_points) ] + # Drop redundant final "h" on an already closed path + if len(shape) > 3 and shape[-1] == "h" and pts[-2] == pts[0]: + shape = shape[:-1] + pts.pop() + if shape in {"mlh", "ml"}: # single line segment # diff --git a/tests/test_converter.py b/tests/test_converter.py index afe01372..271c479c 100644 --- a/tests/test_converter.py +++ b/tests/test_converter.py @@ -81,6 +81,18 @@ def get_types(path): ], ) == [LTRect] + # Same but mllllh variation + assert get_types( + [ + ("m", 10, 90), + ("l", 90, 90), + ("l", 90, 10), + ("l", 10, 10), + ("l", 10, 90), + ('h',) + ], + ) == [LTRect] + # Bowtie shape assert get_types( [ From cf7d2c2df2eb3a7108ad4b4e788b9ccf3e2601eb Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Mon, 25 Nov 2024 14:37:00 -0500 Subject: [PATCH 2/5] chore: changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5425c5d3..bf391ff3 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Changed - Using absolute instead of relative imports ([[#995](https://github.com/pdfminer/pdfminer.six/pull/995)]) +- Remove redundant 'h' to be able to recognize rectangles ([#1066](https://github.com/pdfminer/pdfminer.six/pull/1066)) ### Deprecated From 7a3c66ea271398e7c8ad9a51347e895a9435bf7c Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Mon, 25 Nov 2024 14:40:42 -0500 Subject: [PATCH 3/5] chore: nox --- tests/test_converter.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_converter.py b/tests/test_converter.py index 271c479c..33599e4b 100644 --- a/tests/test_converter.py +++ b/tests/test_converter.py @@ -89,7 +89,7 @@ def get_types(path): ("l", 90, 10), ("l", 10, 10), ("l", 10, 90), - ('h',) + ("h",), ], ) == [LTRect] From d8c9875effb7c01aff8e48cf96c8e833a86de21f Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Mon, 25 Nov 2024 19:46:10 -0500 Subject: [PATCH 4/5] fix: better to leave the final h in place --- pdfminer/converter.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/pdfminer/converter.py b/pdfminer/converter.py index 8f985b19..3ce782e4 100644 --- a/pdfminer/converter.py +++ b/pdfminer/converter.py @@ -162,9 +162,9 @@ def paint_path( for o, p in zip(operators, transformed_points) ] - # Drop redundant final "h" on an already closed path - if len(shape) > 3 and shape[-1] == "h" and pts[-2] == pts[0]: - shape = shape[:-1] + # Drop a redundant "l" on a path closed with "h" + if len(shape) > 3 and shape[-2:] == "lh" and pts[-2] == pts[0]: + shape = shape[:-2] + "h" pts.pop() if shape in {"mlh", "ml"}: From 8445d1663475012242f2a864c6516ee99e2d0c94 Mon Sep 17 00:00:00 2001 From: David Huggins-Daines Date: Mon, 25 Nov 2024 19:48:33 -0500 Subject: [PATCH 5/5] fix(docs): update changelog --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index bf391ff3..79e2bfa8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,7 +8,7 @@ The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). ### Changed - Using absolute instead of relative imports ([[#995](https://github.com/pdfminer/pdfminer.six/pull/995)]) -- Remove redundant 'h' to be able to recognize rectangles ([#1066](https://github.com/pdfminer/pdfminer.six/pull/1066)) +- Remove redundant line to be able to recognize rectangles ([#1066](https://github.com/pdfminer/pdfminer.six/pull/1066)) ### Deprecated