From cc01f84166bec578829165de99f607212786ab6b Mon Sep 17 00:00:00 2001 From: Matt Donoughe Date: Tue, 10 Dec 2024 17:44:05 -0500 Subject: [PATCH 1/2] fix encoding of non-ascii characters --- src/main/java/com/github/packageurl/PackageURL.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/java/com/github/packageurl/PackageURL.java b/src/main/java/com/github/packageurl/PackageURL.java index 4824b42..a124d92 100644 --- a/src/main/java/com/github/packageurl/PackageURL.java +++ b/src/main/java/com/github/packageurl/PackageURL.java @@ -453,7 +453,7 @@ private static String uriEncode(String source, Charset charset) { else { // Substitution: A '%' followed by the hexadecimal representation of the ASCII value of the replaced character builder.append('%'); - builder.append(Integer.toHexString(b).toUpperCase()); + builder.append(Integer.toHexString(b & 0xff).toUpperCase()); } } return builder.toString(); From 2e0589bed5c9674da0762ad85b3c277efd7f6069 Mon Sep 17 00:00:00 2001 From: Matt Donoughe Date: Tue, 10 Dec 2024 18:02:22 -0500 Subject: [PATCH 2/2] also handle small bytes --- src/main/java/com/github/packageurl/PackageURL.java | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/main/java/com/github/packageurl/PackageURL.java b/src/main/java/com/github/packageurl/PackageURL.java index a124d92..7bc9fe2 100644 --- a/src/main/java/com/github/packageurl/PackageURL.java +++ b/src/main/java/com/github/packageurl/PackageURL.java @@ -28,6 +28,7 @@ import java.nio.charset.StandardCharsets; import java.util.Arrays; import java.util.Collections; +import java.util.Formatter; import java.util.Map; import java.util.Objects; import java.util.TreeMap; @@ -446,14 +447,15 @@ private static String uriEncode(String source, Charset charset) { } StringBuilder builder = new StringBuilder(); + Formatter formatter = new Formatter(builder); for (byte b : source.getBytes(charset)) { if (isUnreserved(b)) { builder.append((char) b); } else { - // Substitution: A '%' followed by the hexadecimal representation of the ASCII value of the replaced character - builder.append('%'); - builder.append(Integer.toHexString(b & 0xff).toUpperCase()); + // Substitution: A '%' followed by the hexadecimal representation of the charset encoded byte value + formatter.format("%%%02X", b); + formatter.flush(); } } return builder.toString();