-
-
Notifications
You must be signed in to change notification settings - Fork 949
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Revert "Revert "SAK-49209 - Avoid double URL encoding - use Base62 fo…
- Loading branch information
Showing
4 changed files
with
279 additions
and
7 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
129 changes: 129 additions & 0 deletions
129
basiclti/tsugi-util/src/java/org/tsugi/util/Base62.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,129 @@ | ||
/** | ||
* This class provides utility methods for encoding and decoding data using the Base62 encoding scheme. | ||
* Base62 is a binary-to-text encoding scheme that represents binary data using a set of 62 alphanumeric characters (A-Z, a-z, and 0-9). | ||
* It is useful for converting binary data (e.g., byte arrays) into strings that are safe for use in URLs and other text-based contexts. | ||
*/ | ||
|
||
/* This code was written using the following ChatGPT prompts: | ||
* | ||
* Can you write a base62 encoder and decoder in java | ||
* Could you make this encode and decode strings and not numbers | ||
* Please complete the implementation details | ||
* Please write the JavaDoc for this class. | ||
* Please write a unit test for this class. | ||
*/ | ||
|
||
package org.tsugi.util; | ||
|
||
import java.util.HashMap; | ||
import java.util.Map; | ||
|
||
public class Base62 { | ||
|
||
/** | ||
* The characters used for Base62 encoding. | ||
*/ | ||
private static final String BASE62_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; | ||
|
||
/** | ||
* A mapping of characters to their corresponding index in the Base62 character set. | ||
*/ | ||
private static final Map<Character, Integer> CHAR_TO_INDEX_MAP = new HashMap<>(); | ||
|
||
/** | ||
* Initializes the CHAR_TO_INDEX_MAP with character-to-index mappings for Base62 characters. | ||
*/ | ||
static { | ||
for (int i = 0; i < BASE62_CHARS.length(); i++) { | ||
CHAR_TO_INDEX_MAP.put(BASE62_CHARS.charAt(i), i); | ||
} | ||
} | ||
|
||
/** | ||
* Encodes a given string into a Base62-encoded string. | ||
* | ||
* @param data The input string to be encoded. | ||
* @return The Base62-encoded string. | ||
*/ | ||
public static String encode(String data) { | ||
if ( data == null ) return null; | ||
byte[] bytes = data.getBytes(); | ||
StringBuilder encoded = new StringBuilder(); | ||
long value = 0; | ||
int bits = 0; | ||
|
||
for (byte b : bytes) { | ||
value = (value << 8) | (b & 0xFF); | ||
bits += 8; | ||
|
||
while (bits >= 6) { | ||
int index = (int) ((value >> (bits - 6)) & 0x3F); | ||
encoded.append(BASE62_CHARS.charAt(index)); | ||
bits -= 6; | ||
} | ||
} | ||
|
||
if (bits > 0) { | ||
int index = (int) ((value << (6 - bits)) & 0x3F); | ||
encoded.append(BASE62_CHARS.charAt(index)); | ||
} | ||
|
||
return encoded.toString(); | ||
} | ||
|
||
/** | ||
* Decodes a Base62-encoded string into its original string representation. | ||
* | ||
* @param base62 The Base62-encoded string to be decoded. | ||
* @return The decoded original string. | ||
* @throws IllegalArgumentException If the input string contains invalid Base62 characters. | ||
*/ | ||
public static String decode(String base62) throws IllegalArgumentException { | ||
|
||
if ( base62 == null ) return null; | ||
|
||
long value = 0; | ||
int bits = 0; | ||
byte[] bytes = new byte[base62.length() * 6 / 8]; | ||
int byteIndex = 0; | ||
|
||
for (int i = 0; i < base62.length(); i++) { | ||
char c = base62.charAt(i); | ||
int charValue = CHAR_TO_INDEX_MAP.getOrDefault(c, -1); | ||
if (charValue == -1) { | ||
throw new IllegalArgumentException("Invalid character in base62 string: " + c); | ||
} | ||
|
||
value = (value << 6) | charValue; | ||
bits += 6; | ||
|
||
while (bits >= 8) { | ||
bytes[byteIndex++] = (byte) ((value >> (bits - 8)) & 0xFF); | ||
bits -= 8; | ||
} | ||
} | ||
|
||
return new String(bytes, 0, byteIndex); | ||
} | ||
|
||
/** | ||
* Decodes a Base62-encoded string into its original string representation or returning the original string | ||
* | ||
* This is a way to easily recover from a double decode. This is a little soft in its approach | ||
* and only works if the strings being encoded and decoded have non-base62 characters. | ||
* | ||
* @param base62 The Base62-encoded string to be decoded. | ||
* @return The decoded original string or the original string if the input string is not Base62 | ||
*/ | ||
public static String decodeSafe(String base62) throws IllegalArgumentException { | ||
|
||
if ( base62 == null ) return null; | ||
|
||
try { | ||
return decode(base62); | ||
} catch (IllegalArgumentException e) { | ||
return base62; | ||
} | ||
} | ||
|
||
} |
142 changes: 142 additions & 0 deletions
142
basiclti/tsugi-util/src/test/org/tsugi/util/Base62Test.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,142 @@ | ||
package org.tsugi.util; | ||
|
||
import static org.junit.Assert.assertEquals; | ||
import static org.junit.Assert.fail; | ||
import static org.junit.Assert.assertFalse; | ||
|
||
import org.junit.Test; | ||
|
||
import org.tsugi.util.Base62; | ||
|
||
/* This code was written by ChatGPT with the following prompt | ||
* Please write a unit test for a Base62 class | ||
*/ | ||
|
||
public class Base62Test { | ||
|
||
@Test | ||
public void testEncodeAndDecode() { | ||
String[] testStrings = { | ||
"Hello, Base62!", | ||
"Testing encoding and decoding.", | ||
"12345", | ||
"This is a longer string with more characters.", | ||
"A tree 🌲 was here", | ||
"A", | ||
"" | ||
}; | ||
|
||
for (String input : testStrings) { | ||
String encoded = Base62.encode(input); | ||
String decoded = Base62.decode(encoded); | ||
assertEquals("Decoding did not produce the original string for input: " + input, input, decoded); | ||
|
||
try { | ||
String urlEncoded = java.net.URLEncoder.encode(encoded, "UTF-8"); | ||
assertEquals("UrlEncoding changed the string: " + encoded, encoded, urlEncoded); | ||
} catch (java.io.UnsupportedEncodingException e) { | ||
fail("Unxpected UnsupportedEncodingException for URLEncoder "+encoded); | ||
} | ||
} | ||
|
||
assertEquals("Encode of null should produce null: ", null, Base62.encode(null)); | ||
assertEquals("Decode of null should produce null: ", null, Base62.decode(null)); | ||
} | ||
|
||
@Test | ||
public void testInvalidCharacters() { | ||
// Test invalid characters in decoding | ||
String invalidBase62String = "InvalidString$#@!"; | ||
try { | ||
Base62.decode(invalidBase62String); | ||
fail("Expected IllegalArgumentException for invalid Base62 string"); | ||
} catch (IllegalArgumentException e) { | ||
// Expected exception | ||
} | ||
assertEquals("Encode of null should produce null: ", null, Base62.encode(null)); | ||
} | ||
|
||
@Test | ||
public void testInvalidCharactersSafe() { | ||
// Test invalid characters in decoding | ||
String invalidBase62String = "InvalidString$#@!"; | ||
String decoded = Base62.decodeSafe(invalidBase62String); | ||
|
||
assertEquals("Safe decode of non-base22 string should return the string: ", invalidBase62String, decoded); | ||
|
||
String encoded = Base62.encode(invalidBase62String); | ||
decoded = Base62.decodeSafe(encoded); | ||
assertEquals("Safe decode of base22 string should return the decoded string: ", invalidBase62String, decoded); | ||
} | ||
|
||
/** | ||
* As is always the case, developers resist the idea of using Base62 with things like | ||
* Base64 URL safe encoding. Which as the example below shows that whilst the resulting | ||
* encoded strings *are* URL Encodable - they will suffer when double URL encoding happens. | ||
*/ | ||
@Test | ||
public void testVerifyJavaBase64UrlSafeEncodingFails() throws Exception { | ||
String[] testStrings = { | ||
"Hello, World!", | ||
"12345", | ||
"abcABC", | ||
"A tree 🌲 was here", | ||
"!@#$%^&*()_+{}[]|\\:;<>,.?/~`" | ||
}; | ||
|
||
int failcount = 0; | ||
|
||
for (String input : testStrings) { | ||
String base64UrlEncoded = java.util.Base64.getUrlEncoder().encodeToString(input.getBytes("UTF-8")); | ||
|
||
// URL-encode the base64 URL encoded string | ||
String urlEncoded = java.net.URLEncoder.encode(base64UrlEncoded, "UTF-8"); | ||
|
||
// Compare the original encoded string with the URL-encoded string | ||
if ( ! base64UrlEncoded.equals(urlEncoded)) failcount ++; | ||
} | ||
assertFalse(failcount == 0); | ||
} | ||
|
||
/** | ||
* Also suggested in code review was the serialization used by io.jsonwebtoken.io.Base64 | ||
* which does not pad. But the writers of said code made it private on purpose - probably | ||
* because it is not precisely Base64 because the lack of padding. | ||
* | ||
* This test might work if it were not for the fact that io.jsonwebtoken.io.Base64 is | ||
* explicitly not public by its creators. Adding this import above will fail to compile: | ||
* | ||
* import io.jsonwebtoken.io.Base64; | ||
* | ||
* [ERROR] Base62Test.java:[10,25] error: Base64 is not public in io.jsonwebtoken.io; cannot be accessed from outside package | ||
* | ||
* The test below might work if it were not for the fact that io.jsonwebtoken.io.Base64 is | ||
* explicitly not public by its creators. Adding this import above will fail to compile: | ||
*/ | ||
|
||
/* | ||
@Test | ||
public void testIoJsonWebTokenBase64UrlIsPrivate() throws Exception { | ||
String[] testStrings = { | ||
"Hello, World!", | ||
"12345", | ||
"abcABC", | ||
"A tree 🌲 was here", | ||
"!@#$%^&*()_+{}[]|\\:;<>,.?/~`" | ||
}; | ||
for (String input : testStrings) { | ||
// Encode using jjwt-api base64 URL encoding | ||
boolean linesep = false; | ||
String base64UrlEncoded = io.jsonwebtoken.io.Base64.URL_SAFE.encodeToString(input.getBytes("UTF-8"), linesep); | ||
// URL-encode the base64 URL encoded string | ||
String urlEncoded = java.net.URLEncoder.encode(base64UrlEncoded, "UTF-8"); | ||
// Compare the original encoded string with the URL-encoded string | ||
assertEquals(base64UrlEncoded, urlEncoded); | ||
} | ||
} | ||
*/ | ||
} | ||
|