Skip to content

Commit

Permalink
LibURL: Use UTF-8 for percent encoding URL fragments
Browse files Browse the repository at this point in the history
(cherry picked from commit c10cb8ac8d6c897c8fb184d5f0c2a09b8f699b1d)
  • Loading branch information
Gingeh authored and nico committed Nov 15, 2024
1 parent 0206d2d commit 0cf4dd8
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 10 deletions.
37 changes: 32 additions & 5 deletions Tests/LibURL/TestURL.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -339,11 +339,38 @@ TEST_CASE(unicode)

// Checks how a non-ASCII code point (✓) in the query component is percent-encoded.
// NOTE(review): this span was taken from a rendered diff whose +/- markers were lost; the
// five statements directly below appear to be the pre-commit body and the two braced
// blocks after them the post-commit body — confirm against the real file before relying on it.
TEST_CASE(query_with_non_ascii)
{
URL::URL url { "http://example.com/?utf8=✓"sv };
EXPECT(url.is_valid());
EXPECT_EQ(url.serialize_path(), "/"sv);
EXPECT_EQ(url.query(), "utf8=%E2%9C%93");
EXPECT(!url.fragment().has_value());
// Parse without passing an encoding argument: the query is expected to contain the
// percent-encoded bytes E2 9C 93 for ✓.
{
URL::URL url = URL::Parser::basic_parse("http://example.com/?utf8=✓"sv);
EXPECT(url.is_valid());
EXPECT_EQ(url.serialize_path(), "/"sv);
EXPECT_EQ(url.query(), "utf8=%E2%9C%93");
EXPECT(!url.fragment().has_value());
}
// Parse with "shift_jis"sv as the fifth argument (presumably the encoding parameter —
// confirm against basic_parse's declaration). The expected query value is the
// percent-encoded form of "&#10003;" (%26 = '&', %23 = '#', %3B = ';'), where 10003 is
// the decimal code point of ✓.
{
URL::URL url = URL::Parser::basic_parse("http://example.com/?shift_jis=✓"sv, {}, nullptr, {}, "shift_jis"sv);
EXPECT(url.is_valid());
EXPECT_EQ(url.serialize_path(), "/"sv);
EXPECT_EQ(url.query(), "shift_jis=%26%2310003%3B");
EXPECT(!url.fragment().has_value());
}
}

// A non-ASCII code point (✓) in the fragment is expected to come back as the
// percent-encoded bytes E2 9C 93, both when no encoding argument is passed and when
// "shift_jis"sv is passed as the fifth argument to basic_parse.
TEST_CASE(fragment_with_non_ascii)
{
    // Case 1: no encoding argument.
    {
        auto parsed = URL::Parser::basic_parse("http://example.com/#✓"sv);
        EXPECT(parsed.is_valid());
        EXPECT_EQ(parsed.serialize_path(), "/"sv);
        EXPECT(!parsed.query().has_value());
        EXPECT_EQ(parsed.fragment(), "%E2%9C%93");
    }

    // Case 2: "shift_jis"sv passed explicitly; the expected fragment is the same as in case 1.
    {
        auto parsed = URL::Parser::basic_parse("http://example.com/#✓"sv, {}, nullptr, {}, "shift_jis"sv);
        EXPECT(parsed.is_valid());
        EXPECT_EQ(parsed.serialize_path(), "/"sv);
        EXPECT(!parsed.query().has_value());
        EXPECT_EQ(parsed.fragment(), "%E2%9C%93");
    }
}

TEST_CASE(complete_file_url_with_base)
Expand Down
12 changes: 7 additions & 5 deletions Userland/Libraries/LibURL/Parser.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1688,10 +1688,12 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, URL
break;
// -> query state, https://url.spec.whatwg.org/#query-state
case State::Query:
// FIXME: 1. If encoding is not UTF-8 and one of the following is true:
// * url is not special
// * url’s scheme is "ws" or "wss"
// then set encoding to UTF-8.
// 1. If encoding is not UTF-8 and one of the following is true:
// * url is not special
// * url’s scheme is "ws" or "wss"
// then set encoding to UTF-8.
if (!url->is_special() || url->m_data->scheme == "ws" || url->m_data->scheme == "wss")
encoder = TextCodec::encoder_for("utf-8"sv);

// 2. If one of the following is true:
// * state override is not given and c is U+0023 (#)
Expand Down Expand Up @@ -1746,7 +1748,7 @@ URL Parser::basic_parse(StringView raw_input, Optional<URL> const& base_url, URL
// NOTE: The percent-encode is done on EOF on the entire buffer.
buffer.append_code_point(code_point);
} else {
url->m_data->fragment = percent_encode_after_encoding(*encoder, buffer.string_view(), PercentEncodeSet::Fragment);
url->m_data->fragment = percent_encode_after_encoding(*TextCodec::encoder_for("utf-8"sv), buffer.string_view(), PercentEncodeSet::Fragment);
buffer.clear();
}
break;
Expand Down

0 comments on commit 0cf4dd8

Please sign in to comment.