Skip to content

Commit

Permalink
feat: add snbt slash u parse
Browse files Browse the repository at this point in the history
  • Loading branch information
OEOTYAN committed Oct 6, 2023
1 parent 1a1e32b commit 7bf74f2
Show file tree
Hide file tree
Showing 4 changed files with 101 additions and 12 deletions.
14 changes: 10 additions & 4 deletions src/liteloader/test/TestNbt.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,8 @@ LL_AUTO_TYPED_INSTANCE_HOOK(
}
};

nbt["some"]["new"]["compound"] = nbt;
nbt["hello"]["world"]["s56"] = StringTag{R"(\n\t\r\b\u1234\uffffffff)"};
nbt["some"]["new"]["compound"] = nbt;
nbt["hello"]["world"]["\u123456"] = StringTag{R"(\n\t\r\b\u1234\uffffffff)"};


auto nbt2 = *CompoundTag::fromSnbt(R"(
Expand All @@ -57,7 +57,7 @@ LL_AUTO_TYPED_INSTANCE_HOOK(
},
"hello": {
"world": {
"s56": "\\n\\t\\r\\b\\u1234\\uffffffff"
"\u123456": "\\n\\t\\r\\b\\u1234\\uffffffff"
}
},
"intarray": [I;1, 2, 3, 4, 5, -2, -3, -6],
Expand Down Expand Up @@ -98,13 +98,19 @@ LL_AUTO_TYPED_INSTANCE_HOOK(

ll::logger.info("\n{}", nbt.toBinaryNBT() == nbt2.toBinaryNBT());

ll::logger.info("\n{}", nbt.toNetworkNBT() == nbt2.toNetworkNBT());

ll::logger.info("\n{}", StringTag{nbt.toBinaryNBT()}.toSnbt(SnbtFormat::PrettyConsolePrint));
ll::logger.info("\n{}", StringTag{nbt2.toBinaryNBT()}.toSnbt(SnbtFormat::PrettyConsolePrint));

ll::logger.info("\n{}", StringTag{nbt.toNetworkNBT()}.toSnbt(SnbtFormat::PrettyConsolePrint));
ll::logger.info("\n{}", StringTag{nbt2.toNetworkNBT()}.toSnbt(SnbtFormat::PrettyConsolePrint));

ll::logger.info("\n{}", nbt.toNetworkNBT() == nbt2.toNetworkNBT());
ll::logger.info(
"\n{}",
((StringTag*)(Tag::parseSnbt(StringTag{nbt2.toNetworkNBT()}.toSnbt()).get()))
->toSnbt(SnbtFormat::PrettyConsolePrint | SnbtFormat::ForceAscii)
);

ll::logger.info("\n{}", nbt.toNetworkNBT() == nbt.toNetworkNBT());

Expand Down
89 changes: 81 additions & 8 deletions src/mc/nbt/SnbtParseImpl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,13 +143,33 @@ std::optional<CompoundTagVariant> parseNumber(std::string_view& s) {

/// Returns true when `c` is an ASCII letter or digit, or one of `-`, `+`, `_`, `.`.
///
/// The character is converted to `unsigned char` before it is handed to
/// `isalnum`: passing a negative `char` (any byte >= 0x80 on platforms where
/// `char` is signed, e.g. a UTF-8 continuation byte) is undefined behaviour
/// for the <cctype> classification functions.
bool isTrivialChar(char c) {
    return isalnum(static_cast<unsigned char>(c)) || c == '-' || c == '+' || c == '_' || c == '.';
}

/// Reads four hexadecimal digits through `get(s)` and combines them into a
/// single value, most significant digit first.
///
/// @param s  input view handed to `get` once per digit (presumably `get`
///           consumes one character per call; confirm against its definition).
/// @return   the combined value in the range [0x0000, 0xFFFF], or -1 as soon
///           as a character that is not a hexadecimal digit is read.
int get_codepoint(std::string_view& s) {
    unsigned int value = 0;

    for (int remaining = 4; remaining > 0; --remaining) {
        const auto ch = get(s);

        unsigned int nibble = 0;
        if (ch >= '0' && ch <= '9') {
            nibble = static_cast<unsigned int>(ch) - 0x30u; // '0'..'9' -> 0..9
        } else if (ch >= 'A' && ch <= 'F') {
            nibble = static_cast<unsigned int>(ch) - 0x37u; // 'A'..'F' -> 10..15
        } else if (ch >= 'a' && ch <= 'f') {
            nibble = static_cast<unsigned int>(ch) - 0x57u; // 'a'..'f' -> 10..15
        } else {
            return -1;
        }

        // Make room for the next digit and append it.
        value = (value << 4u) | nibble;
    }

    return static_cast<int>(value);
}

std::optional<std::string> parseString(std::string_view& s) {

char starts = s.front();

if (starts != '\"' && starts != '\'' && !isTrivialChar(starts)) { return std::nullopt; }

auto res = std::string{};
auto res = std::vector<char>{};

if (starts == '\"' || starts == '\'') {
s.remove_prefix(1);
Expand All @@ -160,7 +180,7 @@ std::optional<std::string> parseString(std::string_view& s) {
s.remove_prefix(1);
res.push_back(fc);
} else {
return res;
return std::string{res.begin(), res.end()};
}
}
}
Expand All @@ -174,11 +194,11 @@ std::optional<std::string> parseString(std::string_view& s) {

// closing quote
case '\"': {
if (starts == '\"') { return res; }
if (starts == '\"') { return std::string{res.begin(), res.end()}; }
res.push_back('\"');
} break;
case '\'': {
if (starts == '\'') { return res; }
if (starts == '\'') { return std::string{res.begin(), res.end()}; }
res.push_back('\'');
} break;

Expand Down Expand Up @@ -237,8 +257,63 @@ std::optional<std::string> parseString(std::string_view& s) {
// unicode escapes
case 'u': {

return std::nullopt; // TODO
}
const int codepoint1 = get_codepoint(s);
int codepoint = codepoint1; // start with codepoint1

if (codepoint1 == -1) { return std::nullopt; }

// check if code point is a high surrogate
if (0xD800 <= codepoint1 && codepoint1 <= 0xDBFF) {
// expect next \uxxxx entry
if (get(s) == '\\' && get(s) == 'u') {
const int codepoint2 = get_codepoint(s);

if (codepoint2 == -1) { return std::nullopt; }

// check if codepoint2 is a low surrogate
if ((0xDC00 <= codepoint2 && codepoint2 <= 0xDFFF)) {
// overwrite codepoint
codepoint = static_cast<int>(
// high surrogate occupies the most significant 22 bits
(static_cast<uint>(codepoint1) << 10u)
// low surrogate occupies the least significant 15 bits
+ static_cast<uint>(codepoint2)
// there is still the 0xD800, 0xDC00 and 0x10000 noise
// in the result, so we have to subtract with:
// (0xD800 << 10) + DC00 - 0x10000 = 0x35FDC00
- 0x35FDC00u
);
} else {
return std::nullopt;
}
} else {
return std::nullopt;
}
} else {
if (0xDC00 <= codepoint1 && codepoint1 <= 0xDFFF) { return std::nullopt; }
}

// translate codepoint into bytes
if (codepoint < 0x80) {
// 1-byte characters: 0xxxxxxx (ASCII)
res.push_back(static_cast<char>(codepoint));
} else if (codepoint <= 0x7FF) {
// 2-byte characters: 110xxxxx 10xxxxxx
res.push_back(static_cast<char>(0xC0u | (static_cast<uint>(codepoint) >> 6u)));
res.push_back(static_cast<char>(0x80u | (static_cast<uint>(codepoint) & 0x3Fu)));
} else if (codepoint <= 0xFFFF) {
// 3-byte characters: 1110xxxx 10xxxxxx 10xxxxxx
res.push_back(static_cast<char>(0xE0u | (static_cast<uint>(codepoint) >> 12u)));
res.push_back(static_cast<char>(0x80u | ((static_cast<uint>(codepoint) >> 6u) & 0x3Fu)));
res.push_back(static_cast<char>(0x80u | (static_cast<uint>(codepoint) & 0x3Fu)));
} else {
// 4-byte characters: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
res.push_back(static_cast<char>(0xF0u | (static_cast<uint>(codepoint) >> 18u)));
res.push_back(static_cast<char>(0x80u | ((static_cast<uint>(codepoint) >> 12u) & 0x3Fu)));
res.push_back(static_cast<char>(0x80u | ((static_cast<uint>(codepoint) >> 6u) & 0x3Fu)));
res.push_back(static_cast<char>(0x80u | (static_cast<uint>(codepoint) & 0x3Fu)));
}
} break;

// other characters after escape
default:
Expand Down Expand Up @@ -392,7 +467,6 @@ std::optional<CompoundTag> parseCompound(std::string_view& s) {

auto value = parseSnbtValue(s);


if (!value) { return std::nullopt; }

res[key.value()] = value.value();
Expand Down Expand Up @@ -449,7 +523,6 @@ std::optional<CompoundTagVariant> parseSnbtValue(std::string_view& s) {
} else {
return std::nullopt;
}
break;
default:
break;
}
Expand Down
8 changes: 8 additions & 0 deletions src/mc/nbt/Tag.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,11 @@ std::string Tag::toSnbt(SnbtFormat snbtFormat, uchar indent) const {
return TypedToSnbt(*(EndTag*)this, indent, snbtFormat);
}
}

extern std::optional<CompoundTagVariant> parseSnbtValue(std::string_view&);

/// Parses `s` with `parseSnbtValue` and converts the result to an owning tag pointer.
///
/// @param s  SNBT text to parse.
/// @return   the result of `toUnique()` on the parsed value, or `nullptr`
///           when `parseSnbtValue` yields no value.
std::unique_ptr<Tag> Tag::parseSnbt(std::string_view s) {
    if (auto parsed = parseSnbtValue(s); parsed.has_value()) { return parsed->toUnique(); }
    return nullptr;
}
2 changes: 2 additions & 0 deletions src/mc/nbt/Tag.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ class Tag {

LLNDAPI std::string toSnbt(SnbtFormat snbtFormat = SnbtFormat::PrettyFilePrint, uchar indent = 4) const;

LLNDAPI static std::unique_ptr<Tag> parseSnbt(std::string_view);

public:
// NOLINTBEGIN
// vIndex: 0, symbol: ??1Tag@@UEAA@XZ
Expand Down

0 comments on commit 7bf74f2

Please sign in to comment.