Skip to content

Commit

Permalink
fix(rapid): fix misuse of uncompressed text
Browse files Browse the repository at this point in the history
Fix misuse of the uncompressed string.
  • Loading branch information
ShannonBase committed Nov 19, 2024
1 parent f57f39b commit d6c5068
Show file tree
Hide file tree
Showing 4 changed files with 15 additions and 32 deletions.
1 change: 1 addition & 0 deletions storage/rapid_engine/compress/algorithms.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,6 +153,7 @@ std::string &lz4_compress::decompressString(std::string &compressed_str) {
void CompressFactory::make_elements() {
// important: the insertion index should be the same as its algo type value.
m_factory.emplace_back(std::make_unique<default_compress>());
m_factory.emplace_back(std::make_unique<default_compress>());
m_factory.emplace_back(std::make_unique<zlib_compress>());
m_factory.emplace_back(std::make_unique<zstd_compress>());
m_factory.emplace_back(std::make_unique<lz4_compress>());
Expand Down
2 changes: 1 addition & 1 deletion storage/rapid_engine/compress/algorithms.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
namespace ShannonBase {
namespace Compress {

enum compress_algos { NONE = 0, ZLIB, ZSTD, LZ4 };
enum compress_algos { DEFAULT = 0, NONE, ZLIB, ZSTD, LZ4 };

class Compress_algorithm {
public:
Expand Down
36 changes: 9 additions & 27 deletions storage/rapid_engine/compress/dictionary/dictionary.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -91,32 +91,9 @@ uint32 Dictionary::store(const uchar *str, size_t len, Encoding_type type) {
}

uint32 Dictionary::get(uint64 strid, String &ret_val) {
compress_algos alg [[maybe_unused]]{compress_algos::NONE};
switch (m_encoding_type) {
case Encoding_type::SORTED:
alg = compress_algos::ZSTD;
break;
case Encoding_type::VARLEN:
alg = compress_algos::LZ4;
break;
case Encoding_type::NONE:
alg = compress_algos::NONE;
break;
default:
break;
}

{
std::scoped_lock lk(m_content_mtx);
auto id_pos = m_id2content.find(strid);
if (id_pos != m_id2content.end()) {
auto val_str = id_pos->second;
String strs(val_str.c_str(), val_str.length(), ret_val.charset());
copy_if_not_alloced(&ret_val, &strs, strs.length());
} else
return 0;
}

auto compressed_str = reinterpret_cast<char *>(get(strid));
String strs(compressed_str, strlen(compressed_str), ret_val.charset());
copy_if_not_alloced(&ret_val, &strs, strs.length());
return 0;
}

Expand All @@ -139,8 +116,13 @@ uchar *Dictionary::get(uint64 strid) {
{
std::scoped_lock lk(m_content_mtx);
auto id_pos = m_id2content.find(strid);
return (id_pos != m_id2content.end()) ? (uchar *)(id_pos->second.c_str()) : nullptr;
if (id_pos != m_id2content.end()) {
auto compressed_str = id_pos->second;
auto ret_strp = CompressFactory::get_instance(alg)->decompressString(compressed_str).c_str();
return reinterpret_cast<uchar *>(const_cast<char *>(ret_strp));
}
}

return nullptr;
}

Expand Down
8 changes: 4 additions & 4 deletions storage/rapid_engine/imcs/data_table.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -164,12 +164,12 @@ int DataTable::next(uchar *buf) {
source_fld->set_notnull();
auto data_ptr = cu->chunk(current_chunk)->base() + offset_in_chunk * normalized_length;
if (is_text_value) {
uint32 str_id = *(uint32 *)data_ptr;
auto str_ptr = cu->header()->m_local_dict->get(str_id);
uint32 str_id = *reinterpret_cast<uint32 *>(data_ptr);
auto str_ptr = reinterpret_cast<char *>(cu->header()->m_local_dict->get(str_id));
Utils::Util::is_blob(cu->header()->m_type)
? (down_cast<Field_blob *>(source_fld)->set_ptr(strlen((char *)str_ptr), str_ptr), 0)
? down_cast<Field_blob *>(source_fld)->store(str_ptr, strlen(str_ptr), source_fld->charset())
: (Utils::Util::is_varstring(cu->header()->m_source_fld->type())
? source_fld->store(reinterpret_cast<char *>(str_ptr), strlen((char *)str_ptr), source_fld->charset())
? source_fld->store(reinterpret_cast<char *>(str_ptr), strlen(str_ptr), source_fld->charset())
: source_fld->store(reinterpret_cast<char *>(str_ptr), cu->pack_length(), source_fld->charset()));
} else
source_fld->pack(const_cast<uchar *>(source_fld->data_ptr()), data_ptr, normalized_length);
Expand Down

0 comments on commit d6c5068

Please sign in to comment.