From 71f721a6cfa0209dae01b49fd8279fd8c99942e2 Mon Sep 17 00:00:00 2001 From: Matthew A Johnson Date: Fri, 28 May 2021 14:40:06 +0100 Subject: [PATCH] Addressing NPZ version bug + misc (#23) --- .travis.yml | 2 +- CHANGELOG.md | 12 +++++- CMakeLists.txt | 29 ++++++++----- RELEASE_NOTES | 9 +++- VERSION | 2 +- include/npy/npz.h | 16 +++++--- include/npy/tensor.h | 84 +++++++++++++++++++------------------- src/memstream.cpp | 96 +++++++++++++++++++++---------------------- src/npy.cpp | 12 +++--- src/npz.cpp | 97 +++++++++++++++++++++++++------------------- 10 files changed, 200 insertions(+), 159 deletions(-) diff --git a/.travis.yml b/.travis.yml index ab68ec0..df8210b 100644 --- a/.travis.yml +++ b/.travis.yml @@ -17,6 +17,6 @@ before_install: script: - mkdir build - cd build - - /usr/bin/cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo .. + - /usr/bin/cmake -DCMAKE_BUILD_TYPE=RelWithDebInfo -DBUILD_TESTS=1 .. - /usr/bin/cmake --build . - /usr/bin/ctest -C RelWithDebInfo diff --git a/CHANGELOG.md b/CHANGELOG.md index 6e6d464..a50415f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,18 @@ # Changelog +## [2021-05-28 - Version 1.4.0](https://github.com/matajoh/libnpy/releases/tag/v1.4.0) + +Improvements: +- Further minor CMake changes to improve ease of use +- NPZ streams now have `is_open` methods to check for successful file opening +- Minor code style changes + +Bug fixes: +- NPZ files will now correctly handle PKZIP versions after 2.0, both for reading and writing + ## [2021-05-21 - Version 1.3.1](https://github.com/matajoh/libnpy/releases/tag/v1.3.1) -New Features: +Improvements: - Updated CMake integration to make the library easier to use via `FetchContent` ## [2021-02-10 - Version 1.3.0](https://github.com/matajoh/libnpy/releases/tag/v1.3.0) diff --git a/CMakeLists.txt b/CMakeLists.txt index 42ccf78..e010a08 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -20,6 +20,8 @@ project( libnpy VERSION ${LIBNPY_VERSION} LANGUAGES CXX) # -------------------- Options -------------------------------- +option( BUILD_TESTS "Specifies whether to build the tests" OFF ) +option( BUILD_SAMPLES "Specifies whether to build the samples" OFF ) option( BUILD_DOCUMENTATION "Specifies whether to build the documentation for the API and XML" OFF ) option( INCLUDE_CSHARP "Specifies whether to build libnpy with C# bindings" OFF ) @@ -89,19 +91,13 @@ if( BUILD_DOCUMENTATION ) find_package( Doxygen REQUIRED ) endif() -# -------------------- Testing ------------------------------------ - -include( CTest ) - -if( MSVC ) - set( LIBNPY_CSHARP_DIR ${CMAKE_BINARY_DIR}/CSharpWrapper/$ ) -endif() - # -------------------- Walk the subdirectories -------------------- add_subdirectory( src ) -add_subdirectory( test ) -add_subdirectory( samples ) + +if( BUILD_SAMPLES ) + add_subdirectory( samples ) +endif() if( BUILD_DOCUMENTATION ) add_subdirectory( doc ) @@ -119,6 +115,18 @@ target_include_directories(npy ${CMAKE_CURRENT_SOURCE_DIR}/src ) +# -------------------- Testing ------------------------------------ + +if( BUILD_TESTS ) + if( MSVC ) + set( LIBNPY_CSHARP_DIR ${CMAKE_BINARY_DIR}/CSharpWrapper/$ ) + endif() + + include( CTest ) + add_subdirectory( test ) +endif() + + # -------------------- Build settings ----------------------------- # use C++11 @@ -182,7 +190,6 @@ install( FILES ${PROJECT_FILES} DESTINATION "." ) if( MSVC ) # NuGet files - set( CMAKE_INSTALL_PREFIX "${CMAKE_CURRENT_BINARY_DIR}/nuget" CACHE PATH "..." FORCE ) set( LIBNPY_NUGET_NAME "npy-${SYSTEM_TOOLKIT}-${SYSTEM_BITS}-${CMAKE_BUILD_TYPE}" CACHE STRING "npy NuGet Name" FORCE ) file( READ RELEASE_NOTES LIBNPY_RELEASE_NOTES ) diff --git a/RELEASE_NOTES b/RELEASE_NOTES index 2a4d3a7..058736e 100644 --- a/RELEASE_NOTES +++ b/RELEASE_NOTES @@ -1,2 +1,7 @@ -New Features: -- Updated CMake integration to make the library easier to use via `FetchContent` \ No newline at end of file +Improvements: +- Further minor CMake changes to improve ease of use +- NPZ streams now have `is_open` methods to check for successful file opening +- Minor code style changes + +Bug fixes: +- NPZ files will now correctly handle PKZIP versions after 2.0, both for reading and writing \ No newline at end of file diff --git a/VERSION b/VERSION index 6261a05..e21e727 100644 --- a/VERSION +++ b/VERSION @@ -1 +1 @@ -1.3.1 \ No newline at end of file +1.4.0 \ No newline at end of file diff --git a/include/npy/npz.h b/include/npy/npz.h index a4fb7fa..b2a6bd0 100644 --- a/include/npy/npz.h +++ b/include/npy/npz.h @@ -75,6 +75,9 @@ class onpzstream compression_method_t compression = compression_method_t::STORED, endian_t endianness = npy::endian_t::NATIVE); + /** Whether the underlying stream has successfully been opened. */ + bool is_open() const; + /** Closes this stream. This will write the directory and close * the underlying stream as well. */ void close(); @@ -89,7 +92,7 @@ class onpzstream template class TENSOR> void write(const std::string &filename, const TENSOR &tensor) { - if (this->m_closed) + if (m_closed) { throw std::logic_error("Stream is closed"); } @@ -104,7 +107,7 @@ class onpzstream name += ".npy"; } - this->write_file(name, std::move(output.buf())); + write_file(name, std::move(output.buf())); } /** Write a tensor to the NPZ archive. @@ -115,7 +118,7 @@ class onpzstream template void write(const std::string &filename, const tensor &tensor) { - this->write(filename, tensor); + write(filename, tensor); } /** Destructor. This will call @@ -147,6 +150,9 @@ class inpzstream */ inpzstream(const std::string &path); + /** Whether the underlying stream has successfully been opened. */ + bool is_open() const; + /** Closes the underlying stream. */ void close(); @@ -175,7 +181,7 @@ class inpzstream template class TENSOR> TENSOR read(const std::string &filename) { - imemstream stream(this->read_file(filename)); + imemstream stream(read_file(filename)); return load(stream); } @@ -188,7 +194,7 @@ class inpzstream template tensor read(const std::string &filename) { - return this->read(filename); + return read(filename); } private: diff --git a/include/npy/tensor.h b/include/npy/tensor.h index 4eb73bd..3ca5dad 100644 --- a/include/npy/tensor.h +++ b/include/npy/tensor.h @@ -84,7 +84,7 @@ class tensor template const T &operator()(Indices... index) const { - return this->m_values[this->ravel(std::vector({index...}))]; + return m_values[ravel(std::vector({index...}))]; } /** Index function. @@ -93,7 +93,7 @@ class tensor */ const T &operator()(const std::vector &multi_index) const { - return this->m_values[this->ravel(multi_index)]; + return m_values[ravel(multi_index)]; } /** Variable parameter index function. @@ -103,7 +103,7 @@ class tensor template T &operator()(Indices... index) { - return this->m_values[this->ravel(std::vector({index...}))]; + return m_values[ravel(std::vector({index...}))]; } /** Index function. @@ -112,31 +112,31 @@ class tensor */ T &operator()(const std::vector &multi_index) { - return this->m_values[this->ravel(multi_index)]; + return m_values[ravel(multi_index)]; } /** Iterator pointing at the beginning of the tensor in memory. */ typename std::vector::iterator begin() { - return this->m_values.begin(); + return m_values.begin(); } /** Iterator pointing at the beginning of the tensor in memory. */ typename std::vector::const_iterator begin() const { - return this->m_values.begin(); + return m_values.begin(); } /** Iterator pointing at the end of the tensor in memory. */ typename std::vector::iterator end() { - return this->m_values.end(); + return m_values.end(); } /** Iterator pointing at the end of the tensor in memory. */ typename std::vector::const_iterator end() const { - return this->m_values.end(); + return m_values.end(); } /** Sets the value at the provided index. @@ -145,7 +145,7 @@ class tensor */ void set(const std::vector &multi_index, const T &value) { - this->m_values[this->ravel(multi_index)] = value; + m_values[ravel(multi_index)] = value; } /** Gets the value at the provided index. @@ -154,19 +154,19 @@ class tensor */ const T &get(const std::vector &multi_index) const { - return this->m_values[this->ravel(multi_index)]; + return m_values[ravel(multi_index)]; } /** The data type of the tensor. */ const data_type_t dtype() const { - return this->m_dtype; + return m_dtype; } /** The underlying values buffer. */ const std::vector &values() const { - return this->m_values; + return m_values; } /** Copy values from the source to this tensor. @@ -175,12 +175,12 @@ class tensor */ void copy_from(const T *source, size_t nitems) { - if (nitems != this->size()) + if (nitems != size()) { throw std::invalid_argument("nitems"); } - std::copy(source, source + nitems, this->m_values.begin()); + std::copy(source, source + nitems, m_values.begin()); } /** Copy values from the provided vector. @@ -188,12 +188,12 @@ class tensor */ void copy_from(const std::vector &source) { - if (source.size() != this->size()) + if (source.size() != size()) { throw std::invalid_argument("source.size"); } - std::copy(source.begin(), source.end(), this->m_values.begin()); + std::copy(source.begin(), source.end(), m_values.begin()); } /** Move values from the provided vector. @@ -201,37 +201,37 @@ class tensor */ void move_from(std::vector &&source) { - if (source.size() != this->size) + if (source.size() != size) { throw std::invalid_argument("source.size"); } - this->m_values = std::move(source); + m_values = std::move(source); } /** A pointer to the start of the underlying values buffer. */ T *data() { - return this->m_values.data(); + return m_values.data(); } /** A pointer to the start of the underlying values buffer. */ const T *data() const { - return this->m_values.data(); + return m_values.data(); } /** The number of elements in the tensor. */ size_t size() const { - return this->m_values.size(); + return m_values.size(); } /** The shape of the vector. Each element is the size of the * corresponding dimension. */ const std::vector &shape() const { - return this->m_shape; + return m_shape; } /** Returns the dimensionality of the tensor at the specified index. @@ -240,34 +240,34 @@ class tensor */ const size_t shape(int index) const { - return this->m_shape[index]; + return m_shape[index]; } /** Whether the tensor data is stored in FORTRAN, or column-major, order. */ bool fortran_order() const { - return this->m_fortran_order; + return m_fortran_order; } /** Copy assignment operator. */ tensor &operator=(const tensor &other) { - this->m_shape = other.m_shape; - this->m_ravel_strides = other.m_ravel_strides; - this->m_fortran_order = other.m_fortran_order; - this->m_dtype = other.m_dtype; - this->m_values = other.m_values; + m_shape = other.m_shape; + m_ravel_strides = other.m_ravel_strides; + m_fortran_order = other.m_fortran_order; + m_dtype = other.m_dtype; + m_values = other.m_values; return *this; } /** Move assignment operator. */ tensor &operator=(tensor &&other) { - this->m_shape = std::move(other.m_shape); - this->m_ravel_strides = std::move(other.m_ravel_strides); - this->m_fortran_order = other.m_fortran_order; - this->m_dtype = other.m_dtype; - this->m_values = std::move(other.m_values); + m_shape = std::move(other.m_shape); + m_ravel_strides = std::move(other.m_ravel_strides); + m_fortran_order = other.m_fortran_order; + m_dtype = other.m_dtype; + m_values = std::move(other.m_values); return *this; } @@ -291,8 +291,8 @@ class tensor size_t ravel(INDEX_IT index, SHAPE_IT shape) const { std::size_t ravel = 0; - for (auto stride = this->m_ravel_strides.begin(); - stride < this->m_ravel_strides.end(); + for (auto stride = m_ravel_strides.begin(); + stride < m_ravel_strides.end(); ++index, ++shape, ++stride) { if (*index >= *shape) @@ -312,13 +312,13 @@ class tensor */ size_t ravel(const std::vector &multi_index) const { - if (multi_index.size() != this->m_shape.size()) + if (multi_index.size() != m_shape.size()) { throw std::invalid_argument("multi_index"); } std::vector abs_multi_index(multi_index.size()); - std::transform(multi_index.begin(), multi_index.end(), this->m_shape.begin(), abs_multi_index.begin(), + std::transform(multi_index.begin(), multi_index.end(), m_shape.begin(), abs_multi_index.begin(), [](std::int32_t index, std::size_t shape) -> std::size_t{ if(index < 0) { @@ -328,7 +328,7 @@ class tensor return static_cast(index); }); - return this->ravel(abs_multi_index); + return ravel(abs_multi_index); } /** Ravels a multi-index into a single value indexing the buffer. @@ -337,12 +337,12 @@ class tensor */ size_t ravel(const std::vector &abs_multi_index) const { - if (this->m_fortran_order) + if (m_fortran_order) { - return this->ravel(abs_multi_index.rbegin(), this->m_shape.rbegin()); + return ravel(abs_multi_index.rbegin(), m_shape.rbegin()); } - return this->ravel(abs_multi_index.begin(), this->m_shape.begin()); + return ravel(abs_multi_index.begin(), m_shape.begin()); } private: diff --git a/src/memstream.cpp b/src/memstream.cpp index b758900..4b27d1b 100644 --- a/src/memstream.cpp +++ b/src/memstream.cpp @@ -8,30 +8,30 @@ namespace npy { membuf::membuf() : membuf(BUFFER_SIZE) { - this->seekpos(0); + seekpos(0); } membuf::membuf(size_t n) { m_buffer.reserve(BUFFER_SIZE); - this->seekpos(0); + seekpos(0); } membuf::membuf(const std::vector &buffer) : m_buffer(buffer) { - this->seekpos(0); + seekpos(0); } membuf::membuf(std::vector &&buffer) : m_buffer(std::move(buffer)) { - this->seekpos(0); + seekpos(0); } membuf *membuf::setbuf(std::uint8_t *s, std::streamsize n) { - this->m_buffer = std::vector(s, s + n); - this->m_posg = this->m_buffer.begin(); - this->m_posp = this->m_buffer.begin(); + m_buffer = std::vector(s, s + n); + m_posg = m_buffer.begin(); + m_posp = m_buffer.begin(); return this; } @@ -43,19 +43,19 @@ membuf::pos_type membuf::seekoff(membuf::off_type off, std::ios_base::seekdir wa switch (way) { case std::ios_base::beg: - this->m_posg = this->m_buffer.begin() + off; + m_posg = m_buffer.begin() + off; break; case std::ios_base::end: - this->m_posg = this->m_buffer.end() + off; + m_posg = m_buffer.end() + off; break; case std::ios_base::cur: - this->m_posg += off; + m_posg += off; break; } - result = static_cast(this->m_posg - this->m_buffer.begin()); + result = static_cast(m_posg - m_buffer.begin()); } if (which & std::ios::out) @@ -63,19 +63,19 @@ membuf::pos_type membuf::seekoff(membuf::off_type off, std::ios_base::seekdir wa switch (way) { case std::ios_base::beg: - this->m_posp = this->m_buffer.begin() + off; + m_posp = m_buffer.begin() + off; break; case std::ios_base::end: - this->m_posp = this->m_buffer.end() + off; + m_posp = m_buffer.end() + off; break; case std::ios_base::cur: - this->m_posp += off; + m_posp += off; break; } - result = static_cast(this->m_posp - this->m_buffer.begin()); + result = static_cast(m_posp - m_buffer.begin()); } return result; @@ -86,14 +86,14 @@ membuf::pos_type membuf::seekpos(membuf::pos_type pos, ios_base::openmode which) membuf::pos_type result(membuf::off_type(-1)); if (which & std::ios_base::in) { - this->m_posg = this->m_buffer.begin() + pos; - result = static_cast(this->m_posg - this->m_buffer.begin()); + m_posg = m_buffer.begin() + pos; + result = static_cast(m_posg - m_buffer.begin()); } if (which & std::ios::out) { - this->m_posp = this->m_buffer.begin() + pos; - result = static_cast(this->m_posp - this->m_buffer.begin()); + m_posp = m_buffer.begin() + pos; + result = static_cast(m_posp - m_buffer.begin()); } return result; @@ -101,26 +101,26 @@ membuf::pos_type membuf::seekpos(membuf::pos_type pos, ios_base::openmode which) std::streamsize membuf::showmanyc() { - return this->m_buffer.end() - this->m_posg; + return m_buffer.end() - m_posg; } std::streamsize membuf::xsgetn(std::uint8_t *s, std::streamsize n) { - std::streamsize bytes_read = this->showmanyc(); + std::streamsize bytes_read = showmanyc(); bytes_read = n < bytes_read ? n : bytes_read; - auto end = this->m_posg + bytes_read; - std::copy(this->m_posg, end, s); - this->m_posg = end; + auto end = m_posg + bytes_read; + std::copy(m_posg, end, s); + m_posg = end; return bytes_read; } membuf::int_type membuf::underflow() { int_type result = membuf::traits_type::eof(); - if (this->m_posg < this->m_buffer.end()) + if (m_posg < m_buffer.end()) { - result = membuf::traits_type::to_int_type(*this->m_posg); - ++this->m_posg; + result = membuf::traits_type::to_int_type(*m_posg); + ++m_posg; } return result; @@ -130,11 +130,11 @@ membuf::int_type membuf::pbackfail(membuf::int_type c) { if (c != membuf::traits_type::eof()) { - *this->m_posg = membuf::traits_type::to_char_type(c); + *m_posg = membuf::traits_type::to_char_type(c); } else { - c = membuf::traits_type::to_int_type(*this->m_posg); + c = membuf::traits_type::to_int_type(*m_posg); } return c; @@ -142,20 +142,20 @@ membuf::int_type membuf::pbackfail(membuf::int_type c) std::streamsize membuf::xsputn(const std::uint8_t *s, std::streamsize n) { - std::streamsize num_copy = this->m_buffer.end() - this->m_posp; + std::streamsize num_copy = m_buffer.end() - m_posp; num_copy = n < num_copy ? n : num_copy; std::streamsize num_insert = n - num_copy; - std::copy(s, s + num_copy, this->m_posp); + std::copy(s, s + num_copy, m_posp); if (num_insert > 0) { - auto diffg = this->m_posg - this->m_buffer.begin(); - this->m_buffer.insert(this->m_buffer.end(), s + num_copy, s + n); - this->m_posp = this->m_buffer.end(); - this->m_posg = this->m_buffer.begin() + diffg; + auto diffg = m_posg - m_buffer.begin(); + m_buffer.insert(m_buffer.end(), s + num_copy, s + n); + m_posp = m_buffer.end(); + m_posg = m_buffer.begin() + diffg; }else{ - this->m_posp += num_copy; + m_posp += num_copy; } return n; @@ -165,7 +165,7 @@ membuf::int_type membuf::overflow(membuf::int_type c) { if (c != membuf::traits_type::eof()) { - this->m_buffer.push_back(membuf::traits_type::to_char_type(c)); + m_buffer.push_back(membuf::traits_type::to_char_type(c)); } return c; @@ -173,56 +173,56 @@ membuf::int_type membuf::overflow(membuf::int_type c) std::vector &membuf::buf() { - return this->m_buffer; + return m_buffer; } const std::vector &membuf::buf() const { - return this->m_buffer; + return m_buffer; } imemstream::imemstream(const std::vector &buffer) : m_buffer(buffer), - std::basic_istream(&this->m_buffer) + std::basic_istream(&m_buffer) { } imemstream::imemstream(std::vector &&buffer) : m_buffer(std::move(buffer)), - std::basic_istream(&this->m_buffer) + std::basic_istream(&m_buffer) { } std::vector &imemstream::buf() { - return this->m_buffer.buf(); + return m_buffer.buf(); } const std::vector &imemstream::buf() const { - return this->m_buffer.buf(); + return m_buffer.buf(); } -omemstream::omemstream() : std::basic_ostream(&this->m_buffer) +omemstream::omemstream() : std::basic_ostream(&m_buffer) { } omemstream::omemstream(std::vector &&buffer) : m_buffer(std::move(buffer)), - std::basic_ostream(&this->m_buffer) + std::basic_ostream(&m_buffer) { } omemstream::omemstream(std::streamsize capacity) : m_buffer(capacity), - std::basic_ostream(&this->m_buffer) + std::basic_ostream(&m_buffer) { } std::vector &omemstream::buf() { - return this->m_buffer.buf(); + return m_buffer.buf(); } const std::vector &omemstream::buf() const { - return this->m_buffer.buf(); + return m_buffer.buf(); } } // namespace npy \ No newline at end of file diff --git a/src/npy.cpp b/src/npy.cpp index 57003bc..5b43f78 100644 --- a/src/npy.cpp +++ b/src/npy.cpp @@ -115,23 +115,23 @@ header_info::header_info(const std::string &dictionary) if(dtype[1] == 'U') { this->dtype = npy::data_type_t::UNICODE_STRING; - this->endianness = dtype[0] == '>' ? npy::endian_t::BIG : npy::endian_t::LITTLE; - this->max_element_length = std::stoi(dtype.substr(2)); + endianness = dtype[0] == '>' ? npy::endian_t::BIG : npy::endian_t::LITTLE; + max_element_length = std::stoi(dtype.substr(2)); } else { - std::tie(this->dtype, this->endianness) = from_dtype(dtype); - this->max_element_length = 0; + std::tie(this->dtype, endianness) = from_dtype(dtype); + max_element_length = 0; } } else if (key == "fortran_order") { - this->fortran_order = read_bool(input); + fortran_order = read_bool(input); } else if (key == "shape") { - this->shape = read_shape(input); + shape = read_shape(input); } else { diff --git a/src/npz.cpp b/src/npz.cpp index 04f2386..85a7293 100644 --- a/src/npz.cpp +++ b/src/npz.cpp @@ -19,7 +19,8 @@ const std::array CD_END_SIG = {0x50, 0x4B, 0x05, 0x06}; const std::array EXTERNAL_ATTR = {0x00, 0x00, 0x80, 0x01}; const std::array TIME = {0x00, 0x00, 0x21, 0x00}; const int CD_END_SIZE = 22; -const std::uint16_t VERSION = 20; +const std::uint16_t STANDARD_VERSION = 20; // 2.0 File is encrypted using traditional PKWARE encryption +const std::uint16_t ZIP64_VERSION = 45; // 4.5 File uses ZIP64 format extensions const int CHUNK = 128 * 1024; const std::uint16_t ZIP64_TAG = 1; @@ -216,10 +217,10 @@ std::uint16_t read_shared_header(std::istream &stream, npy::file_entry &header) return read16(stream); } -void write_local_header(std::ostream &stream, const npy::file_entry &header) +void write_local_header(std::ostream &stream, const npy::file_entry &header, bool zip64) { stream.write(reinterpret_cast(LOCAL_HEADER_SIG.data()), LOCAL_HEADER_SIG.size()); - write(stream, VERSION); + write(stream, zip64 ? ZIP64_VERSION : STANDARD_VERSION); write_shared_header(stream, header); std::uint16_t extra_field_length = determine_extra_length(header, false); write(stream, extra_field_length); @@ -234,7 +235,7 @@ npy::file_entry read_local_header(std::istream &stream) { assert_sig(stream, LOCAL_HEADER_SIG); std::uint16_t version = read16(stream); - if (version > VERSION) + if (version > ZIP64_VERSION) { throw std::logic_error("Unsupported NPZ version"); } @@ -257,11 +258,11 @@ npy::file_entry read_local_header(std::istream &stream) void write_central_directory_header(std::ostream &stream, const npy::file_entry &header) { + std::uint16_t extra_field_length = determine_extra_length(header, true); stream.write(reinterpret_cast(CD_HEADER_SIG.data()), CD_HEADER_SIG.size()); - write(stream, VERSION); - write(stream, VERSION); + write(stream, STANDARD_VERSION); + write(stream, extra_field_length > 0 ? ZIP64_VERSION : STANDARD_VERSION); write_shared_header(stream, header); - std::uint16_t extra_field_length = determine_extra_length(header, true); write(stream, extra_field_length); std::uint16_t file_comment_length = 0; write(stream, file_comment_length); @@ -283,7 +284,7 @@ npy::file_entry read_central_directory_header(std::istream &stream) assert_sig(stream, CD_HEADER_SIG); read16(stream); // version made by std::uint16_t version = read16(stream); - if (version > VERSION) + if (version > ZIP64_VERSION) { throw std::logic_error("Unsupported NPZ version"); } @@ -368,9 +369,9 @@ onpzstream::onpzstream(const std::string &path, onpzstream::~onpzstream() { - if (!this->m_closed) + if (!m_closed) { - this->close(); + close(); } } @@ -381,12 +382,12 @@ void onpzstream::write_file(const std::string &filename, std::uint32_t compressed_size = 0; std::vector compressed_bytes; std::uint32_t checksum = npy_crc32(bytes); - if (this->m_compression_method == compression_method_t::STORED) + if (m_compression_method == compression_method_t::STORED) { compressed_bytes = bytes; compressed_size = uncompressed_size; } - else if (this->m_compression_method == compression_method_t::DEFLATED) + else if (m_compression_method == compression_method_t::DEFLATED) { compressed_bytes = npy_deflate(std::move(bytes)); compressed_size = static_cast(compressed_bytes.size()); @@ -401,79 +402,86 @@ void onpzstream::write_file(const std::string &filename, checksum, compressed_size, uncompressed_size, - static_cast(this->m_compression_method), - static_cast(this->m_output.tellp())}; - write_local_header(this->m_output, entry); - this->m_output.write(reinterpret_cast(compressed_bytes.data()), compressed_size); - this->m_entries.push_back(std::move(entry)); + static_cast(m_compression_method), + static_cast(m_output.tellp())}; + + bool zip64 = uncompressed_size > ZIP64_LIMIT || compressed_size > ZIP64_LIMIT; + write_local_header(m_output, entry, zip64); + m_output.write(reinterpret_cast(compressed_bytes.data()), compressed_size); + m_entries.push_back(std::move(entry)); +} + +bool onpzstream::is_open() const +{ + return m_output.is_open(); } void onpzstream::close() { - if (!this->m_closed) + if (!m_closed) { CentralDirectory dir; - dir.offset = static_cast(this->m_output.tellp()); - for (auto &header : this->m_entries) + dir.offset = static_cast(m_output.tellp()); + for (auto &header : m_entries) { - write_central_directory_header(this->m_output, header); + write_central_directory_header(m_output, header); } - dir.size = static_cast(this->m_output.tellp()) - dir.offset; - dir.num_entries = static_cast(this->m_entries.size()); - write_end_of_central_directory(this->m_output, dir); - this->m_output.close(); - this->m_closed = true; + dir.size = static_cast(m_output.tellp()) - dir.offset; + dir.num_entries = static_cast(m_entries.size()); + write_end_of_central_directory(m_output, dir); + m_output.close(); + m_closed = true; } } inpzstream::inpzstream(const std::string &path) : m_input(path, std::ios::out | std::ios::binary) { - this->read_entries(); + read_entries(); } void inpzstream::read_entries() { - if (!this->m_input.is_open()) + if (!m_input.is_open()) { throw std::invalid_argument("path"); } - this->m_input.seekg(-CD_END_SIZE, std::ios::end); - CentralDirectory dir = read_end_of_central_directory(this->m_input); + m_input.seekg(-CD_END_SIZE, std::ios::end); + CentralDirectory dir = read_end_of_central_directory(m_input); - this->m_input.seekg(dir.offset, std::ios::beg); + m_input.seekg(dir.offset, std::ios::beg); for (size_t i = 0; i < dir.num_entries; ++i) { - file_entry entry = read_central_directory_header(this->m_input); - this->m_entries[entry.filename] = entry; + file_entry entry = read_central_directory_header(m_input); + m_entries[entry.filename] = entry; } } std::vector inpzstream::read_file(const std::string &temp_filename) { std::string filename = temp_filename; - if (this->m_entries.count(filename) == 0) + if (m_entries.count(filename) == 0) { filename += ".npy"; - if(this->m_entries.count(filename) == 0) + if(m_entries.count(filename) == 0) { throw std::invalid_argument("filename"); } } - const file_entry &entry = this->m_entries[filename]; - this->m_input.seekg(entry.offset, std::ios::beg); + const file_entry &entry = m_entries[filename]; + m_input.seekg(entry.offset, std::ios::beg); - file_entry local = read_local_header(this->m_input); + file_entry local = read_local_header(m_input); if (!entry.check(local)) { throw std::logic_error("Central directory and local headers disagree"); } std::vector uncompressed_bytes(entry.compressed_size); - this->m_input.read(reinterpret_cast(uncompressed_bytes.data()), uncompressed_bytes.size()); + m_input.read(reinterpret_cast(uncompressed_bytes.data()), uncompressed_bytes.size()); compression_method_t cmethod = static_cast(entry.compression_method); if (cmethod == compression_method_t::DEFLATED) { @@ -489,19 +497,24 @@ std::vector inpzstream::read_file(const std::string &temp_filename return uncompressed_bytes; } +bool inpzstream::is_open() const +{ + return m_input.is_open(); +} + void inpzstream::close() { - this->m_input.close(); + m_input.close(); } bool inpzstream::contains(const std::string &filename) { - return this->m_entries.count(filename); + return m_entries.count(filename); } header_info inpzstream::peek(const std::string &filename) { - imemstream stream(this->read_file(filename)); + imemstream stream(read_file(filename)); return npy::peek(stream); } } // namespace npy \ No newline at end of file