From aac2dbd1c0f3ed5740812206ddeb7af33ea34ee1 Mon Sep 17 00:00:00 2001 From: Michal Siedlaczek Date: Tue, 17 Dec 2024 20:35:08 -0500 Subject: [PATCH] Lookup table v1 implementation Changelog-added: New lookup table implementation available Signed-off-by: Michal Siedlaczek --- include/pisa/lookup_table.hpp | 229 +++++++++++++++++++++++++ include/pisa/span.hpp | 50 +++++- include/pisa/stream.hpp | 56 ++++++ src/lookup_table.cpp | 309 ++++++++++++++++++++++++++++++++++ src/stream.cpp | 41 +++++ test/test_span.cpp | 71 ++++++++ 6 files changed, 753 insertions(+), 3 deletions(-) create mode 100644 include/pisa/lookup_table.hpp create mode 100644 include/pisa/stream.hpp create mode 100644 src/lookup_table.cpp create mode 100644 src/stream.cpp diff --git a/include/pisa/lookup_table.hpp b/include/pisa/lookup_table.hpp new file mode 100644 index 00000000..4a6af1cc --- /dev/null +++ b/include/pisa/lookup_table.hpp @@ -0,0 +1,229 @@ +// Copyright 2024 PISA developers +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace pisa::lt {
+
+namespace detail {
+
+    class BaseLookupTable {
+      public:
+        virtual ~BaseLookupTable() = default;
+        [[nodiscard]] virtual auto size() const noexcept -> std::size_t = 0;
+        [[nodiscard]] virtual auto operator[](std::size_t idx) const
+            -> std::span = 0;
+        [[nodiscard]] virtual auto find(std::span value) const noexcept
+            -> std::optional = 0;
+
+        [[nodiscard]] virtual auto clone() -> std::unique_ptr = 0;
+    };
+
+    class BaseLookupTableEncoder {
+      public:
+        virtual ~BaseLookupTableEncoder() = default;
+        void virtual insert(std::span payload) = 0;
+        void virtual encode(std::ostream& out) = 0;
+    };
+
+}  // namespace detail
+
+namespace v1 {
+
+    class Flags {
+      private:
+        std::uint8_t flags = 0;
+
+      public:
+        constexpr Flags() = default;
+        explicit constexpr Flags(std::uint8_t bitset) : flags(bitset) {}
+
+        [[nodiscard]] auto sorted() const noexcept -> bool;
+        [[nodiscard]] auto wide_offsets() const noexcept -> bool;
+        [[nodiscard]] auto bits() const noexcept -> std::uint8_t;
+    };
+
+    namespace flags {
+        inline constexpr std::uint8_t SORTED = 0b001;
+        inline constexpr std::uint8_t WIDE_OFFSETS = 0b010;
+    }  // namespace flags
+
+}  // namespace v1
+
+}  // namespace pisa::lt
+
+namespace pisa {
+
+/**
+ * Lookup table mapping integers from a range [0, N) to binary payloads.
+ *
+ * This table assigns each _unique_ value (duplicates are not allowed) to an index in [0, N), where
+ * N is the size of the table. Thus, this structure is equivalent to a sequence of binary values.
+ * The difference between `LookupTable` and, say, `std::vector` is that its encoding supports
+ * reading the values without fully parsing the entire binary representation of the table. As such,
+ * it supports quickly initializing the structure from an external device (with random access),
+ * e.g., via mmap, and performing a lookup without loading the entire structure to main memory.
+ * This is especially useful for short-lived programs that must perform a lookup without the
+ * unnecessary overhead of loading it to memory.
+ *
+ * If the values are sorted, and the appropriate flag is toggled in the header, a quick binary
+ * search lookup can be performed to find an index of a value. If the values are not sorted, then a
+ * linear scan will be used; therefore, one should consider having values sorted if such lookups are
+ * important. Getting the value at a given index is a constant-time operation, though if using
+ * memory mapping, each such operation may need to load multiple pages to memory.
+ */
+class LookupTable {
+  private:
+    std::unique_ptr<::pisa::lt::detail::BaseLookupTable> m_impl;
+
+    explicit LookupTable(std::unique_ptr<::pisa::lt::detail::BaseLookupTable> impl);
+
+    [[nodiscard]] static auto v1(std::span bytes) -> LookupTable;
+
+  public:
+    LookupTable(LookupTable const&);
+    LookupTable(LookupTable&&);
+    LookupTable& operator=(LookupTable const&);
+    LookupTable& operator=(LookupTable&&);
+    ~LookupTable();
+
+    /**
+     * The number of elements in the table.
+     */
+    [[nodiscard]] auto size() const noexcept -> std::size_t;
+
+    /**
+     * Retrieves the value at index `idx`.
+     *
+     * If `idx >= size()`, then `std::out_of_range` exception is thrown. See `at()` if you want to
+     * conveniently cast the memory span to another type.
+     */
+    [[nodiscard]] auto operator[](std::size_t idx) const -> std::span;
+
+    /**
+     * Returns the position of `value` in the table or `std::nullopt` if the value does not exist.
+     *
+     * See the templated version of this function if you want to automatically cast from another
+     * type to byte span.
+     */
+    [[nodiscard]] auto find(std::span value) const noexcept
+        -> std::optional;
+
+    /**
+     * Returns the value at index `idx` cast to type `T`.
+     *
+     * The type `T` must define `T::value_type` that resolves to a byte-wide type, as well as a
+     * constructor that takes `T::value_type const*` (pointer to the first byte) and `std::size_t`
+     * (the total number of bytes). If `T::value_type` is longer than 1 byte, this operation results
+     * in **undefined behavior**.
+     *
+     * Examples of types that can be used are: `std::string_view` or `std::span`.
+     */
+    template
+    [[nodiscard]] auto at(std::size_t idx) const -> T {
+        auto bytes = this->operator[](idx);
+        return T(reinterpret_cast(bytes.data()), bytes.size());
+    }
+
+    /**
+     * Returns the position of `value` in the table or `std::nullopt` if the value does not exist.
+     *
+     * The type `T` of the value must be such that `std::span` is
+     * constructible from `T`.
+     */
+    template
+        requires(std::constructible_from, T>)
+    [[nodiscard]] auto find(T value) const noexcept -> std::optional {
+        return find(std::as_bytes(std::span(value)));
+    }
+
+    /**
+     * Constructs a lookup table from the encoded sequence of bytes.
+     */
+    [[nodiscard]] static auto from_bytes(std::span bytes) -> LookupTable;
+};
+
+/**
+ * Lookup table encoder.
+ *
+ * This class builds and encodes a sequence of values to the binary format of lookup table.
+ * See `LookupTable` for more details.
+ *
+ * Note that all encoded data is accumulated in memory and only flushed to the output stream when
+ * `encode()` member function is called.
+ */
+class LookupTableEncoder {
+    std::unique_ptr<::pisa::lt::detail::BaseLookupTableEncoder> m_impl;
+
+    explicit LookupTableEncoder(std::unique_ptr<::pisa::lt::detail::BaseLookupTableEncoder> impl);
+
+  public:
+    /**
+     * Constructs an encoder for a lookup table in v1 format, with the given flag options.
+     *
+     * If sorted flag is _not_ set, then an additional hash set will be produced to keep track of
+     * duplicates. This will increase the memory footprint at build time.
+     */
+    static LookupTableEncoder v1(::pisa::lt::v1::Flags flags);
+
+    /**
+     * Inserts payload.
+     *
+     * If sorted flag was set at construction time, it will throw if the given payload is not
+     * lexicographically greater than the previously inserted payload. If sorted flag was _not_ set
+     * and the given payload has already been inserted, it will throw as well.
+     */
+    auto insert(std::span payload) -> LookupTableEncoder&;
+
+    /**
+     * Writes the encoded table to the output stream.
+     */
+    auto encode(std::ostream& out) -> LookupTableEncoder&;
+
+    /**
+     * Inserts a payload of type `Payload`.
+     *
+     * `std::span` must be constructible from `Payload`, which
+     * in turn will be cast as byte span before calling the non-templated version of `insert()`.
+     */
+    template
+        requires(std::constructible_from, Payload>)
+    auto insert(Payload const& payload) -> LookupTableEncoder& {
+        insert(std::as_bytes(std::span(payload)));
+        return *this;
+    }
+
+    /**
+     * Inserts all payloads in the given span.
+     *
+     * It calls `insert()` for each element in the span. See `insert()` for more details.
+     */
+    template
+    auto insert_span(std::span payloads) -> LookupTableEncoder& {
+        for (auto const& payload: payloads) {
+            insert(payload);
+        }
+        return *this;
+    }
+};
+
+}  // namespace pisa
diff --git a/include/pisa/span.hpp b/include/pisa/span.hpp
index 4eb0b103..3ce1bf4e 100644
--- a/include/pisa/span.hpp
+++ b/include/pisa/span.hpp
@@ -34,12 +34,47 @@ template
     return span[pos];
 }
 
-}  // namespace pisa
+template
+[[nodiscard]] constexpr auto subspan_or_throw(
+    std::span const& span,
+    typename std::span::size_type offset,
+    typename std::span::size_type count,
+    std::string const& error_msg
+) -> std::span {
+    if (offset > span.size() || (count != std::dynamic_extent && count > span.size() - offset)) {
+        throw std::out_of_range(error_msg);
+    }
+    return span.subspan(offset, count);
+}
 
-namespace std {
+template
+[[nodiscard]] constexpr auto subspan_or_throw(
+    std::span const& span,
+    typename std::span::size_type offset,
+    typename std::span::size_type count
+) -> std::span {
+    return subspan_or_throw(span, offset, count, "out of range subspan");
+}
+
 template
-[[nodiscard]] auto operator==(std::span const& lhs, std::span const& rhs) -> bool {
+[[nodiscard]] auto lex_lt(std::span const& lhs, std::span const& rhs) -> bool {
+    auto lit = lhs.begin();
+    auto rit = rhs.begin();
+    while (lit != lhs.end() && rit != rhs.end()) {
+        if (*lit < *rit) {
+            return true;
+        }
+        if (*lit > *rit) {
+            return false;
+        }
+        ++lit;
+        ++rit;
+    }
+    return rit != rhs.end();  // lhs ran out first: a proper prefix sorts before the longer span
+}
+
+template
+[[nodiscard]] auto lex_eq(std::span const& lhs, std::span const& rhs) -> bool {
     if (lhs.size() != rhs.size()) {
         return false;
     }
@@ -53,4 +88,13 @@ template
     return true;
 }
 
+}  // namespace pisa
+
+namespace std {
+
+template
+[[nodiscard]] auto operator==(std::span const& lhs, std::span const& rhs) -> bool {
+    return ::pisa::lex_eq(lhs, rhs);
+}
+
 }  // namespace std
diff --git a/include/pisa/stream.hpp b/include/pisa/stream.hpp
new file mode 100644
index 00000000..6768db49
--- /dev/null
+++ b/include/pisa/stream.hpp
@@ -0,0 +1,56 @@
+// Copyright 2024 PISA developers
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include
+#include
+#include
+
+namespace pisa {
+
+class FileOpenError: public std::exception {  // carries a message built from the file name
+  public:
+    explicit FileOpenError(std::string const& file);
+    [[nodiscard]] auto what() const noexcept -> char const* override;
+
+  private:
+    std::string m_message;
+};
+
+class WriteError: public std::exception {  // thrown by put() and write() below on stream failure
+  public:
+    [[nodiscard]] auto what() const noexcept -> char const* override;
+};
+
+auto open_file_w(std::string const& filename) -> std::ofstream;
+
+template
+auto put(std::basic_ostream& stream, CharT ch) -> std::ostream& {
+    if (!stream.put(ch)) {
+        throw WriteError();
+    }
+    return stream;
+}
+
+template
+auto write(std::basic_ostream& stream, CharT const* data, std::streamsize count)
+    -> std::basic_ostream& {
+    if (!stream.write(data, count)) {
+        throw WriteError();
+    }
+    return stream;
+}
+
+}  // namespace pisa
diff --git a/src/lookup_table.cpp b/src/lookup_table.cpp
new file mode 100644
index 00000000..5c686b11
--- /dev/null
+++ b/src/lookup_table.cpp
@@ -0,0 +1,309 @@
+// Copyright 2024 PISA developers
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include "fmt/core.h"
+#include "pisa/lookup_table.hpp"
+#include "pisa/span.hpp"
+#include "pisa/stream.hpp"
+
+namespace pisa::lt {
+
+constexpr std::byte VERIFICATION_BYTE = std::byte(0x87);
+constexpr std::size_t PADDING_LENGTH = 5;
+constexpr std::array PADDING = {
+    std::byte{0}, std::byte{0}, std::byte{0}, std::byte{0}, std::byte{0}
+};
+
+auto v1::Flags::sorted() const noexcept -> bool {
+    return (this->flags & 0b1) > 0;
+}
+
+auto v1::Flags::wide_offsets() const noexcept -> bool {
+    return (this->flags & 0b10) > 0;
+}
+
+auto v1::Flags::bits() const noexcept -> std::uint8_t {
+    return this->flags;
+}
+
+}  // namespace pisa::lt
+
+namespace pisa {
+
+LookupTable::LookupTable(std::unique_ptr<::pisa::lt::detail::BaseLookupTable> impl)
+    : m_impl(std::move(impl)) {}
+
+LookupTable::LookupTable(LookupTable const& other) : m_impl(other.m_impl->clone()) {}
+
+LookupTable::LookupTable(LookupTable&&) = default;
+
+LookupTable& LookupTable::operator=(LookupTable const& other) {
+    m_impl = other.m_impl->clone();
+    return *this;
+}
+
+LookupTable& LookupTable::operator=(LookupTable&&) = default;
+
+LookupTable::~LookupTable() = default;
+
+template
+    requires(std::unsigned_integral)
+[[nodiscard]] auto
+read(std::span bytes, std::size_t offset, std::string const& error_msg) -> T {
+    auto sub = pisa::subspan_or_throw(bytes, offset, sizeof(T), error_msg);
+    T value;
+    std::memcpy(&value, sub.data(), sizeof(T));  // copy from the bounds-checked subspan
+    return value;
+}
+
+template
+    requires(std::unsigned_integral)
+[[nodiscard]] auto read(std::span bytes, std::size_t offset) -> T {
+    return read(bytes, offset, "not enough bytes");
+}
+
+void validate_padding(std::span bytes) {
+    auto padding = read(bytes, 0, "not enough bytes for header");
+    padding &= 0xFFFFFFFFFF000000;
+    if (padding != 0) {
+        throw std::domain_error(fmt::format(
+            "bytes 3-7 must be all 0 but are {:#2x} {:#2x} {:#2x} {:#2x} {:#2x}",
+            bytes[3],
+            bytes[4],
+            bytes[5],
+            bytes[6],
+            bytes[7]
+        ));
+    }
+}
+
+template
+class LookupTableV1: public ::pisa::lt::detail::BaseLookupTable {
+    std::span m_offsets;
+    std::span m_payloads;
+    std::size_t m_size;
+    bool m_sorted;
+
+    [[nodiscard]] auto read_offset(std::size_t idx) const -> Offset {
+        return read(m_offsets, idx * sizeof(Offset));
+    }
+
+    [[nodiscard]] auto read_payload(std::size_t idx) const -> std::span {
+        auto offset = read_offset(idx);
+        auto count = read_offset(idx + 1) - offset;
+        return pisa::subspan_or_throw(m_payloads, offset, count, "not enough bytes for payload");
+    }
+
+  public:
+    LookupTableV1(std::span offsets, std::span payloads, bool sorted)
+        : m_offsets(offsets),
+          m_payloads(payloads),
+          m_size(m_offsets.size() / sizeof(Offset) - 1),
+          m_sorted(sorted) {}
+
+    ~LookupTableV1() = default;
+
+    [[nodiscard]] virtual auto clone() -> std::unique_ptr override {
+        return std::make_unique>(m_offsets, m_payloads, m_sorted);
+    }
+
+    [[nodiscard]] virtual auto size() const noexcept -> std::size_t override { return m_size; }
+
+    [[nodiscard]] virtual auto operator[](std::size_t idx) const
+        -> std::span override {
+        if (idx >= m_size) {
+            throw std::out_of_range(
+                fmt::format("accessing element {} in a table of size {}", idx, m_size)
+            );
+        }
+        auto offset = read_offset(idx);
+        auto count = read_offset(idx + 1) - offset;
+        return pisa::subspan_or_throw(m_payloads, offset, count, "not enough bytes for payload");
+    }
+
+    [[nodiscard]] virtual auto find_sorted(std::span value) const noexcept
+        -> std::optional {
+        if (size() == 0) {
+            return std::nullopt;
+        }
+        std::size_t low = 0;
+        std::size_t high = size() - 1;
+        while (low < high) {  // lower-bound search: narrows to the first payload not less than value
+            auto mid = std::midpoint(low, high);
+            auto midval = read_payload(mid);
+            if (lex_lt(midval, value)) {
+                low = mid + 1;
+            } else {
+                high = mid;
+            }
+        }
+        return read_payload(low) == value ? std::optional(low) : std::nullopt;  // verify candidate
+    }
+
+    [[nodiscard]] virtual auto find_unsorted(std::span value) const noexcept
+        -> std::optional {
+        for (std::size_t pos = 0; pos < size(); ++pos) {
+            if (read_payload(pos) == value) {
+                return pos;
+            }
+        }
+        return std::nullopt;
+    }
+
+    [[nodiscard]] virtual auto find(std::span value) const noexcept
+        -> std::optional override {
+        return m_sorted ? find_sorted(value) : find_unsorted(value);
+    }
+};
+
+template
+auto construct_lookup_table_v1(std::span bytes, bool sorted)
+    -> std::unique_ptr<::pisa::lt::detail::BaseLookupTable> {
+    auto length = read(bytes, 8, "not enough bytes for table length");
+    std::size_t offsets_bytes_length = (length + 1) * sizeof(Offset);
+    return std::make_unique>(
+        pisa::subspan_or_throw(bytes, 16, offsets_bytes_length, "not enough bytes for offsets"),
+        pisa::subspan_or_throw(bytes, 16 + offsets_bytes_length, std::dynamic_extent),
+        sorted
+    );
+}
+
+auto LookupTable::v1(std::span bytes) -> LookupTable {
+    validate_padding(bytes);
+    auto flags = lt::v1::Flags(static_cast(bytes[2]));
+    if (flags.wide_offsets()) {
+        return LookupTable(construct_lookup_table_v1(bytes, flags.sorted()));
+    }
+    return LookupTable(construct_lookup_table_v1(bytes, flags.sorted()));
+}
+
+auto LookupTable::from_bytes(std::span bytes) -> LookupTable {
+    auto leading_bytes = pisa::subspan_or_throw(bytes, 0, 2, "header must be at least 2 bytes");
+    auto verification_byte = leading_bytes[0];
+    if (verification_byte != lt::VERIFICATION_BYTE) {
+        throw std::domain_error(fmt::format(
+            "lookup table verification byte invalid: must be {:#x} but {:#x} given",
+            lt::VERIFICATION_BYTE,
+            verification_byte
+        ));
+    }
+
+    auto version = static_cast(leading_bytes[1]);
+    if (version != 1) {
+        throw std::domain_error(fmt::format("only version 1 is valid but {} given", version));
+    }
+
+    return LookupTable::v1(bytes);
+}
+
+auto LookupTable::size() const noexcept -> std::size_t {
+    return m_impl->size();
+}
+auto LookupTable::operator[](std::size_t idx) const -> std::span {
+    return m_impl->operator[](idx);
+}
+
+auto LookupTable::find(std::span value) const noexcept
+    -> std::optional {
+    return m_impl->find(value);
+}
+
+template
+class LookupTableEncoderV1: public ::pisa::lt::detail::BaseLookupTableEncoder {
+    ::pisa::lt::v1::Flags m_flags;
+    std::vector m_offsets{0};
+    std::vector m_payloads{};
+    std::unordered_set m_inserted{};
+
+    void encode_header(std::ostream& out) {
+        auto flag_bits = m_flags.bits();
+        pisa::put(out, static_cast(lt::VERIFICATION_BYTE));
+        pisa::put(out, static_cast(1));
+        pisa::put(out, static_cast(flag_bits));
+        pisa::write(
+            out, reinterpret_cast(&::pisa::lt::PADDING), ::pisa::lt::PADDING_LENGTH
+        );
+    }
+
+    void write_offsets(std::ostream& out) {
+        for (auto const& offset: m_offsets) {
+            pisa::write(out, reinterpret_cast(&offset), sizeof(Offset));
+        }
+    }
+
+  public:
+    explicit LookupTableEncoderV1(::pisa::lt::v1::Flags flags) : m_flags(flags) {}
+
+    virtual ~LookupTableEncoderV1() = default;
+
+    void insert(std::span payload) override {
+        if (m_flags.sorted()) {
+            auto prev_begin = m_offsets.size() > 1 ? *(m_offsets.end() - 2) : m_offsets.back();
+            auto prev = std::span(m_payloads).subspan(prev_begin);  // the last inserted payload
+            if (m_offsets.size() > 1 && !pisa::lex_lt(prev, payload)) {  // must strictly increase
+                throw std::invalid_argument("payloads not strictly sorted in sorted table");
+            }
+        } else {
+            auto payload_as_str =
+                std::string_view(reinterpret_cast(payload.data()), payload.size());
+            if (!m_inserted.insert(payload_as_str).second) {  // false: payload was already present
+                throw std::invalid_argument("payload duplicate");
+            }
+        }
+        m_offsets.push_back(m_offsets.back() + payload.size());
+        m_payloads.insert(m_payloads.end(), payload.begin(), payload.end());
+    }
+
+    void encode(std::ostream& out) override {
+        encode_header(out);
+        std::uint64_t size = m_offsets.size() - 1;
+        pisa::write(out, reinterpret_cast(&size), sizeof(size));
+        write_offsets(out);
+        pisa::write(out, reinterpret_cast(m_payloads.data()), m_payloads.size());
+    }
+};
+
+LookupTableEncoder::LookupTableEncoder(std::unique_ptr<::pisa::lt::detail::BaseLookupTableEncoder> impl)
+    : m_impl(std::move(impl)) {}
+
+LookupTableEncoder LookupTableEncoder::v1(::pisa::lt::v1::Flags flags) {
+    if (flags.wide_offsets()) {
+        return LookupTableEncoder(std::make_unique>(flags));
+    }
+    return LookupTableEncoder(std::make_unique>(flags));
+}
+
+auto LookupTableEncoder::insert(std::span payload) -> LookupTableEncoder& {
+    m_impl->insert(payload);
+    return *this;
+}
+
+auto LookupTableEncoder::encode(std::ostream& out) -> LookupTableEncoder& {
+    m_impl->encode(out);
+    return *this;
+}
+
+}  // namespace pisa
diff --git a/src/stream.cpp b/src/stream.cpp
new file mode 100644
index 00000000..b9b14895
--- /dev/null
+++ b/src/stream.cpp
@@ -0,0 +1,41 @@
+// Copyright 2024 PISA developers
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include
+
+#include "fmt/core.h"
+#include "pisa/stream.hpp"
+
+namespace pisa {
+
+FileOpenError::FileOpenError(std::string const& file)
+    : m_message(fmt::format("failed to open file: {}", file)) {}
+
+auto FileOpenError::what() const noexcept -> char const* {
+    return m_message.c_str();  // points into m_message, owned by this exception object
+}
+
+auto WriteError::what() const noexcept -> char const* {
+    return "failed to write to stream";
+}
+
+auto open_file_w(std::string const& filename) -> std::ofstream {
+    auto stream = std::ofstream(filename);  // default open mode; no std::ios::binary is requested
+    if (!stream) {  // stream tests false when the file could not be opened
+        throw FileOpenError(filename);
+    }
+    return stream;
+}
+
+}  // namespace pisa
diff --git a/test/test_span.cpp b/test/test_span.cpp
index 4b0a5b9e..23c21576 100644
--- a/test/test_span.cpp
+++ b/test/test_span.cpp
@@ -1,6 +1,23 @@
+// Copyright 2024 PISA developers
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
 #define CATCH_CONFIG_MAIN
 #include "catch2/catch.hpp"
 
+#include
+#include
+
 #include "span.hpp"
 
 TEST_CASE("pisa::at", "[span]") {
@@ -13,6 +30,28 @@ TEST_CASE("pisa::at", "[span]") {
     REQUIRE_THROWS_AS(pisa::at(span, 4), std::out_of_range);
 }
 
+TEST_CASE("pisa::subspan", "[span]") {  // ranges inside the 4-element span succeed, others throw
+    std::vector vec{0, 1, 2, 3};
+    auto span = std::span{vec.data(), vec.size()};
+    REQUIRE(pisa::subspan_or_throw(span, 0, 0) == std::span(vec.data(), 0));
+    REQUIRE(pisa::subspan_or_throw(span, 0, 1) == std::span(vec.data(), 1));
+    REQUIRE(pisa::subspan_or_throw(span, 1, 0) == std::span(vec.data() + 1, 0));
+    REQUIRE(pisa::subspan_or_throw(span, 0, 4) == std::span(vec.data(), 4));
+    REQUIRE(pisa::subspan_or_throw(span, 1, 3) == std::span(vec.data() + 1, 3));
+    REQUIRE(pisa::subspan_or_throw(span, 0, 3) == std::span(vec.data(), 3));
+    REQUIRE(pisa::subspan_or_throw(span, 2, 2) == std::span(vec.data() + 2, 2));
+    REQUIRE(pisa::subspan_or_throw(span, 3, 1) == std::span(vec.data() + 3, 1));
+    REQUIRE(pisa::subspan_or_throw(span, 4, 0) == std::span(vec.data() + 4, 0));
+    REQUIRE_THROWS_AS(pisa::subspan_or_throw(span, 0, 6), std::out_of_range);
+    REQUIRE_THROWS_AS(pisa::subspan_or_throw(span, 0, 5), std::out_of_range);
+    REQUIRE_THROWS_AS(pisa::subspan_or_throw(span, 1, 4), std::out_of_range);
+    REQUIRE_THROWS_AS(pisa::subspan_or_throw(span, 2, 3), std::out_of_range);
+    REQUIRE_THROWS_AS(pisa::subspan_or_throw(span, 3, 2), std::out_of_range);
+    REQUIRE_THROWS_AS(pisa::subspan_or_throw(span, 4, 1), std::out_of_range);
+    REQUIRE_THROWS_AS(pisa::subspan_or_throw(span, 5, 0), std::out_of_range);
+    REQUIRE_THROWS_AS(pisa::subspan_or_throw(span, 5, 1), std::out_of_range);
+}
+
 TEST_CASE("operator== for spans", "[span]") {
     std::vector vec1{0, 1, 2, 3};
     auto span1 = std::span(vec1.data(), vec1.size());
@@ -25,3 +64,35 @@ TEST_CASE("operator== for spans", "[span]") {
     REQUIRE(span2 != span3);
     REQUIRE(span1 == std::span(vec1.data(), vec1.size()));
 }
+
+TEST_CASE("lex_lt", "[span]") {  // NOTE(review): no pair here has one word as a prefix of the other
+    std::string_view aardvark = "aardvark";
+    std::string_view dog = "dog";
+    std::string_view zebra = "zebra";
+
+    REQUIRE_FALSE(pisa::lex_lt(std::span(aardvark), std::span(aardvark)));
+    REQUIRE(pisa::lex_lt(std::span(aardvark), std::span(dog)));
+    REQUIRE(pisa::lex_lt(std::span(aardvark), std::span(zebra)));
+
+    REQUIRE_FALSE(pisa::lex_lt(std::span(dog), std::span(dog)));
+    REQUIRE_FALSE(pisa::lex_lt(std::span(dog), std::span(aardvark)));
+    REQUIRE(pisa::lex_lt(std::span(dog), std::span(zebra)));
+
+    REQUIRE_FALSE(pisa::lex_lt(std::span(zebra), std::span(zebra)));
+    REQUIRE_FALSE(pisa::lex_lt(std::span(zebra), std::span(aardvark)));
+    REQUIRE_FALSE(pisa::lex_lt(std::span(zebra), std::span(dog)));
+}
+
+TEST_CASE("lex_lt sort", "[span]") {  // uses lex_lt as the comparator passed to std::sort
+    std::vector> animals{
+        "aardvark", "dog", "zebra", "pelican", "goose", "geese", "cat"
+    };
+    std::sort(animals.begin(), animals.end(), pisa::lex_lt);
+    REQUIRE(animals[0] == std::span("aardvark"));
+    REQUIRE(animals[1] == std::span("cat"));
+    REQUIRE(animals[2] == std::span("dog"));
+    REQUIRE(animals[3] == std::span("geese"));
+    REQUIRE(animals[4] == std::span("goose"));
+    REQUIRE(animals[5] == std::span("pelican"));
+    REQUIRE(animals[6] == std::span("zebra"));
+}