diff --git a/Userland/Libraries/LibGfx/ImageFormats/WebPWriterLossless.cpp b/Userland/Libraries/LibGfx/ImageFormats/WebPWriterLossless.cpp index d706f41dce2dba..b90a318e8be845 100644 --- a/Userland/Libraries/LibGfx/ImageFormats/WebPWriterLossless.cpp +++ b/Userland/Libraries/LibGfx/ImageFormats/WebPWriterLossless.cpp @@ -11,21 +11,13 @@ #include #include #include +#include #include #include #include namespace Gfx { -static bool are_all_pixels_opaque(Bitmap const& bitmap) -{ - for (ARGB32 pixel : bitmap) { - if ((pixel >> 24) != 0xff) - return false; - } - return true; -} - NEVER_INLINE static ErrorOr write_image_data(LittleEndianOutputBitStream& bit_stream, Bitmap const& bitmap, PrefixCodeGroup const& prefix_code_group) { // This is currently the hot loop. Keep performance in mind when you change it. @@ -43,6 +35,184 @@ NEVER_INLINE static ErrorOr write_image_data(LittleEndianOutputBitStream& return {}; } +struct CodeLengthSymbol { + u8 symbol { 0 }; + u8 count { 0 }; // used for special symbols 16-18 +}; + +// This is very similar to DeflateCompressor::encode_huffman_lengths(). +// But: +// * size can be larger than 288 for green, is always 256 for r, b, a, and is always 40 for distance codes +// * code 16 has different semantics, requires last_non_zero_symbol +static size_t encode_huffman_lengths(ReadonlyBytes lengths, Array& encoded_lengths) +{ + size_t encoded_count = 0; + size_t i = 0; + u8 last_non_zero_symbol = 8; // "If code 16 is used before a non-zero value has been emitted, a value of 8 is repeated." + while (i < lengths.size()) { + if (lengths[i] == 0) { + auto zero_count = 0; + for (size_t j = i; j < min(lengths.size(), i + 138) && lengths[j] == 0; j++) + zero_count++; + + if (zero_count < 3) { // below minimum repeated zero count + encoded_lengths[encoded_count++].symbol = 0; + i++; + continue; + } + + if (zero_count <= 10) { + // "Code 17 emits a streak of zeros [3..10], i.e., 3 + ReadBits(3) times." + encoded_lengths[encoded_count].symbol = 17; + encoded_lengths[encoded_count++].count = zero_count; + } else { + // "Code 18 emits a streak of zeros of length [11..138], i.e., 11 + ReadBits(7) times." + encoded_lengths[encoded_count].symbol = 18; + encoded_lengths[encoded_count++].count = zero_count; + } + i += zero_count; + continue; + } + + VERIFY(lengths[i] != 0); + last_non_zero_symbol = lengths[i]; + encoded_lengths[encoded_count++].symbol = lengths[i++]; + + // "Code 16 repeats the previous non-zero value [3..6] times, i.e., 3 + ReadBits(2) times." + // This is different from deflate. + auto copy_count = 0; + for (size_t j = i; j < min(lengths.size(), i + 6) && lengths[j] == last_non_zero_symbol; j++) + copy_count++; + + if (copy_count >= 3) { + encoded_lengths[encoded_count].symbol = 16; + encoded_lengths[encoded_count++].count = copy_count; + i += copy_count; + continue; + } + } + return encoded_count; +} + +static ErrorOr write_simple_code_lengths(LittleEndianOutputBitStream& bit_stream, ReadonlyBytes symbols) +{ + VERIFY(symbols.size() <= 2); + + static constexpr Array empty { 0 }; + if (symbols.size() == 0) { + // "Another special case is when all prefix code lengths are zeros (an empty prefix code). [...] + // empty prefix codes can be coded as those containing a single symbol 0." + symbols = empty; + } + + unsigned non_zero_symbol_count = symbols.size(); + + TRY(bit_stream.write_bits(1u, 1u)); // Simple code length code. + TRY(bit_stream.write_bits(non_zero_symbol_count - 1, 1u)); // num_symbols - 1 + if (symbols[0] <= 1) { + TRY(bit_stream.write_bits(0u, 1u)); // is_first_8bits: no + TRY(bit_stream.write_bits(symbols[0], 1u)); // symbol0 + } else { + TRY(bit_stream.write_bits(1u, 1u)); // is_first_8bits: yes + TRY(bit_stream.write_bits(symbols[0], 8u)); // symbol0 + } + if (non_zero_symbol_count > 1) + TRY(bit_stream.write_bits(symbols[1], 8u)); // symbol1 + + Array bits_per_symbol {}; + // "When coding a single leaf node [...], all but one code length are zeros, and the single leaf node value + // is marked with the length of 1 -- even when no bits are consumed when that single leaf node tree is used." + // CanonicalCode follows that convention too, even when describing simple code lengths. + bits_per_symbol[symbols[0]] = 1; + if (non_zero_symbol_count > 1) + bits_per_symbol[symbols[1]] = 1; + + return MUST(CanonicalCode::from_bytes(bits_per_symbol)); +} + +static ErrorOr write_normal_code_lengths(LittleEndianOutputBitStream& bit_stream, Array const& bit_lengths, size_t alphabet_size) +{ + // bit_lengths stores how many bits each symbol is encoded with. + + // Drop trailing zero lengths. + // This will keep at least three symbols; else we would've called write_simple_code_lengths() instead. + // This is similar to the loops in Deflate::encode_block_lengths(). + size_t code_count = bit_lengths.size(); + while (bit_lengths[code_count - 1] == 0) { + code_count--; + VERIFY(code_count > 2); + } + + Array encoded_lengths {}; + auto encoded_lengths_count = encode_huffman_lengths(bit_lengths.span().trim(code_count), encoded_lengths); + + // The code to compute code length code lengths is very similar to some of the code in DeflateCompressor::flush(). + // count code length frequencies + Array code_lengths_frequencies { 0 }; + for (size_t i = 0; i < encoded_lengths_count; i++) { + VERIFY(code_lengths_frequencies[encoded_lengths[i].symbol] < UINT16_MAX); + code_lengths_frequencies[encoded_lengths[i].symbol]++; + } + + // generate optimal huffman code lengths code lengths + Array code_lengths_bit_lengths {}; + Compress::generate_huffman_lengths(code_lengths_bit_lengths, code_lengths_frequencies, 7); // deflate code length huffman can use up to 7 bits per symbol + // calculate actual code length code lengths count (without trailing zeros) + auto code_lengths_count = code_lengths_bit_lengths.size(); + while (code_lengths_bit_lengths[kCodeLengthCodeOrder[code_lengths_count - 1]] == 0) + code_lengths_count--; + + TRY(bit_stream.write_bits(0u, 1u)); // Normal code length code. + + // This here isn't needed in Deflate because it always writes EndOfBlock. WebP does not have an EndOfBlock marker, so it needs this check. + if (code_lengths_count < 4) + code_lengths_count = 4; + dbgln_if(WEBP_DEBUG, "writing code_lengths_count: {}", code_lengths_count); + + // WebP uses a different kCodeLengthCodeOrder than deflate. Other than that, the following is similar to a loop in Compress::write_dynamic_huffman(). + // "int num_code_lengths = 4 + ReadBits(4);" + TRY(bit_stream.write_bits(code_lengths_count - 4u, 4u)); + + for (size_t i = 0; i < code_lengths_count; i++) { + TRY(bit_stream.write_bits(code_lengths_bit_lengths[kCodeLengthCodeOrder[i]], 3)); + } + + // Write code lengths. This is slightly different from deflate too -- deflate writes literal and distance lengths here, + // while WebP writes one of these codes each for g, r, b, a, and distance. + if (alphabet_size == encoded_lengths_count) { + TRY(bit_stream.write_bits(0u, 1u)); // max_symbol is alphabet_size + } else { + TRY(bit_stream.write_bits(1u, 1u)); // max_symbol is explicitly coded + // "int length_nbits = 2 + 2 * ReadBits(3); + // int max_symbol = 2 + ReadBits(length_nbits);" + // => length_nbits is at most 2 + 2*7 == 16 + unsigned needed_length_nbits = ceil(log2(encoded_lengths_count - 2)); + VERIFY(needed_length_nbits <= 16); + needed_length_nbits = ceil_div(needed_length_nbits, 2) * 2; + TRY(bit_stream.write_bits((needed_length_nbits - 2) / 2, 3u)); // length_nbits = 2 + 2 * 3 == 8 + TRY(bit_stream.write_bits(encoded_lengths_count - 2, needed_length_nbits)); // max_symbol = 2 + 254 + } + + // The rest is identical to write_dynamic_huffman() again. (Code 16 has different semantics, but that doesn't matter here.) + auto code_lengths_code = MUST(CanonicalCode::from_bytes(code_lengths_bit_lengths)); + for (size_t i = 0; i < encoded_lengths_count; i++) { + auto encoded_length = encoded_lengths[i]; + TRY(code_lengths_code.write_symbol(bit_stream, encoded_length.symbol)); + if (encoded_length.symbol == 16) { + // "Code 16 repeats the previous non-zero value [3..6] times, i.e., 3 + ReadBits(2) times." + TRY(bit_stream.write_bits(encoded_length.count - 3, 2)); + } else if (encoded_length.symbol == 17) { + // "Code 17 emits a streak of zeros [3..10], i.e., 3 + ReadBits(3) times." + TRY(bit_stream.write_bits(encoded_length.count - 3, 3)); + } else if (encoded_length.symbol == 18) { + // "Code 18 emits a streak of zeros of length [11..138], i.e., 11 + ReadBits(7) times." + TRY(bit_stream.write_bits(encoded_length.count - 11, 7)); + } + } + + return CanonicalCode::from_bytes(bit_lengths.span().trim(code_count)); +} + static ErrorOr write_VP8L_image_data(Stream& stream, Bitmap const& bitmap) { LittleEndianOutputBitStream bit_stream { MaybeOwned(stream) }; @@ -75,78 +245,62 @@ static ErrorOr write_VP8L_image_data(Stream& stream, Bitmap const& bitmap) // Prefix code #5: Used for backward-reference distance." // We use neither back-references not color cache entries yet. - // We write prefix trees for 256 literals all of length 8, which means each byte is encoded as itself. - // That doesn't give any compression, but is a valid bit stream. // We can make this smarter later on. size_t const color_cache_size = 0; - constexpr Array alphabet_sizes = to_array({ 256 + 24 + static_cast(color_cache_size), 256, 256, 256, 40 }); // XXX Shared? + constexpr Array alphabet_sizes = to_array({ 256 + 24 + static_cast(color_cache_size), 256, 256, 256, 40 }); // If you add support for color cache: At the moment, CanonicalCodes does not support writing more than 288 symbols. if (alphabet_sizes[0] > 288) return Error::from_string_literal("Invalid alphabet size"); - bool all_pixels_are_opaque = are_all_pixels_opaque(bitmap); - - PrefixCodeGroup prefix_code_group; - int number_of_full_channels = all_pixels_are_opaque ? 3 : 4; - for (int i = 0; i < number_of_full_channels; ++i) { - TRY(bit_stream.write_bits(0u, 1u)); // Normal code length code. - - // Write code length codes. - constexpr int kCodeLengthCodes = 19; - Array kCodeLengthCodeOrder = { 17, 18, 0, 1, 2, 3, 4, 5, 16, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 }; - int num_code_lengths = max(4u, find_index(kCodeLengthCodeOrder.begin(), kCodeLengthCodeOrder.end(), 8) + 1); - - // "int num_code_lengths = 4 + ReadBits(4);" - TRY(bit_stream.write_bits(num_code_lengths - 4u, 4u)); - - for (int i = 0; i < num_code_lengths - 1; ++i) - TRY(bit_stream.write_bits(0u, 3u)); - TRY(bit_stream.write_bits(1u, 3u)); - - // Write code lengths. - if (alphabet_sizes[i] == 256) { - TRY(bit_stream.write_bits(0u, 1u)); // max_symbol is alphabet_size - } else { - TRY(bit_stream.write_bits(1u, 1u)); // max_symbol is explicitly coded - // "int length_nbits = 2 + 2 * ReadBits(3); - // int max_symbol = 2 + ReadBits(length_nbits);" - TRY(bit_stream.write_bits(3u, 3u)); // length_nbits = 2 + 2 * 3 - TRY(bit_stream.write_bits(254u, 8u)); // max_symbol = 2 + 254 - } + // We do use huffman coding by writing a single prefix-code-group for the entire image. + // FIXME: Consider using a meta-prefix image and using one prefix-code-group per tile. - auto bits_per_symbol = Array::from_repeated_value(8); - prefix_code_group[i] = TRY(CanonicalCode::from_bytes(bits_per_symbol)); + // FIXME: generate_huffman_lengths() currently halves a frequency cap if the maximum bit length is reached. + // This has the effect of giving very frequent symbols a higher bit length than they would have otherwise. + // Instead, try dividing frequencies by 2 if the maximum bit length is reached. + // Then, low-frequency symbols will get a higher bit length than they would have otherwise, which might help + // compressed size. (For deflate, it doesn't matter much since their blocks are 64kiB large, but for WebP + // we currently use a single huffman tree per channel for the entire image.) + Array, 4> symbol_frequencies {}; + for (ARGB32 pixel : bitmap) { + static constexpr auto saturating_increment = [](u16& value) { + if (value < UINT16_MAX) + value++; + }; + saturating_increment(symbol_frequencies[0][(pixel >> 8) & 0xff]); // green + saturating_increment(symbol_frequencies[1][(pixel >> 16) & 0xff]); // red + saturating_increment(symbol_frequencies[2][pixel & 0xff]); // blue + saturating_increment(symbol_frequencies[3][pixel >> 24]); // alpha + } - // The code length codes only contain a single entry for '8'. WebP streams with a single element store 0 bits per element. - // (This is different from deflate, which needs 1 bit per element.) + Array, 4> code_lengths {}; + for (int i = 0; i < 4; ++i) { + // "Code [0..15] indicates literal code lengths." => the maximum bit length is 15. + Compress::generate_huffman_lengths(code_lengths[i], symbol_frequencies[i], 15); } - if (all_pixels_are_opaque) { - // Use a simple 1-element code. - TRY(bit_stream.write_bits(1u, 1u)); // Simple code length code. - TRY(bit_stream.write_bits(0u, 1u)); // num_symbols - 1 - TRY(bit_stream.write_bits(1u, 1u)); // is_first_8bits - TRY(bit_stream.write_bits(255u, 8u)); // symbol0 - Array bits_per_symbol {}; - // "When coding a single leaf node [...], all but one code length are zeros, and the single leaf node value - // is marked with the length of 1 -- even when no bits are consumed when that single leaf node tree is used." - // CanonicalCode follows that convention too, even when describing simple code lengths. - bits_per_symbol[255] = 1; - prefix_code_group[3] = TRY(CanonicalCode::from_bytes(bits_per_symbol)); + PrefixCodeGroup prefix_code_group; + for (int i = 0; i < 4; ++i) { + u8 symbols[2]; + unsigned non_zero_symbol_count = 0; + for (int j = 0; j < 256; ++j) { + if (code_lengths[i][j] != 0) { + if (non_zero_symbol_count < 2) + symbols[non_zero_symbol_count] = j; + non_zero_symbol_count++; + } + } + + if (non_zero_symbol_count <= 2) + prefix_code_group[i] = TRY(write_simple_code_lengths(bit_stream, { symbols, non_zero_symbol_count })); + else + prefix_code_group[i] = TRY(write_normal_code_lengths(bit_stream, code_lengths[i], alphabet_sizes[i])); } // For code #5, use a simple empty code, since we don't use this yet. - // "Note: Another special case is when all prefix code lengths are zeros (an empty prefix code). [...] - // empty prefix codes can be coded as those containing a single symbol 0." - TRY(bit_stream.write_bits(1u, 1u)); // Simple code length code. - TRY(bit_stream.write_bits(0u, 1u)); // num_symbols - 1 - TRY(bit_stream.write_bits(0u, 1u)); // is_first_8bits - TRY(bit_stream.write_bits(0u, 1u)); // symbol0 - Array bits_per_symbol {}; - bits_per_symbol[0] = 1; // See comment in `if (all_pixels_are_opaque)` block above. - prefix_code_group[4] = TRY(CanonicalCode::from_bytes(bits_per_symbol)); + prefix_code_group[4] = TRY(write_simple_code_lengths(bit_stream, {})); // Image data. TRY(write_image_data(bit_stream, bitmap, prefix_code_group));