Skip to content

Commit

Permalink
add support for LZW compression (#116)
Browse files Browse the repository at this point in the history
* add support for LZW compression

* fix tests

* delete test files

* optimize bit shifts

* unroll getcode() loop

* optimize horizontal (un-)differencing

* add support for pre-1.10 Julia versions

* factor out prediction reversal logic so that it supports all compression techniques

* check for overflows

* delete test files

* revert to spec behavior for codesize increases

===

I've found inconsistent behavior between Houdini and Pixelmator,
so defaulting to behavior consistent with the spec

Images will load (apparently) correctly in either case, but
we show a warning for missing EOI in the non-spec case

* update test

* fix overflow check

* bump version

* add doc for TiffFileStrip

* more explicit parameterization for TFS

* bug fix

---------

Co-authored-by: Tamas Nagy <[email protected]>
  • Loading branch information
chrstphrbrns and tlnagy authored Oct 10, 2023
1 parent 560ddd8 commit 8c898d1
Show file tree
Hide file tree
Showing 5 changed files with 257 additions and 23 deletions.
2 changes: 1 addition & 1 deletion Project.toml
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
name = "TiffImages"
uuid = "731e570b-9d59-4bfa-96dc-6df516fadf69"
authors = ["Tamas Nagy <[email protected]>"]
version = "0.6.8"
version = "0.7.0"

[deps]
ColorTypes = "3da002f7-5984-5a60-b8a6-cbb66c0b333f"
Expand Down
2 changes: 1 addition & 1 deletion src/TiffImages.jl
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@ const PKGVERSION = @PkgVersion.Version 0
include("enum.jl")
include("utils.jl")
include("files.jl")
include("compression.jl")
include("tags.jl")
include("ifds.jl")
include("compression.jl")
include("layout.jl")
include(joinpath("types", "common.jl"))
include(joinpath("types", "dense.jl"))
Expand Down
213 changes: 195 additions & 18 deletions src/compression.jl
Original file line number Diff line number Diff line change
@@ -1,39 +1,215 @@
"""
read!(tf, arr, comp)
read!(io, arr, comp)
Read in an array `arr` from the [`TiffFile`](@ref) stream `tf` inflating the
data using compression method `comp`. `read!` will dispatch on the value of
compression and use the correct compression technique to read the data.
Read in an array `arr` from the [`TiffFile`](@ref) or [`TiffFileStrip`](@ref)
stream `io`, inflating the data using compression method `comp`. `read!` will
dispatch on the value of compression and use the correct compression technique
to read the data.
"""
Base.read!(tf::TiffFile, arr::AbstractArray, comp::CompressionType) = read!(tf, arr, Val(comp))
function memcpy(dest::Ptr{T}, src::Ptr{T}, n::Int) where T
    # Thin wrapper around the C library's `memcpy`: copies `n` BYTES (not
    # elements) from `src` to `dest` and returns the pointer `memcpy` returns.
    # The regions must not overlap and both pointers must stay valid for the
    # duration of the call (the caller is responsible for GC-preserving them).
    #
    # The length is declared as `Csize_t` (C's `size_t`) so that a negative `n`
    # raises an `InexactError` during argument conversion instead of being
    # reinterpreted as an enormous unsigned byte count.
    return ccall(:memcpy, Ptr{T}, (Ptr{T}, Ptr{T}, Csize_t), dest, src, n)
end

Base.read!(io::Union{TiffFile, TiffFileStrip}, arr::AbstractArray, comp::CompressionType) = read!(io, arr, Val(comp))

Base.read!(tf::TiffFile, arr::AbstractArray, ::Val{COMPRESSION_NONE}) = read!(tf, arr)
Base.read!(io::Union{TiffFile, TiffFileStrip}, arr::AbstractArray, ::Val{COMPRESSION_NONE}) = read!(io, arr)

function Base.read!(tf::TiffFile, arr::AbstractArray{T, N}, ::Val{COMPRESSION_PACKBITS}) where {T, N}
function Base.read!(tfs::TiffFileStrip, arr::AbstractArray{T, N}, ::Val{COMPRESSION_PACKBITS}) where {T, N}
pos = 1
nbit = Array{Int8}(undef, 1)
nxt = Array{T}(undef, 1)
while pos < length(arr)
read!(tf, nbit)
read!(tfs.tf, nbit)
n = nbit[1]
if 0 <= n <= 127
read!(tf, view(arr, pos:pos+n))
read!(tfs.tf, view(arr, pos:pos+n))
pos += n
elseif -127 <= n <= -1
read!(tf, nxt)
read!(tfs.tf, nxt)
arr[pos:(pos-n)] .= nxt[1]
pos += -n
end
pos += 1
end
end

function Base.read!(tf::TiffFile, arr::AbstractArray, ::Val{COMPRESSION_DEFLATE})
readbytes!(InflateZlibStream(tf.io.io), reinterpret(UInt8, vec(arr)))
function Base.read!(tfs::TiffFileStrip, arr::AbstractArray, ::Val{COMPRESSION_DEFLATE})
readbytes!(InflateZlibStream(tfs.tf.io.io), reinterpret(UInt8, vec(arr)))
end

function Base.read!(tf::TiffFile, arr::AbstractArray, ::Val{COMPRESSION_ADOBE_DEFLATE})
readbytes!(InflateZlibStream(tf.io.io), reinterpret(UInt8, vec(arr)))
function Base.read!(tfs::TiffFileStrip, arr::AbstractArray, ::Val{COMPRESSION_ADOBE_DEFLATE})
readbytes!(InflateZlibStream(tfs.tf.io.io), reinterpret(UInt8, vec(arr)))
end

"""
    lzw_decode!(io, arr::AbstractArray)

Decode a TIFF-flavoured LZW stream into `arr`, in place.

All `bytesavailable(io)` bytes are read from `io` up front and interpreted as a
sequence of variable-width (9 to 12 bit) codes packed most-significant-bit
first. The decoded bytes are written through a raw pointer to the memory of
`arr`, so `arr` must support `pointer` and `sizeof` (presumably a dense array
or a contiguous view -- confirm against the callers).

Decoding stops at the end-of-information code or as soon as `sizeof(arr)` bytes
have been produced. A warning is logged when fewer bytes than expected were
produced, or when the output filled up without an end-of-information code.

# Throws
- `ErrorException` if the stream is malformed: a string would be written past
  the end of `arr` or of the string table, the code table would grow past its
  allocation, a code refers to a table entry that does not exist yet, or the
  input ends in the middle of a code.
"""
function lzw_decode!(io, arr::AbstractArray)
    # codes are stored 1-based ("code + 1 because this is Julia"), hence the +1
    CLEAR_CODE::Int = 256 + 1
    EOI_CODE::Int = 257 + 1
    TABLE_ENTRY_LENGTH_BITS::Int = 16
    # number of slots allocated in the offsets table (12-bit codes, 1-based, +1 spare)
    MAX_TABLE_ENTRIES::Int = 4097

    out_pointer::Ptr{UInt8} = reinterpret(Ptr{UInt8}, pointer(arr))
    output_size::Int = sizeof(arr)
    out_position::Int = 0 # current position in out

    table_size::Int = output_size * 2 + 258
    table_pointer::Ptr{UInt8} = reinterpret(Ptr{UInt8}, Libc.malloc(table_size)) # table of strings
    table_offsets_pointer::Ptr{Int} = reinterpret(Ptr{Int}, Libc.malloc(sizeof(Int) * MAX_TABLE_ENTRIES)) # offsets into table

    # A table entry packs (length, offset) into one Int: the length lives in
    # the upper 16 bits and the offset into the string table in the rest.
    @inline create_table_entry(length, offset) = Base.shl_int(length, (64 - TABLE_ENTRY_LENGTH_BITS)) | offset
    @inline table_entry_length(table_entry) = Base.lshr_int(table_entry, 64 - TABLE_ENTRY_LENGTH_BITS)
    @inline table_entry_offset(table_entry) = table_entry & (Base.shl_int(1, 64 - TABLE_ENTRY_LENGTH_BITS) - 1)

    try
        # malloc signals failure with a null pointer; the `finally` clause
        # below frees whichever allocation did succeed (free(NULL) is a no-op)
        if table_pointer == C_NULL || table_offsets_pointer == C_NULL
            throw(OutOfMemoryError())
        end

        # InitializeTable();
        foreach(i -> unsafe_store!(table_pointer + i, UInt8(i)), 0:255)
        foreach(i -> unsafe_store!(table_offsets_pointer, create_table_entry(1, i), i+1), 0:259) # length is stored in upper 16 bits

        code = -1

        buffer::Int=0 # buffer for reading in codes
        bitcount::Int=0 # number of valid bits in buffer
        codesize::Int=9 # current number of bits per code
        input::Vector{UInt8} = Vector{UInt8}(undef, bytesavailable(io))
        read!(io, input)

        # Pull the next `codesize`-bit code out of `input`. All state is passed
        # in and returned explicitly; indexing `input` is bounds-checked, so a
        # truncated stream raises a BoundsError instead of reading stray memory.
        function getcode(buffer, code, bitcount, codesize, i)
            old_code::Int = code

            # make sure we have enough bits in the buffer
            if bitcount < codesize
                buffer = Base.shl_int(buffer, 8) | input[i+=1]
                bitcount += 8
            end

            # one more time (since the max code size is 12 bits, only need to check twice)
            if bitcount < codesize
                buffer = Base.shl_int(buffer, 8) | input[i+=1]
                bitcount += 8
            end

            code = Base.lshr_int(buffer, bitcount - codesize) & (Base.shl_int(1, codesize) - 1)
            bitcount -= codesize
            # code + 1 because this is Julia
            (buffer, code + 1, old_code, bitcount, codesize, i)
        end

        # These guards must THROW: merely logging would let the raw-pointer
        # write that follows run past the end of the buffer.
        @inline check_table_overflow(start, length) = start + length > table_size && error("LZW: table buffer overflow")
        @inline check_output_overflow(start, length) = start + length > output_size && error("LZW: output buffer overflow")
        @inline check_table_full(count) = count >= MAX_TABLE_ENTRIES && error("LZW: code table overflow")

        # annotated with excerpts from the LZW pseudocode in the TIFF 6.0 spec
        # https://developer.adobe.com/content/dam/udp/en/open/standards/tiff/TIFF6.pdf
        table_count::Int = 258 # number of (valid) table entries; 256 one-byte codes + CLEAR_CODE + EOI_CODE
        next_table_offset::Int = 258
        input_pos::Int = 0 # current position in input
        while true
            # GetNextCode()
            (buffer, code, old_code, bitcount, codesize, input_pos) = getcode(buffer, code, bitcount, codesize, input_pos)
            if code == EOI_CODE || out_position >= output_size
                break
            elseif code == CLEAR_CODE # reset table
                # InitializeTable();
                table_count = 258
                next_table_offset = 258
                codesize = 9
                # Code = GetNextCode();
                (buffer, code, old_code, bitcount, codesize, input_pos) = getcode(buffer, code, bitcount, codesize, input_pos)
                if code == EOI_CODE
                    break
                end
                # right after a reset only the initial entries exist
                code > table_count && error("LZW: invalid code after clear code")

                # WriteString(StringFromCode(Code))
                r = unsafe_load(table_offsets_pointer, code)
                len = table_entry_length(r)

                check_output_overflow(out_position, len)

                unsafe_copyto!(out_pointer + out_position, table_pointer + table_entry_offset(r), len)
                out_position += len
            else
                if code <= table_count
                    # WriteString(StringFromCode(Code));
                    if code <= 256
                        # this is redundant with the check above, but it makes
                        # the code easier to reason about and less bug prone
                        check_output_overflow(out_position, 1)

                        unsafe_store!(out_pointer + out_position, code - 1)
                        out_position += 1
                    else
                        r = unsafe_load(table_offsets_pointer, code)
                        len = table_entry_length(r)

                        check_output_overflow(out_position, len)

                        unsafe_copyto!(out_pointer + out_position, table_pointer + table_entry_offset(r), len)
                        out_position += len
                    end

                    # AddStringToTable(StringFromCode(OldCode) + FirstChar(StringFromCode(Code)));
                    check_table_full(table_count)
                    table_count += 1
                    len = 1
                    if old_code <= 256
                        check_table_overflow(next_table_offset, 2) # this byte + the next one

                        unsafe_store!(table_pointer + next_table_offset, UInt8(old_code - 1))
                    else
                        r = unsafe_load(table_offsets_pointer, old_code)
                        len = table_entry_length(r)

                        check_table_overflow(next_table_offset, len + 1) # these bytes + the next one

                        unsafe_copyto!(table_pointer + next_table_offset, table_pointer + table_entry_offset(r), len)
                    end

                    if code <= 256
                        unsafe_store!(table_pointer + next_table_offset + len, UInt8(code - 1))
                    else
                        r = unsafe_load(table_offsets_pointer, code)
                        unsafe_copyto!(table_pointer + next_table_offset + len, table_pointer + table_entry_offset(r), 1)
                    end
                    unsafe_store!(table_offsets_pointer, create_table_entry(len + 1, next_table_offset), table_count)
                    next_table_offset += len + 1
                else
                    # the code is not in the table yet, so the string is built
                    # from the previous code, which therefore has to exist
                    old_code < 1 && error("LZW: stream does not start with a valid code")

                    # WriteString(StringFromCode(OldCode) + FirstChar(StringFromCode(OldCode)));
                    r = unsafe_load(table_offsets_pointer, old_code)
                    len = table_entry_length(r)

                    check_output_overflow(out_position, len + 1)

                    unsafe_copyto!(out_pointer + out_position, table_pointer + table_entry_offset(r), len)
                    unsafe_store!(out_pointer + out_position + len, unsafe_load(table_pointer + table_entry_offset(r)))
                    out_position += len + 1

                    check_table_overflow(next_table_offset, len + 1)
                    check_table_full(table_count)

                    # AddStringToTable(StringFromCode(OldCode) + FirstChar(StringFromCode(OldCode)));
                    table_count += 1
                    unsafe_copyto!(table_pointer + next_table_offset, table_pointer + table_entry_offset(r), len)
                    unsafe_copyto!(table_pointer + next_table_offset + len, table_pointer + table_entry_offset(r), 1)
                    unsafe_store!(table_offsets_pointer, create_table_entry(len + 1, next_table_offset), table_count)
                    next_table_offset += len + 1
                end
            end

            # code width grows one entry early, as the TIFF spec prescribes
            if table_count == 511
                codesize = 10
            elseif table_count == 1023
                codesize = 11
            elseif table_count == 2047
                codesize = 12
            end
        end

        out_position != output_size && @warn "LZW: expected $output_size bytes, got $out_position bytes"
        out_position == output_size && code != EOI_CODE && @warn "LZW: missing EOI code"
    catch e
        # our own diagnostics are already ErrorExceptions carrying the "LZW: "
        # prefix; everything else (BoundsError, InexactError, ...) gets wrapped
        e isa ErrorException && rethrow()
        error("LZW: $e")
    finally
        Libc.free(table_pointer)
        Libc.free(table_offsets_pointer)
    end
end

# LZW-compressed strips are handed straight to the LZW decoder, which pulls the
# strip's bytes from `tfs` and writes the decoded bytes into `arr`.
Base.read!(tfs::TiffFileStrip{S}, arr::AbstractArray{T, N}, ::Val{COMPRESSION_LZW}) where {T, N, S} =
    lzw_decode!(tfs, arr)

"""
Expand All @@ -46,15 +222,16 @@ julia> TiffImages.get_inflator(first(methods(read!, [TiffImages.TiffFile, Abstra
COMPRESSION_NONE::CompressionType = 1
```
"""
get_inflator(::Type{Tuple{typeof(read!), TiffFile, AbstractArray{T, N} where {T, N}, Val{C}}}) where C = C
get_inflator(::Type{Tuple{typeof(read!), TiffFileStrip, AbstractArray{T, N} where {T, N}, Val{C}}}) where C = C
get_inflator(::Type{Tuple{typeof(read!), Union{TiffFile, TiffFileStrip{S} where S}, AbstractArray{T, N} where {T, N}, Val{C}}}) where C = C

# autogenerate nice error messages for all non-implemented inflation methods
implemented = map(x->get_inflator(x.sig), methods(read!, [TiffFile, AbstractArray, Val], ))
implemented = map(x->get_inflator(x.sig), methods(read!, [Union{TiffFile, TiffFileStrip}, AbstractArray, Val], ))
comps = Set(instances(CompressionType))
setdiff!(comps, implemented)

for comp in comps
eval(quote
Base.read!(tf::TiffFile, arr::AbstractArray, ::Val{$comp}) = error("Compression ", $comp, " is not implemented. Please open an issue against TiffImages.jl.")
Base.read!(io::Union{TiffFile, TiffFileStrip}, arr::AbstractArray, ::Val{$comp}) = error("Compression ", $comp, " is not implemented. Please open an issue against TiffImages.jl.")
end)
end
end
55 changes: 53 additions & 2 deletions src/ifds.jl
Original file line number Diff line number Diff line change
Expand Up @@ -203,7 +203,28 @@ function Base.iterate(file::TiffFile, state::Tuple{Union{IFD{O}, Nothing}, Int})
return (curr_ifd, (next_ifd, next_ifd_offset))
end

function Base.read!(target::AbstractArray{T, N}, tf::TiffFile, ifd::IFD) where {T, N}
"""
$(TYPEDEF)
A strip is a contiguous block of separately-encoded image data. A TIFF
file will typically have multiple strips, each representing multiple rows of
pixels in the image
$(FIELDS)
"""
struct TiffFileStrip{O, S, P}
    # Type parameters:
    #   O, S - forwarded unchanged to the wrapped `TiffFile{O, S}`; `O` is also
    #          the parameter of the `IFD{O}` field.
    #   P    - phantom parameter: no field has this type. `reverse_prediction!`
    #          reads it back to reinterpret the strip's data as `Ptr{P}`, and
    #          the strip-reading loop fills it with `rawtype(ifd)`.
    # Because `P` cannot be inferred from the fields, all three parameters have
    # to be given explicitly when constructing: `TiffFileStrip{O, S, P}(tf, ifd, bytes)`.
    """The file stream"""
    tf::TiffFile{O, S}
    """The IFD corresponding to this strip"""
    ifd::IFD{O}
    """The number of bytes in this strip"""
    bytes::Int
end

# Reading without a compression argument bypasses the strip wrapper and reads
# directly from the underlying file stream.
function Base.read!(tfs::TiffFileStrip, arr::AbstractArray)
    return read!(tfs.tf, arr)
end

# A strip reports the byte count stored in it at construction time.
function Base.bytesavailable(tfs::TiffFileStrip)
    return tfs.bytes
end

function Base.read!(target::AbstractArray{T, N}, tf::TiffFile{O, S}, ifd::IFD{O}) where {T, N, O, S}
strip_offsets = ifd[STRIPOFFSETS].data

if PLANARCONFIG in ifd
Expand All @@ -229,11 +250,18 @@ function Base.read!(target::AbstractArray{T, N}, tf::TiffFile, ifd::IFD) where {
strip_nbytes[end] = (rows - (rowsperstrip * (nstrips-1))) * cols * sizeof(T)
end

bytes = ifd[STRIPBYTECOUNTS].data

startbyte = 1
comp = Val(compression)
rtype = rawtype(ifd)
for i in 1:nstrips
seek(tf, strip_offsets[i]::Core.BuiltinInts)
nbytes = Int(strip_nbytes[i]::Core.BuiltinInts / sizeof(T))
read!(tf, view(target, startbyte:(startbyte+nbytes-1)), compression)
tfs = TiffFileStrip{O, S, rtype}(tf, ifd, bytes[i])
arr = view(target, startbyte:(startbyte+nbytes-1))
read!(tfs, arr, comp)
reverse_prediction!(tfs, arr)
startbyte += nbytes
end
else
Expand Down Expand Up @@ -297,3 +325,26 @@ function Base.write(tf::TiffFile{O}, ifd::IFD{O}) where {O <: Unsigned}

return ifd_end_pos
end

"""
    reverse_prediction!(tfs::TiffFileStrip, arr::AbstractArray)

Undo the TIFF predictor on the already-decompressed strip data in `arr`, in
place.

Only horizontal differencing (`PREDICTOR == 2`) is handled; for any other
predictor value `arr` is left untouched. The data is accessed through a raw
pointer reinterpreted as `Ptr{P}`, where `P` is the third type parameter of
`tfs`, and each stored difference is replaced by the running sum along its row,
separately for each of the `SAMPLESPERPIXEL` interleaved samples.

Returns `nothing`.
"""
function reverse_prediction!(tfs::TiffFileStrip{O, S, P}, arr::AbstractArray{T, N}) where {O, S, P, T, N}
    # Both tags are optional; the third argument of `getdata` is presumably the
    # value used when the tag is absent (confirm against `getdata`). TIFF 6.0
    # specifies 1 as the default for both. Falling back to 0 samples per pixel
    # would make the plane loop below empty and silently skip the reversal for
    # files that omit the tag.
    predictor::Int = Int(getdata(tfs.ifd, PREDICTOR, 1))
    spp::Int = Int(getdata(tfs.ifd, SAMPLESPERPIXEL, 1))
    if predictor == 2
        columns = Int(ncols(tfs.ifd))
        rows = cld(length(arr), columns) # number of rows in this strip

        # horizontal differencing
        temp::Ptr{P} = reinterpret(Ptr{P}, pointer(arr))
        for row in 1:rows
            start = (row - 1) * columns * spp
            for plane in 1:spp
                # the first sample of each row is stored verbatim
                previous::P = unsafe_load(temp, start + plane)
                # every later sample holds the difference to its left neighbour
                # in the same plane, `spp` elements earlier
                for i in (spp + plane):spp:(columns - 1) * spp + plane
                    current = unsafe_load(temp, start + i) + previous
                    unsafe_store!(temp, current, start + i)
                    previous = current
                end
            end
        end
    end
    return nothing
end
8 changes: 7 additions & 1 deletion test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -215,4 +215,10 @@ end
ifd[TiffImages.ROWSPERSTRIP] = 256

@test TiffImages.iscontiguous(ifd)
end
end

@testset "LZW" begin
    # the same picture stored twice: once raw and once LZW-compressed;
    # decoding the latter has to reproduce the former exactly
    reference_path = get_example("shapes_uncompressed.tif")
    lzw_path = get_example("shapes_lzw.tif")
    @test TiffImages.load(reference_path) == TiffImages.load(lzw_path)
end

2 comments on commit 8c898d1

@tlnagy
Copy link
Owner

@tlnagy tlnagy commented on 8c898d1 Oct 13, 2023

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@JuliaRegistrator
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Registration pull request created: JuliaRegistries/General/93361

After the above pull request is merged, it is recommended that a tag is created on this repository for the registered package version.

This will be done automatically if the Julia TagBot GitHub Action is installed, or can be done manually through the github interface, or via:

git tag -a v0.7.0 -m "<description of version>" 8c898d1657ea9c429d34c8065115549fc84657c6
git push origin v0.7.0

Please sign in to comment.