Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions src/CodecZstd.jl
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,17 @@ export
ZstdDecompressor,
ZstdDecompressorStream

# Mark the tuning helpers and constants as public API without exporting them.
# The declaration is only evaluated on Julia versions that understand `public`;
# it is built as an expression so that older parsers never see the keyword.
if VERSION >= v"1.11.0-DEV.469"
    eval(Expr(
        :public,
        :level_bounds,
        :windowLog_bounds,
        :windowLogMax_bounds,
        :DEFAULT_COMPRESSION_LEVEL,
        :ZSTD_WINDOWLOG_LIMIT_DEFAULT,
    ))
end

import TranscodingStreams:
TranscodingStreams,
TranscodingStream,
Expand Down
89 changes: 79 additions & 10 deletions src/compression.jl
Original file line number Diff line number Diff line change
Expand Up @@ -4,20 +4,51 @@
# Codec holding the state and parameters for zstd compression.
struct ZstdCompressor <: TranscodingStreams.Codec
# Underlying zstd compression stream.
cstream::CStream
# Compression level applied to the stream when processing starts.
level::Int
# Requested windowLog; 0 means "use the library default" and is not sent to zstd.
windowLog::Int32
# End directive: `ZSTD_e_continue` for `ZstdCompressor`, `ZSTD_e_end` for `ZstdFrameCompressor`.
endOp::LibZstd.ZSTD_EndDirective
end

# Print a constructor-like, one-line description of a compressor codec.
#
# Codecs whose end directive is `ZSTD_e_end` are shown as `ZstdFrameCompressor(level=…)`.
# All other codecs are shown as `<summary>(level=…[, windowLog=Int32(…)])`.
function Base.show(io::IO, codec::ZstdCompressor)
    if codec.endOp == LibZstd.ZSTD_e_end
        print(io, "ZstdFrameCompressor(level=$(codec.level))")
    else
        # Emit the summary exactly once; a second, superseded print of
        # "(level=…)" would duplicate the output.
        print(io, summary(codec), "(")
        print(io, "level=$(codec.level)")
        # windowLog == 0 means "library default", so it is left out of the output.
        if codec.windowLog != Int32(0)
            print(io, ", windowLog=Int32($(codec.windowLog))")
        end
        print(io, ")")
    end
    return nothing
end

# Compression level used when the caller does not pass `level`.
# Same as the zstd command line tool (v1.2.0).
const DEFAULT_COMPRESSION_LEVEL = 3

# Largest windowLog a decompression context accepts by default, i.e. without an
# explicit `windowLogMax` (see the `ZstdDecompressor` documentation).
# This is technically part of the static api, but I don't see how this could be changed easily.
const ZSTD_WINDOWLOG_LIMIT_DEFAULT = Int32(27)

"""
level_bounds() -> min::Int32, max::Int32

Return the minimum and maximum compression levels available.
"""
function level_bounds()
    # Ask the zstd library for the valid range of the compression-level parameter.
    b = LibZstd.ZSTD_cParam_getBounds(LibZstd.ZSTD_c_compressionLevel)
    # The query is not expected to fail for this parameter.
    @assert !iserror(b.error)
    return map(Int32, (b.lowerBound, b.upperBound))
end

"""
windowLog_bounds() -> min::Int32, max::Int32

Return the minimum and maximum windowLog available.
"""
function windowLog_bounds()
    # Ask the zstd library for the valid range of the compression windowLog parameter.
    b = LibZstd.ZSTD_cParam_getBounds(LibZstd.ZSTD_c_windowLog)
    # The query is not expected to fail for this parameter.
    @assert !iserror(b.error)
    lo = Int32(b.lowerBound)
    hi = Int32(b.upperBound)
    return lo, hi
end

"""
ZstdCompressor(;level=$(DEFAULT_COMPRESSION_LEVEL))

Expand All @@ -31,11 +62,36 @@ Arguments
The library also offers negative compression levels,
which extend the range of speed vs. ratio preferences.
The lower the level, the faster the speed (at the cost of compression).
0 is a special value for the default level of the C library (`ZSTD_defaultCLevel()`).
The level will be clamped to the range returned by `level_bounds()`.

Advanced compression parameters.

- `windowLog::Int32= Int32(0)`: Maximum allowed back-reference distance, expressed as power of 2.

This will set a memory budget for streaming decompression,
with larger values requiring more memory
and typically compressing more.
Must be between `windowLog_bounds()[1]` and `windowLog_bounds()[2]` inclusive; any other nonzero value throws an `ArgumentError`.
Special: value 0 means "use default windowLog".
Note: Using a windowLog greater than $(ZSTD_WINDOWLOG_LIMIT_DEFAULT)
requires explicitly allowing such size at streaming decompression stage.
"""
function ZstdCompressor(;
        level::Integer=DEFAULT_COMPRESSION_LEVEL,
        windowLog::Int32=Int32(0),
    )
    # Keyword constructor for a streaming compressor.
    # `level` is clamped into `level_bounds()`.
    # `windowLog` must be 0 (library default) or lie inside `windowLog_bounds()`;
    # otherwise an `ArgumentError` is thrown.
    windowLog_range = (:)(windowLog_bounds()...)
    if !iszero(windowLog) && windowLog ∉ windowLog_range
        # Since this has to be matched on the decompression side, throw instead of clamping.
        throw(ArgumentError("windowLog ∈ $(windowLog_range) must hold. Got\nwindowLog => $(windowLog)"))
    end
    return ZstdCompressor(
        CStream(),
        clamp(level, level_bounds()...),
        windowLog,
        LibZstd.ZSTD_e_continue,
    )
end
# Two-argument convenience form: a streaming codec with the library-default windowLog.
# The struct has four fields, so windowLog and the end directive are passed explicitly
# instead of forwarding to a three-argument call.
ZstdCompressor(cstream, level) = ZstdCompressor(cstream, level, Int32(0), LibZstd.ZSTD_e_continue)

Expand All @@ -54,10 +110,15 @@ Arguments
which extend the range of speed vs. ratio preferences.
The lower the level, the faster the speed (at the cost of compression).
0 is a special value for `ZSTD_defaultCLevel()`.
The level will be clamped to the range `ZSTD_minCLevel()` to `ZSTD_maxCLevel()`.
The level will be clamped by `level_bounds()`.
"""
function ZstdFrameCompressor(;level::Integer=DEFAULT_COMPRESSION_LEVEL)
    # Build the codec exactly once. `level` is clamped into `level_bounds()`,
    # windowLog is left at 0 (library default), and the end directive is
    # `ZSTD_e_end`, which is what marks the codec as a frame compressor.
    return ZstdCompressor(
        CStream(),
        clamp(level, level_bounds()...),
        Int32(0),
        LibZstd.ZSTD_e_end,
    )
end
# pretend that ZstdFrameCompressor is a compressor type
function TranscodingStreams.transcode(C::typeof(ZstdFrameCompressor), args...)
Expand All @@ -78,7 +139,7 @@ const ZstdCompressorStream{S} = TranscodingStream{ZstdCompressor,S} where S<:IO
Create a new zstd compression stream (see `ZstdCompressor` for `kwargs`).
"""
function ZstdCompressorStream(stream::IO; kwargs...)
    # Codec keywords (`level`, `windowLog`) go to `ZstdCompressor`;
    # every remaining keyword is forwarded to `TranscodingStream`.
    codec_kwargs, stream_kwargs = splitkwargs(kwargs, (:level, :windowLog))
    return TranscodingStream(ZstdCompressor(;codec_kwargs...), stream; stream_kwargs...)
end

Expand All @@ -105,12 +166,20 @@ function TranscodingStreams.startproc(codec::ZstdCompressor, mode::Symbol, err::
throw(OutOfMemoryError())
end
ret = LibZstd.ZSTD_CCtx_setParameter(codec.cstream, LibZstd.ZSTD_c_compressionLevel, clamp(codec.level, Cint))
# TODO Allow setting other parameters here.
if iserror(ret)
# This is unreachable according to zstd.h
err[] = ErrorException("zstd initialization error")
err[] = ErrorException("zstd error setting compressionLevel")
return :error
end
if !iszero(codec.windowLog)
ret = LibZstd.ZSTD_CCtx_setParameter(codec.cstream, LibZstd.ZSTD_c_windowLog, Cint(codec.windowLog))
if iserror(ret)
# This should be unreachable because windowLog is checked in the constructor.
err[] = ErrorException("zstd error setting windowLog to $(codec.windowLog)")
return :error
end
end
# TODO Allow setting other parameters here.
end
code = reset!(codec.cstream, 0 #=unknown source size=#)
if iserror(code)
Expand Down
61 changes: 55 additions & 6 deletions src/decompression.jl
Original file line number Diff line number Diff line change
@@ -1,21 +1,56 @@
# Decompressor Codec
# ==================

"""
windowLogMax_bounds() -> min::Int32, max::Int32

Return the minimum and maximum windowLogMax available.
"""
function windowLogMax_bounds()
    # Ask the zstd library for the valid range of the decompression windowLogMax parameter.
    limits = LibZstd.ZSTD_dParam_getBounds(LibZstd.ZSTD_d_windowLogMax)
    # The query is not expected to fail for this parameter.
    @assert !iserror(limits.error)
    return (Int32(limits.lowerBound), Int32(limits.upperBound))
end

# Codec holding the state and parameters for zstd decompression.
struct ZstdDecompressor <: TranscodingStreams.Codec
# Underlying zstd decompression stream.
dstream::DStream
# Requested windowLogMax; 0 means "use the library default" and is not sent to zstd.
windowLogMax::Int32
end

# Print a constructor-like, one-line description of a decompressor codec:
# `<summary>()` when windowLogMax is 0, otherwise `<summary>(windowLogMax=Int32(…))`.
function Base.show(io::IO, codec::ZstdDecompressor)
    # Emit the summary exactly once; a second, superseded print of
    # "<summary>()" would duplicate the output.
    print(io, summary(codec), "(")
    # windowLogMax == 0 means "library default", so it is left out of the output.
    if codec.windowLogMax != Int32(0)
        print(io, "windowLogMax=Int32($(codec.windowLogMax))")
    end
    print(io, ")")
    return nothing
end

"""
ZstdDecompressor(;windowLogMax=Int32(0))

Create a new zstd decompression codec.

Arguments
---------

Advanced decompression parameters.

- `windowLogMax::Int32= Int32(0)`: Select a size limit (in power of 2) beyond which
the streaming API will refuse to allocate memory buffer
in order to protect the host from unreasonable memory requirements.
This parameter is only useful in streaming mode, since no internal buffer is allocated in single-pass mode.
By default, a decompression context accepts window sizes <= (1 << ZSTD_WINDOWLOG_LIMIT_DEFAULT).
Must be between `windowLogMax_bounds()[1]` and `windowLogMax_bounds()[2]` inclusive; any other nonzero value throws an `ArgumentError`.
Special: value 0 means "use default maximum windowLog".
"""
function ZstdDecompressor(;
        windowLogMax::Int32=Int32(0),
    )
    # Keyword constructor for a decompression codec.
    # `windowLogMax` must be 0 (library default) or lie inside `windowLogMax_bounds()`;
    # otherwise an `ArgumentError` is thrown.
    # With no keywords this behaves like the zero-argument form `ZstdDecompressor()`.
    windowLogMax_range = (:)(windowLogMax_bounds()...)
    if !iszero(windowLogMax) && windowLogMax ∉ windowLogMax_range
        throw(ArgumentError("windowLogMax ∈ $(windowLogMax_range) must hold. Got\nwindowLogMax => $(windowLogMax)"))
    end
    return ZstdDecompressor(DStream(), windowLogMax)
end

const ZstdDecompressorStream{S} = TranscodingStream{ZstdDecompressor,S} where S<:IO
Expand All @@ -26,7 +61,8 @@ const ZstdDecompressorStream{S} = TranscodingStream{ZstdDecompressor,S} where S<
Create a new zstd decompression stream (`kwargs` are passed to `TranscodingStream`).
"""
function ZstdDecompressorStream(stream::IO; kwargs...)
    # The codec keyword (`windowLogMax`) goes to `ZstdDecompressor`;
    # every remaining keyword is forwarded to `TranscodingStream`.
    # Splitting must happen before the stream is built, otherwise
    # `windowLogMax` would be passed to `TranscodingStream` instead of the codec.
    codec_kwargs, stream_kwargs = splitkwargs(kwargs, (:windowLogMax,))
    return TranscodingStream(ZstdDecompressor(;codec_kwargs...), stream; stream_kwargs...)
end


Expand All @@ -49,7 +85,14 @@ function TranscodingStreams.startproc(codec::ZstdDecompressor, mode::Symbol, err
if codec.dstream.ptr == C_NULL
throw(OutOfMemoryError())
end
# TODO Allow setting other parameters here.
if !iszero(codec.windowLogMax)
ret = LibZstd.ZSTD_DCtx_setParameter(codec.dstream, LibZstd.ZSTD_d_windowLogMax, Cint(codec.windowLogMax))
if iserror(ret)
# This should be unreachable because windowLogMax is checked in the constructor.
err[] = ErrorException("zstd error setting windowLogMax")
return :error
end
end
end
code = reset!(codec.dstream)
if iserror(code)
Expand Down Expand Up @@ -77,7 +120,13 @@ function TranscodingStreams.process(codec::ZstdDecompressor, input::Memory, outp
if error_code(code) == Integer(LibZstd.ZSTD_error_memory_allocation)
throw(OutOfMemoryError())
end
err[] = ErrorException("zstd decompression error: " * error_name(code))
err[] = if error_code(code) == Integer(LibZstd.ZSTD_error_frameParameter_windowTooLarge)
ErrorException("zstd decompression error: Window size larger than maximum.\nHint: try increasing `windowLogMax` when constructing the `ZstdDecompressor`")
# TODO It is possible to find the requested window size by parsing the frame header.
# This could be used to get a better error message.
else
ErrorException("zstd decompression error: " * error_name(code))
end
return Δin, Δout, :error
else
if code == 0
Expand Down
78 changes: 78 additions & 0 deletions test/runtests.jl
Original file line number Diff line number Diff line change
Expand Up @@ -226,4 +226,82 @@ include("utils.jl")
end
end
end

@testset "windowLog" begin
    compress_bounds = CodecZstd.windowLog_bounds()
    decompress_bounds = CodecZstd.windowLogMax_bounds()
    limit_default = CodecZstd.ZSTD_WINDOWLOG_LIMIT_DEFAULT

    # Values one step outside the library bounds must be rejected by the constructors.
    @test_throws ArgumentError ZstdCompressor(;windowLog=compress_bounds[2]+Int32(1))
    @test_throws ArgumentError ZstdCompressor(;windowLog=compress_bounds[1]-Int32(1))
    @test_throws ArgumentError ZstdDecompressor(;windowLogMax=decompress_bounds[2]+Int32(1))
    @test_throws ArgumentError ZstdDecompressor(;windowLogMax=decompress_bounds[1]-Int32(1))

    # In-bounds values are accepted and appear in the printed representation.
    codec = ZstdCompressor(;level=10, windowLog=compress_bounds[1])
    @test codec isa ZstdCompressor
    @test sprint(show, codec) == "ZstdCompressor(level=10, windowLog=Int32($(compress_bounds[1])))"

    codec = ZstdDecompressor(;windowLogMax=decompress_bounds[1])
    @test codec isa ZstdDecompressor
    @test sprint(show, codec) == "ZstdDecompressor(windowLogMax=Int32($(decompress_bounds[1])))"

    # The hard-coded default limit must be a legal value on both sides.
    @test limit_default ∈ (:)(decompress_bounds...)
    @test limit_default ∈ (:)(compress_bounds...)

    # Lower bound, "default" (0), values around the default limit, and the upper bound.
    windowLogs = Int32[
        compress_bounds[1],
        Int32(0),
        limit_default-1,
        limit_default,
        limit_default+1,
        compress_bounds[2]
    ]
    windowLogMaxs = Int32[
        decompress_bounds[1],
        Int32(0),
        limit_default-1,
        limit_default,
        limit_default+1,
        decompress_bounds[2]
    ]
    # 32 bit systems don't have enough memory to test upper bound windowLog
    if Sys.WORD_SIZE == 32
        pop!(windowLogs)
        pop!(windowLogMaxs)
    end

    # A value of 0 stands for the default limit on either side.
    effective(x) = iszero(x) ? limit_default : x

    for wlog in windowLogs, wlog_max in windowLogMaxs
        uncompressed = rand(UInt8, 3000)
        sink = IOBuffer()
        # level 22 is needed to get compression to use the full
        # ZSTD_WINDOWLOG_LIMIT_DEFAULT when windowLog is 0
        compressor = TranscodingStream(
            ZstdCompressor(;level=22, windowLog=wlog),
            sink;
            stop_on_end=true,
        )
        write(compressor, uncompressed)
        close(compressor)
        compressed = take!(sink)

        decompressor = TranscodingStream(
            ZstdDecompressor(;windowLogMax=wlog_max),
            IOBuffer(compressed),
        )

        # Decompression succeeds only when the decompressor's limit covers the
        # window that the compressor used.
        if effective(wlog_max) ≥ effective(wlog)
            @test read(decompressor) == uncompressed
        else
            @test_throws(
                ErrorException("zstd decompression error: Window size larger than maximum.\nHint: try increasing `windowLogMax` when constructing the `ZstdDecompressor`"),
                read(decompressor),
            )
        end
        close(decompressor)
    end
end
end
Loading