Skip to content

Commit

Permalink
Improve tooling and refactor (#12)
Browse files Browse the repository at this point in the history
- Use multiple binaries to test different models & options
- Add helper functions and macros
  • Loading branch information
mitiko authored Mar 3, 2024
2 parents 9d6eb44 + fc32216 commit f985f42
Show file tree
Hide file tree
Showing 15 changed files with 454 additions and 133 deletions.
12 changes: 12 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@ license = "GPL-3.0"
keywords = ["compressor", "context_mixing"]
categories = ["compression"]

default-run = "weath3rb0i"

[dependencies]
debug_unreachable = "0.1"

Expand All @@ -25,3 +27,13 @@ panic = "abort"
inherits = "release"
debug = true
strip = "none"

[features]
default = []
unsafe_conversions = []

[[bin]]
name = "order0"

[[bin]]
name = "ac-over-huffman"
15 changes: 15 additions & 0 deletions run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#!/bin/bash

# Interactive launcher: shows a menu of the crate's binaries and runs the
# chosen one through `cargo run --bin <name>`.

PS3="Select binary to run: "
binaries=(
    "order0"
    "ac-over-huffman"
)

# TODO: if arg == 1, select the first choice
# TODO: pass release mode

select binaryName in "${binaries[@]}"; do
    # `select` sets the variable to the empty string when the reply does not
    # match a menu entry; re-prompt instead of invoking cargo without a name.
    if [[ -z "$binaryName" ]]; then
        echo "Invalid selection: $REPLY" >&2
        continue
    fi

    # Quoted so the name is always passed to cargo as a single argument.
    cargo run --bin "$binaryName"
    break
done
61 changes: 61 additions & 0 deletions src/bin/ac-over-huffman/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
use std::{io::Result, time::Instant};

use weath3rb0i::{
entropy_coding::{
arithmetic_coder::ArithmeticCoder,
package_merge::{canonical, package_merge},
},
helpers::{histogram, ACStats},
models::{Model, Order0},
u64, u8,
};

/// Runs [`exec`] for every `huffman_size` in `7..16` and reports the setting
/// that produced the smallest result.
///
/// The input file defaults to `/Users/mitiko/_data/book1`; pass a path as the
/// first command-line argument to use a different file.
///
/// # Errors
/// Returns any I/O error raised while reading the input or by [`exec`].
fn main() -> Result<()> {
    // Optional first argument overrides the developer-specific default path,
    // so the binary is usable on machines other than the author's.
    let path = std::env::args_os()
        .nth(1)
        .unwrap_or_else(|| std::ffi::OsString::from("/Users/mitiko/_data/book1"));
    let buf = std::fs::read(&path)?;

    // Start from the uncompressed size: a setting only counts as "best" if it
    // is strictly smaller. `params` stays 0 when no setting beats that.
    let mut best = u64!(buf.len());
    let mut params = 0;
    for huffman_size in 7..16 {
        let res = exec(&buf, huffman_size)?;
        if res < best {
            params = huffman_size;
            best = res;
        }
    }
    println!("best: {best} for [hsize: {params}]"); // TODO: color

    Ok(())
}

/// Encodes `buf` by arithmetic-coding the bits of each byte's canonical
/// Huffman code with an `Order0` model, prints a one-line report and returns
/// the value reported by `ACStats::result` (printed as `csize`).
///
/// `huffman_size` is forwarded to `package_merge` — presumably the maximum
/// code length in bits; confirm against `package_merge`.
///
/// # Errors
/// Propagates any I/O error from the arithmetic coder.
fn exec(buf: &[u8], huffman_size: u8) -> Result<u64> {
    let started = Instant::now();
    let mut coder = ArithmeticCoder::new_coder();
    let mut predictor = Order0::new();
    let mut stats = ACStats::new(); // TODO: order n?

    // Build the per-byte (code, length) table from the byte histogram.
    let symbol_counts = histogram(&buf);
    let lengths = package_merge(&symbol_counts, huffman_size);
    let table = canonical(&lengths);

    for symbol in buf.iter().copied() {
        let (code, len) = table[usize::from(symbol)];
        // Walk the code from its most significant bit down to bit 0.
        let mut shift = len;
        while shift > 0 {
            shift -= 1;
            let p = predictor.predict();
            let bit = u8!((code >> shift) & 1);
            predictor.update(bit);
            coder.encode(bit, p, &mut stats)?;
        }
    }
    coder.flush(&mut stats)?;

    println!(
        "[ac-over-huffman] [hsize: {:02} b{:02}] csize: {} (ratio: {:.3}), ctime: {:?}",
        huffman_size,
        8, // bits in context
        stats.result(),
        stats.result() as f64 / buf.len() as f64,
        started.elapsed()
    );

    Ok(stats.result())
}
35 changes: 35 additions & 0 deletions src/bin/order0/main.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
use std::{io::Result, time::Instant};

use weath3rb0i::{
entropy_coding::arithmetic_coder::ArithmeticCoder,
helpers::ACStats,
models::{Model, Order0},
unroll_for,
};

/// Encodes a file bit by bit with the `Order0` model and the arithmetic coder,
/// then prints the result reported by `ACStats`, its ratio to the input
/// length, and the elapsed time.
///
/// The input file defaults to `/Users/mitiko/_data/book1`; pass a path as the
/// first command-line argument to use a different file.
///
/// # Errors
/// Returns any I/O error raised while reading the input or while encoding.
fn main() -> Result<()> {
    // Optional first argument overrides the developer-specific default path,
    // so the binary is usable on machines other than the author's.
    let path = std::env::args_os()
        .nth(1)
        .unwrap_or_else(|| std::ffi::OsString::from("/Users/mitiko/_data/book1"));
    let buf = std::fs::read(&path)?;

    // The timer starts after the file is loaded, so `ctime` excludes disk I/O.
    let timer = Instant::now();
    let mut ac = ArithmeticCoder::new_coder();
    let mut model = Order0::new();
    let mut writer = ACStats::new();

    for byte in &buf {
        unroll_for!(bit in byte, {
            // Predict first, then train on the actual bit, then encode it
            // with the prediction made before the update.
            let p = model.predict();
            model.update(bit);
            ac.encode(bit, p, &mut writer)?;
        });
    }
    ac.flush(&mut writer)?;

    println!(
        "[order0] csize: {} (ratio: {:.3}), ctime: {:?}",
        writer.result(),
        writer.result() as f64 / buf.len() as f64,
        timer.elapsed()
    );

    Ok(())
}
119 changes: 119 additions & 0 deletions src/entropy_coding/arithmetic_coder.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
use crate::u32;
use std::{io, marker::PhantomData};

// Range constants for the 32-bit arithmetic coder.
// PREC_SHIFT is the shift that isolates the most significant bit of a u32.
const PREC_SHIFT: u32 = u32::BITS - 1; // 31
// Q1 and Q3 bound the middle half of the range; the E3 renormalization runs
// while low >= Q1 and high < Q3. Q2 is XORed into the decoder state there.
const Q1: u32 = 1 << (PREC_SHIFT - 1); // 0x40000000, 1 = 0b01, quarter 1
const Q2: u32 = 2 << (PREC_SHIFT - 1); // 0x80000000, 2 = 0b10, range middle
const Q3: u32 = 3 << (PREC_SHIFT - 1); // 0xC0000000, 3 = 0b11, quarter 3
// Applied after shifting left by one during E3: RLO_MOD clears the top bit of
// low, RHI_MOD sets both the top and the bottom bit of high.
const RLO_MOD: u32 = (1 << PREC_SHIFT) - 1; // 0x7FFFFFFF, range low modify
const RHI_MOD: u32 = (1 << PREC_SHIFT) + 1; // 0x80000001, range high modify

/// The `ArithmeticCoder` encodes/decodes bits given a probability
///
/// The type parameter `T` selects the io side: encoding methods are
/// implemented for `T: ACWrite`, decoding methods for `T: ACRead`.
#[derive(Clone)]
pub struct ArithmeticCoder<T> {
x1: u32, // low end of the current range (starts at 0)
x2: u32, // high end of the current range (starts at u32::MAX)
x: u32, // decoder state read from the stream; set to 0 by the encoder
_marker: PhantomData<T>, // use for io; ties the coder to its io type without storing it
}

/// Bit-level input used by the decoding side of `ArithmeticCoder`.
///
/// `new_decoder` calls `read_u32` once to seed the decoder state; `decode`
/// calls `read_bit` once for every renormalization shift.
pub trait ACRead {
/// Read bit or 0 on EOF
fn read_bit(&mut self) -> io::Result<u8>;
/// Read 4 bytes BE as u32 and pad with 0s on EOF
fn read_u32(&mut self) -> io::Result<u32>;
}

/// Bit-level output used by the encoding side of `ArithmeticCoder`.
///
/// `encode` calls `write_bit` for every settled top bit and `inc_parity` for
/// every E3 renormalization step; the coder's `flush` calls `flush` with its
/// current high bound as `padding`.
pub trait ACWrite {
/// Increases the number of reverse bits to write
fn inc_parity(&mut self);
/// Writes a bit and maintains E3 mapping logic
fn write_bit(&mut self, bit: impl TryInto<u8>) -> io::Result<()>;
/// Flushes leftover parity bits and internal writer
fn flush(&mut self, padding: u32) -> io::Result<()>;
}

impl<W: ACWrite> ArithmeticCoder<W> {
    /// Creates an encoder whose range spans the whole `u32` domain.
    pub fn new_coder() -> Self {
        Self {
            x1: 0,
            x2: u32::MAX,
            x: 0,
            _marker: PhantomData,
        }
    }

    /// Encodes one `bit`, where `prob` (16-bit) scales the share of the
    /// current range that is assigned to a non-zero bit.
    ///
    /// # Errors
    /// Propagates any error returned by `io.write_bit`.
    pub fn encode(&mut self, bit: u8, prob: u16, io: &mut W) -> io::Result<()> {
        let split = lerp(self.x1, self.x2, prob);

        // Narrow the range: zero takes the upper part, anything else the lower.
        if bit == 0 {
            self.x1 = split + 1;
        } else {
            self.x2 = split;
        }

        // While both bounds share their top bit, that bit is settled:
        // emit it and shift it out of both bounds.
        loop {
            let top = self.x1 >> PREC_SHIFT;
            if top != self.x2 >> PREC_SHIFT {
                break;
            }
            io.write_bit(top)?;
            self.x1 <<= 1;
            self.x2 <<= 1;
            self.x2 |= 1;
        }

        // E3: the range straddles the middle but lies inside the middle half;
        // widen it and let the writer remember a pending parity bit.
        while !(self.x1 < Q1 || self.x2 >= Q3) {
            io.inc_parity();
            self.x1 = (self.x1 << 1) & RLO_MOD;
            self.x2 = (self.x2 << 1) | RHI_MOD;
        }

        Ok(())
    }

    /// Finishes the stream by handing the current high bound to `io.flush`
    /// as padding.
    ///
    /// # Errors
    /// Propagates any error returned by `io.flush`.
    pub fn flush(&mut self, io: &mut W) -> io::Result<()> {
        // the range must be normalized: low has top bit 0, high has top bit 1
        debug_assert!(self.x1 >> PREC_SHIFT == 0 && self.x2 >> PREC_SHIFT == 1);
        let padding = self.x2;
        io.flush(padding)
    }
}

impl<R: ACRead> ArithmeticCoder<R> {
    /// Creates a decoder over the full `u32` range, seeding its state with
    /// the first 32-bit word read from `reader`.
    ///
    /// # Errors
    /// Propagates any error returned by `reader.read_u32`.
    pub fn new_decoder(reader: &mut R) -> io::Result<Self> {
        let seed = reader.read_u32()?;
        Ok(Self {
            x1: 0,
            x2: u32::MAX,
            x: seed,
            _marker: PhantomData,
        })
    }

    /// Decodes one bit, where `prob` (16-bit) scales the share of the current
    /// range that is assigned to a non-zero bit.
    ///
    /// # Errors
    /// Propagates any error returned by `io.read_bit`.
    pub fn decode(&mut self, prob: u16, io: &mut R) -> io::Result<u8> {
        let split = lerp(self.x1, self.x2, prob);
        // The state lies in the lower part of the range exactly when the bit is 1.
        let bit = u8::from(self.x <= split);

        // Narrow the range the same way the encoder does.
        if bit == 0 {
            self.x1 = split + 1;
        } else {
            self.x2 = split;
        }

        // While both bounds share their top bit, shift it out and pull the
        // next stream bit into the state.
        while self.x1 >> PREC_SHIFT == self.x2 >> PREC_SHIFT {
            self.x1 <<= 1;
            self.x2 <<= 1;
            self.x2 |= 1;
            let incoming = u32::from(io.read_bit()?);
            self.x = (self.x << 1) | incoming;
        }

        // E3: widen a range stuck inside the middle half, flipping the
        // state's top bit to follow the shifted range.
        while !(self.x1 < Q1 || self.x2 >= Q3) {
            self.x1 = (self.x1 << 1) & RLO_MOD;
            self.x2 = (self.x2 << 1) | RHI_MOD;
            let incoming = u32::from(io.read_bit()?);
            self.x = ((self.x << 1) ^ Q2) | incoming;
        }

        Ok(bit)
    }
}

/// Returns the point that splits `[x1, x2]` in proportion to the 16-bit
/// probability `prob`; the result satisfies `x1 <= xmid < x2`.
#[inline(always)]
fn lerp(x1: u32, x2: u32, prob: u16) -> u32 {
    // Widen prob to a 32-bit fraction; a zero prob becomes the smallest
    // non-zero fraction instead, so some chance is always left.
    let p = match prob {
        0 => 1,
        nonzero => u64::from(nonzero) << 16,
    };
    let range = u64::from(x2 - x1);

    // range < 2^32 and p < 2^32, so the product fits in u64 and the scaled
    // offset is smaller than range: no overflow or underflow below.
    let offset = (range * p) >> 32;
    let xmid = x1 + u32!(offset);
    debug_assert!(xmid >= x1 && xmid < x2);
    xmid
}
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
use std::{fs::File, io::BufWriter};

use super::{
ac_io::{ACReader, ACWriter},
ACRead, ACWrite, ArithmeticCoder,
arithmetic_coder::*,
io::{ACReader, ACWriter},
};

fn compress(filename: &str, input: &[u8], probabilities: &[u16]) -> Vec<u8> {
Expand Down
3 changes: 2 additions & 1 deletion src/entropy_coding/ac_io.rs → src/entropy_coding/io.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
use super::{ACRead, ACWrite};
use core::slice::from_mut as into_slice;
use std::io::{self, ErrorKind, Read, Write};

use super::arithmetic_coder::{ACRead, ACWrite};

/// Arithmetic coder read io for `io::Read` types
pub struct ACReader<R> {
inner: R,
Expand Down
Loading

0 comments on commit f985f42

Please sign in to comment.