Add huffman encoding and decoding for PIZ compression - exr-zig

commit 3457726e51ce107e929a9751d327dd8dfc6fa2e4
parent bb5f6ce51ca52e9043febcc9cd464a44475b8125
Author: Christian Ermann <christianermann@gmail.com>
Date:   Sat, 18 Jan 2025 18:01:47 -0800

Add huffman encoding and decoding for PIZ compression

Diffstat:
A src/piz/huffman.zig  | 861 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M src/root.zig  | 3 ++-

2 files changed, 863 insertions(+), 1 deletion(-)
diff --git a/src/piz/huffman.zig b/src/piz/huffman.zig
@@ -0,0 +1,861 @@
+//! This modules provides function for huffman encoding and decoding, specific
+//! to the PIZ compression format.
+
+const std = @import("std");
+const assert = std.debug.assert;
+const expect = std.testing.expect;
+
+const encoding_bits = 16;
+const decoding_bits = 14;
+
+const encoding_table_size = (1 << encoding_bits) + 1;
+const decoding_table_size = 1 << decoding_bits;
+const decoding_mask = decoding_table_size - 1;
+
+const Encoding = packed struct {
+    val: u58,
+    len: u6,
+};
+
+const DecodingType = enum { empty, short, long };
+const Decoding = union(DecodingType) {
+    empty: void,
+    short: ShortDecoding,
+    long: std.ArrayList(u32),
+};
+
+const ShortDecoding = packed struct {
+    val: u16,
+    len: u6,
+    _: u10 = 0,
+};
+
+fn countFrequencies(data: []const u16, allocator: std.mem.Allocator) ![]u64 {
+    const frequencies = try allocator.alloc(u64, encoding_table_size);
+    @memset(frequencies, 0);
+    for (data) |value| {
+        frequencies[value] += 1;
+    }
+    return frequencies;
+}
+
+test "count frequencies" {
+    const allocator = std.testing.allocator;
+    const data = [_]u16{ 3, 3, 3, 10, 5, 5, 1, 1, 1, 1 };
+    const frequencies = try countFrequencies(&data, allocator);
+    defer allocator.free(frequencies);
+    try expect(frequencies[1] == 4);
+    try expect(frequencies[3] == 3);
+    try expect(frequencies[5] == 2);
+    try expect(frequencies[10] == 1);
+}
+
+const Frequency = struct {
+    frequency: u64,
+    index: u32,
+
+    fn compare(_: void, a: Frequency, b: Frequency) std.math.Order {
+        const freq_order = std.math.order(a.frequency, b.frequency);
+        switch (freq_order) {
+            .eq => return std.math.order(a.index, b.index),
+            else => return freq_order,
+        }
+    }
+};
+const FrequencyHeap = std.PriorityQueue(Frequency, void, Frequency.compare);
+
+const EncodingTableResult = struct {
+    encoding_table: []Encoding,
+    min_code_idx: u32,
+    max_code_idx: u32,
+};
+
+fn buildEncodingTable(
+    frequencies: []const u64,
+    allocator: std.mem.Allocator,
+) !EncodingTableResult {
+    assert(frequencies.len == encoding_table_size);
+    // Initialize a heap of frequencies
+    var heap = FrequencyHeap.init(allocator, {});
+    defer heap.deinit();
+    const link = try allocator.alloc(u32, encoding_table_size);
+    defer allocator.free(link);
+    var min_non_zero: u32 = encoding_table_size - 1;
+    for (0..encoding_table_size) |i| {
+        if (frequencies[i] == 0) continue;
+        min_non_zero = @intCast(i);
+        break;
+    }
+    var max_non_zero: u32 = 0;
+    for (min_non_zero..encoding_table_size) |i| {
+        link[i] = @intCast(i);
+        if (frequencies[i] == 0) continue;
+        try heap.add(.{ .frequency = frequencies[i], .index = @intCast(i) });
+        max_non_zero = @intCast(i);
+    }
+    // Add a pseudo-symbol which will indicate run-length encoding
+    try heap.add(.{ .frequency = 1, .index = max_non_zero + 1 });
+    max_non_zero += 1;
+    // Compute code lengths for each symbol
+    const encoding_table = try allocator.alloc(Encoding, encoding_table_size);
+    @memset(encoding_table, .{ .val = 0, .len = 0 });
+    while (heap.count() > 1) {
+        const lowest = heap.remove();
+        const second_lowest = heap.remove();
+        try heap.add(.{
+            .frequency = lowest.frequency + second_lowest.frequency,
+            .index = second_lowest.index,
+        });
+        var index = second_lowest.index;
+        while (true) : (index = link[index]) {
+            encoding_table[index].len += 1;
+            assert(encoding_table[index].len < 59);
+            if (index == link[index]) break;
+        }
+        link[index] = lowest.index;
+        index = lowest.index;
+        while (true) : (index = link[index]) {
+            encoding_table[index].len += 1;
+            assert(encoding_table[index].len < 59);
+            if (index == link[index]) break;
+        }
+    }
+    return .{
+        .encoding_table = encoding_table,
+        .min_code_idx = min_non_zero,
+        .max_code_idx = max_non_zero,
+    };
+}
+
+test "encoding lengths" {
+    const allocator = std.testing.allocator;
+    const data = [_]u16{
+        1, 1, 1, 2, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5,
+        5, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    };
+    const frequencies = try countFrequencies(&data, allocator);
+    defer allocator.free(frequencies);
+    const result = try buildEncodingTable(frequencies, allocator);
+    defer allocator.free(result.encoding_table);
+    const encoding_table = result.encoding_table;
+    const min_code_idx = result.min_code_idx;
+    const max_code_idx = result.max_code_idx;
+    // Only the symbols in our data should have code lengths.
+    for (encoding_table[0..min_code_idx]) |encoding| {
+        try expect(encoding.len == 0);
+    }
+    for (encoding_table[min_code_idx .. max_code_idx + 1]) |encoding| {
+        try expect(encoding.len > 0);
+    }
+    for (encoding_table[max_code_idx + 1 ..]) |encoding| {
+        try expect(encoding.len == 0);
+    }
+    // More frequent symbols shouldn't have longer codes.
+    for (min_code_idx..max_code_idx + 1) |i| {
+        for (min_code_idx..max_code_idx + 1) |j| {
+            if (frequencies[i] <= frequencies[j]) continue;
+            try expect(encoding_table[i].len <= encoding_table[j].len);
+        }
+    }
+}
+
+fn buildCanonicalEncodings(encoding_table: []Encoding) void {
+    // This algorithm originates here: http://www.compressconsult.com/huffman/#huffman
+    // How many codes of each length are there?
+    var num_codes_per_length: [59]u32 = undefined;
+    @memset(&num_codes_per_length, 0);
+    for (encoding_table) |encoding| {
+        num_codes_per_length[encoding.len] += 1;
+    }
+    // What is the lowest code for each length?
+    var code: u58 = 0;
+    var codes_per_length: [59]u58 = undefined;
+    for (0..59) |i| {
+        const j = 59 - (i + 1);
+        codes_per_length[j] = code;
+        code = (code + num_codes_per_length[j]) >> 1;
+    }
+    std.debug.assert(codes_per_length[0] == 1);
+    // Assign lowest code, then increment.
+    for (encoding_table) |*encoding| {
+        if (encoding.len == 0) continue;
+        encoding.val = codes_per_length[encoding.len];
+        codes_per_length[encoding.len] += 1;
+        std.debug.assert(encoding.val >> encoding.len == 0);
+    }
+}
+
+test "canonical encodings" {
+    const allocator = std.testing.allocator;
+    const data = [_]u16{
+        1, 1, 1, 2, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5,
+        5, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    };
+    const frequencies = try countFrequencies(&data, allocator);
+    defer allocator.free(frequencies);
+    const result = try buildEncodingTable(frequencies, allocator);
+    defer allocator.free(result.encoding_table);
+    const encoding_table = result.encoding_table;
+    const min_code_idx = result.min_code_idx;
+    const max_code_idx = result.max_code_idx;
+    buildCanonicalEncodings(encoding_table);
+    // Only the symbols in our data should have code lengths and values.
+    // Code values should match the specified length.
+    for (encoding_table[0..min_code_idx]) |encoding| {
+        try expect(encoding.val == 0);
+        try expect(encoding.len == 0);
+    }
+    for (encoding_table[min_code_idx .. max_code_idx + 1]) |encoding| {
+        try expect(encoding.val >= 0);
+        try expect(encoding.len > 0);
+        try expect(encoding.val >> encoding.len == 0);
+    }
+    for (encoding_table[max_code_idx + 1 ..]) |encoding| {
+        try expect(encoding.val == 0);
+        try expect(encoding.len == 0);
+    }
+}
+
+fn encode(
+    data: []const u16,
+    encoding_table: []const Encoding,
+    rle_symbol: u32,
+    writer: anytype,
+) !u32 {
+    var num_bits: u32 = 0;
+    var bit_writer = std.io.bitWriter(std.builtin.Endian.big, writer);
+    // Encode and write all symbols.
+    const rle_encoding = encoding_table[rle_symbol];
+    var run_length: u8 = 0;
+    var run_symbol = data[0];
+    for (data[1..]) |x| {
+        if (x == run_symbol and run_length < 255) {
+            run_length += 1;
+        } else {
+            const run_encoding = encoding_table[run_symbol];
+            num_bits += try writeCode(
+                run_encoding,
+                rle_encoding,
+                run_length,
+                &bit_writer,
+            );
+            run_length = 0;
+        }
+        run_symbol = x;
+    }
+    const run_encoding = encoding_table[run_symbol];
+    num_bits += try writeCode(
+        run_encoding,
+        rle_encoding,
+        run_length,
+        &bit_writer,
+    );
+    try bit_writer.flushBits();
+    return num_bits;
+}
+
+fn writeCode(
+    encoding: Encoding,
+    rle_encoding: Encoding,
+    run_length: u8,
+    bit_writer: anytype,
+) !u32 {
+    var num_bits: u32 = 0;
+    // Output `run_length` instances of `encoding`. This is either stored as
+    // `encoding, rle_encoding, run_length` or `[encoding] * run_length`,
+    // whichever is shortest.
+    const rle_length = encoding.len + rle_encoding.len + 8;
+    const rep_length = encoding.len * run_length;
+    if (rle_length < rep_length) {
+        try bit_writer.writeBits(encoding.val, encoding.len);
+        try bit_writer.writeBits(rle_encoding.val, rle_encoding.len);
+        try bit_writer.writeBits(run_length, 8);
+        num_bits += encoding.len + rle_encoding.len + 8;
+    } else {
+        for (0..run_length + 1) |_| {
+            try bit_writer.writeBits(encoding.val, encoding.len);
+            num_bits += encoding.len;
+        }
+    }
+    return num_bits;
+}
+
+test "encode" {
+    // Adapted from tests in the 'exrs' project: https://github.com/johannesvollmer/exrs
+    const allocator = std.testing.allocator;
+    const uncompressed = [_]u16{
+        3852,  2432,  33635, 49381, 10100, 15095, 62693, 63738, 62359, 5013,
+        7715,  59875, 28182, 34449, 19983, 20399, 63407, 29486, 4877,  26738,
+        44815, 14042, 46091, 48228, 25682, 35412, 7582,  65069, 6632,  54124,
+        13798, 27503, 52154, 61961, 30474, 46880, 39097, 15754, 52897, 42371,
+        54053, 14178, 48276, 34591, 42602, 32126, 42062, 31474, 16274, 55991,
+        2882,  17039, 56389, 20835, 57057, 54081, 3414,  33957, 52584, 10222,
+        25139, 40002, 44980, 1602,  48021, 19703, 6562,  61777, 41582, 201,
+        31253, 51790, 15888, 40921, 3627,  12184, 16036, 26349, 3159,  29002,
+        14535, 50632, 18118, 33583, 18878, 59470, 32835, 9347,  16991, 21303,
+        26263, 8312,  14017, 41777, 43240, 3500,  60250, 52437, 45715, 61520,
+    };
+    const compressed = [_]u8{
+        0x10, 0x9,  0xb4, 0xe4, 0x4c, 0xf7, 0xef, 0x42, 0x87, 0x6a, 0xb5, 0xc2,
+        0x34, 0x9e, 0x2f, 0x12, 0xae, 0x21, 0x68, 0xf2, 0xa8, 0x74, 0x37, 0xe1,
+        0x98, 0x14, 0x59, 0x57, 0x2c, 0x24, 0x3b, 0x35, 0x6c, 0x1b, 0x8b, 0xcc,
+        0xe6, 0x13, 0x38, 0xc,  0x8e, 0xe2, 0xc,  0xfe, 0x49, 0x73, 0xbc, 0x2b,
+        0x7b, 0x9,  0x27, 0x79, 0x14, 0xc,  0x94, 0x42, 0xf8, 0x7c, 0x1,  0x8d,
+        0x26, 0xde, 0x87, 0x26, 0x71, 0x50, 0x45, 0xc6, 0x28, 0x40, 0xd5, 0xe,
+        0x8d, 0x8,  0x1e, 0x4c, 0xa4, 0x79, 0x57, 0xf0, 0xc3, 0x6d, 0x5c, 0x6d,
+        0xc0,
+    };
+    // Build encoding table for data.
+    const frequencies = try countFrequencies(&uncompressed, allocator);
+    defer allocator.free(frequencies);
+    const result = try buildEncodingTable(frequencies, allocator);
+    const encoding_table = result.encoding_table;
+    const max_code_idx = result.max_code_idx;
+    defer allocator.free(encoding_table);
+    buildCanonicalEncodings(encoding_table);
+    // Encode data into output buffer.
+    var encoded = std.ArrayList(u8).init(allocator);
+    defer encoded.deinit();
+    const writer = encoded.writer();
+    _ = try encode(&uncompressed, encoding_table, max_code_idx, writer);
+    try expect(std.mem.eql(u8, encoded.items, &compressed));
+}
+
+fn buildDecodingTable(
+    encoding_table: []const Encoding,
+    min_code_idx: u32,
+    max_code_idx: u32,
+    allocator: std.mem.Allocator,
+) ![]Decoding {
+    var decoding_table = try allocator.alloc(Decoding, decoding_table_size);
+    @memset(decoding_table, Decoding{ .empty = @as(void, {}) });
+    for (min_code_idx..max_code_idx + 1) |encoding_idx| {
+        const encoding = encoding_table[encoding_idx];
+        if (encoding.val >> encoding.len != 0) {
+            return error.InvalidEncodingTableEntry;
+        }
+        if (encoding.len > decoding_bits) {
+            // If symbol `x` is encoded as `XX111111111111` and symbol `y` is
+            // encoded as `YY111111111111`, then index `11111111111111` in the
+            // decoding table will contain the list `[x, y]`.
+            const shift = encoding.len - decoding_bits;
+            const decoding_idx = encoding.val >> shift;
+            const decoding = &decoding_table[decoding_idx];
+            switch (decoding.*) {
+                .empty => {
+                    decoding.* = Decoding{
+                        .long = std.ArrayList(u32).init(allocator),
+                    };
+                    try decoding.long.append(@intCast(encoding_idx));
+                },
+                .long => try decoding.long.append(@intCast(encoding_idx)),
+                .short => return error.InvalidEncodingTableEntry,
+            }
+        } else if (encoding.len > 0) {
+            // If symbol `x` is encoded as `XXX`, then all entries between
+            // `XXX00000000000` and `XXX11111111111` map to the value `x` in
+            // the decoding table.
+            const shift = decoding_bits - encoding.len;
+            const start: u64 = @as(u64, encoding.val) << shift;
+            const count: u64 = @as(u64, 1) << shift;
+            for (start..start + count) |i| {
+                decoding_table[i] = Decoding{ .short = .{
+                    .len = encoding.len,
+                    .val = @intCast(encoding_idx),
+                } };
+            }
+        }
+    }
+    return decoding_table;
+}
+
+test "build decoding table" {
+    // Adapted from tests in the 'exrs' project: https://github.com/johannesvollmer/exrs
+    const allocator = std.testing.allocator;
+    const uncompressed = [_]u16{
+        3852,  2432,  33635, 49381, 10100, 15095, 62693, 63738, 62359, 5013,
+        7715,  59875, 28182, 34449, 19983, 20399, 63407, 29486, 4877,  26738,
+        44815, 14042, 46091, 48228, 25682, 35412, 7582,  65069, 6632,  54124,
+        13798, 27503, 52154, 61961, 30474, 46880, 39097, 15754, 52897, 42371,
+        54053, 14178, 48276, 34591, 42602, 32126, 42062, 31474, 16274, 55991,
+        2882,  17039, 56389, 20835, 57057, 54081, 3414,  33957, 52584, 10222,
+        25139, 40002, 44980, 1602,  48021, 19703, 6562,  61777, 41582, 201,
+        31253, 51790, 15888, 40921, 3627,  12184, 16036, 26349, 3159,  29002,
+        14535, 50632, 18118, 33583, 18878, 59470, 32835, 9347,  16991, 21303,
+        26263, 8312,  14017, 41777, 43240, 3500,  60250, 52437, 45715, 61520,
+    };
+    // Build encoding and decoding tables.
+    const frequencies = try countFrequencies(&uncompressed, allocator);
+    defer allocator.free(frequencies);
+    const result = try buildEncodingTable(frequencies, allocator);
+    defer allocator.free(result.encoding_table);
+    const encoding_table = result.encoding_table;
+    const min_code_idx = result.min_code_idx;
+    const max_code_idx = result.max_code_idx;
+    buildCanonicalEncodings(encoding_table);
+    const decoding_table = try buildDecodingTable(
+        encoding_table,
+        min_code_idx,
+        max_code_idx,
+        allocator,
+    );
+    defer allocator.free(decoding_table);
+    // Doesn't test long codes, but at least we know short codes work.
+    for (uncompressed) |x| {
+        const encoding = encoding_table[x];
+        const decoding_idx = @as(u64, encoding.val) << (decoding_bits - encoding.len);
+        const decoding = decoding_table[decoding_idx];
+        try expect(decoding.short.val == x);
+    }
+}
+
+const BitBufferError = error{
+    BitOverflow,
+    BitUnderflow,
+};
+
+const BitBuffer = struct {
+    bits: u64 = 0,
+    num_bits: u6 = 0,
+
+    fn addBits(self: *@This(), bits: anytype, num_bits: u6) !void {
+        if (@as(u8, 63) - num_bits < self.num_bits) {
+            return error.BitOverflow;
+        }
+        self.num_bits += num_bits;
+        const mask = (@as(u64, 1) << num_bits) - 1;
+        self.bits = (self.bits << num_bits) | (bits & mask);
+    }
+
+    fn peekBits(self: *@This(), comptime T: type, num_bits: u6) !T {
+        if (num_bits > self.num_bits) {
+            return error.BitUnderflow;
+        }
+        const mask = (@as(u64, 1) << num_bits) - 1;
+        const val = (self.bits >> (self.num_bits - num_bits)) & mask;
+        return @as(T, @intCast(val));
+    }
+
+    fn readBits(self: *@This(), comptime T: type, num_bits: u6) !T {
+        const val = try self.peekBits(T, num_bits);
+        self.num_bits -= num_bits;
+        return val;
+    }
+
+    fn dumpBits(self: *@This(), num_bits: u6) !void {
+        if (num_bits > self.num_bits) {
+            return error.BitUnderflow;
+        }
+        self.bits >>= @intCast(num_bits);
+        self.num_bits -= @intCast(num_bits);
+    }
+};
+
+fn decode(
+    encoding_table: []const Encoding,
+    decoding_table: []const Decoding,
+    reader: anytype,
+    writer: anytype,
+    num_bits: u32,
+) !void {
+    // Initialize buffer with input data.
+    var bit_buffer = BitBuffer{};
+    while (bit_buffer.num_bits < decoding_bits) {
+        const byte = try reader.readByte();
+        try bit_buffer.addBits(byte, 8);
+    }
+    // Decode (most) input data. Some data may still be in the buffer after
+    // this loop.
+    while (bit_buffer.num_bits >= decoding_bits) {
+        const decoding_idx = try bit_buffer.peekBits(u16, decoding_bits);
+        const decoding = decoding_table[decoding_idx];
+        switch (decoding) {
+            .short => {
+                bit_buffer.num_bits -= decoding.short.len;
+                try writer.writeInt(
+                    u16,
+                    decoding.short.val,
+                    std.builtin.Endian.little,
+                );
+            },
+            .long => {
+                // For long codes, we have to search the list stored in the
+                // decoding table for a matching entry.
+                var found = false;
+                for (decoding.long.items) |encoding_idx| {
+                    const encoding = encoding_table[encoding_idx];
+                    const code = try bit_buffer.peekBits(u16, encoding.len);
+                    if (encoding.val == code) {
+                        bit_buffer.num_bits -= encoding.len;
+                        try writer.writeInt(
+                            u16,
+                            decoding.short.val,
+                            std.builtin.Endian.little,
+                        );
+                        found = true;
+                        break;
+                    }
+                }
+                if (!found) return error.InvalidCode;
+            },
+            .empty => return error.InvalidCode,
+        }
+        // Refill buffer if necessary.
+        while (bit_buffer.num_bits < decoding_bits) {
+            const byte = reader.readByte() catch |err| {
+                if (err != error.EndOfStream) return err;
+                break;
+            };
+            try bit_buffer.addBits(byte, 8);
+        }
+    }
+    std.debug.assert(bit_buffer.num_bits < decoding_bits);
+    // The compressed data isn't always byte-aligned at the end, so we need to
+    // discard the extra bits we read.
+    const extra_bits = (8 - @as(i32, @intCast(num_bits))) & 7;
+    try bit_buffer.dumpBits(@intCast(extra_bits));
+    // Decode any data still in the buffer.
+    while (bit_buffer.num_bits > 0) {
+        const shift = decoding_bits - bit_buffer.num_bits;
+        const decoding_idx = (bit_buffer.bits << shift) & decoding_mask;
+        const decoding = decoding_table[decoding_idx];
+        switch (decoding) {
+            .short => {
+                if (decoding.short.len > bit_buffer.num_bits) {
+                    break;
+                }
+                bit_buffer.num_bits -= decoding.short.len;
+                try writer.writeInt(
+                    u16,
+                    decoding.short.val,
+                    std.builtin.Endian.little,
+                );
+            },
+            else => return error.InvalidCode,
+        }
+    }
+}
+
+test "decode" {
+    // Adapted from tests in the 'exrs' project: https://github.com/johannesvollmer/exrs
+    const allocator = std.testing.allocator;
+    const uncompressed = [_]u16{
+        3852,  2432,  33635, 49381, 10100, 15095, 62693, 63738, 62359, 5013,
+        7715,  59875, 28182, 34449, 19983, 20399, 63407, 29486, 4877,  26738,
+        44815, 14042, 46091, 48228, 25682, 35412, 7582,  65069, 6632,  54124,
+        13798, 27503, 52154, 61961, 30474, 46880, 39097, 15754, 52897, 42371,
+        54053, 14178, 48276, 34591, 42602, 32126, 42062, 31474, 16274, 55991,
+        2882,  17039, 56389, 20835, 57057, 54081, 3414,  33957, 52584, 10222,
+        25139, 40002, 44980, 1602,  48021, 19703, 6562,  61777, 41582, 201,
+        31253, 51790, 15888, 40921, 3627,  12184, 16036, 26349, 3159,  29002,
+        14535, 50632, 18118, 33583, 18878, 59470, 32835, 9347,  16991, 21303,
+        26263, 8312,  14017, 41777, 43240, 3500,  60250, 52437, 45715, 61520,
+    };
+    const compressed = [_]u8{
+        0x10, 0x9,  0xb4, 0xe4, 0x4c, 0xf7, 0xef, 0x42, 0x87, 0x6a, 0xb5, 0xc2,
+        0x34, 0x9e, 0x2f, 0x12, 0xae, 0x21, 0x68, 0xf2, 0xa8, 0x74, 0x37, 0xe1,
+        0x98, 0x14, 0x59, 0x57, 0x2c, 0x24, 0x3b, 0x35, 0x6c, 0x1b, 0x8b, 0xcc,
+        0xe6, 0x13, 0x38, 0xc,  0x8e, 0xe2, 0xc,  0xfe, 0x49, 0x73, 0xbc, 0x2b,
+        0x7b, 0x9,  0x27, 0x79, 0x14, 0xc,  0x94, 0x42, 0xf8, 0x7c, 0x1,  0x8d,
+        0x26, 0xde, 0x87, 0x26, 0x71, 0x50, 0x45, 0xc6, 0x28, 0x40, 0xd5, 0xe,
+        0x8d, 0x8,  0x1e, 0x4c, 0xa4, 0x79, 0x57, 0xf0, 0xc3, 0x6d, 0x5c, 0x6d,
+        0xc0,
+    };
+    const num_bits = 674;
+    // Build encoding and decoding tables.
+    const frequencies = try countFrequencies(&uncompressed, allocator);
+    defer allocator.free(frequencies);
+    const result = try buildEncodingTable(frequencies, allocator);
+    defer allocator.free(result.encoding_table);
+    const encoding_table = result.encoding_table;
+    const min_code_idx = result.min_code_idx;
+    const max_code_idx = result.max_code_idx;
+    buildCanonicalEncodings(encoding_table);
+    const decoding_table = try buildDecodingTable(
+        encoding_table,
+        min_code_idx,
+        max_code_idx,
+        allocator,
+    );
+    defer allocator.free(decoding_table);
+    // Decode compressed data
+    var stream = std.io.fixedBufferStream(&compressed);
+    var reader = stream.reader();
+    var decoded = std.ArrayList(u8).init(allocator);
+    defer decoded.deinit();
+    var writer = decoded.writer();
+    try decode(encoding_table, decoding_table, &reader, &writer, num_bits);
+    const output = @as([]u16, @alignCast(std.mem.bytesAsSlice(u16, decoded.items)));
+    try expect(std.mem.eql(u16, &uncompressed, output));
+}
+
+test "encode then decode" {
+    // Adapted from tests in the 'exrs' project: https://github.com/johannesvollmer/exrs
+    const allocator = std.testing.allocator;
+    const uncompressed = [_]u16{
+        3852,  2432,  33635, 49381, 10100, 15095, 62693, 63738, 62359, 5013,
+        7715,  59875, 28182, 34449, 19983, 20399, 63407, 29486, 4877,  26738,
+        44815, 14042, 46091, 48228, 25682, 35412, 7582,  65069, 6632,  54124,
+        13798, 27503, 52154, 61961, 30474, 46880, 39097, 15754, 52897, 42371,
+        54053, 14178, 48276, 34591, 42602, 32126, 42062, 31474, 16274, 55991,
+        2882,  17039, 56389, 20835, 57057, 54081, 3414,  33957, 52584, 10222,
+        25139, 40002, 44980, 1602,  48021, 19703, 6562,  61777, 41582, 201,
+        31253, 51790, 15888, 40921, 3627,  12184, 16036, 26349, 3159,  29002,
+        14535, 50632, 18118, 33583, 18878, 59470, 32835, 9347,  16991, 21303,
+        26263, 8312,  14017, 41777, 43240, 3500,  60250, 52437, 45715, 61520,
+    };
+    // Build encoding table for data.
+    const frequencies = try countFrequencies(&uncompressed, allocator);
+    defer allocator.free(frequencies);
+    const result = try buildEncodingTable(frequencies, allocator);
+    const encoding_table = result.encoding_table;
+    const min_code_idx = result.min_code_idx;
+    const max_code_idx = result.max_code_idx;
+    defer allocator.free(encoding_table);
+    buildCanonicalEncodings(encoding_table);
+    // Encode data into output buffer.
+    var encoded = std.ArrayList(u8).init(allocator);
+    defer encoded.deinit();
+    var writer = encoded.writer();
+    const num_bits = try encode(&uncompressed, encoding_table, max_code_idx, writer);
+    // Build encoding and decoding tables.
+    buildCanonicalEncodings(encoding_table);
+    const decoding_table = try buildDecodingTable(
+        encoding_table,
+        min_code_idx,
+        max_code_idx,
+        allocator,
+    );
+    defer allocator.free(decoding_table);
+    // Decode compressed data
+    var stream = std.io.fixedBufferStream(encoded.items);
+    var reader = stream.reader();
+    var decoded = std.ArrayList(u8).init(allocator);
+    defer decoded.deinit();
+    writer = decoded.writer();
+    try decode(encoding_table, decoding_table, &reader, &writer, num_bits);
+    const output = @as([]u16, @alignCast(std.mem.bytesAsSlice(u16, decoded.items)));
+    try expect(std.mem.eql(u16, &uncompressed, output));
+}
+
+const short_zerocode_run = 59;
+const long_zerocode_run = 63;
+const shortest_long_run = 2 + long_zerocode_run - short_zerocode_run;
+const longest_long_run = 255 + shortest_long_run;
+
+fn packEncodingTable(
+    encoding_table: []Encoding,
+    min_code_idx: u32,
+    max_code_idx: u32,
+    writer: anytype,
+) !u32 {
+    var num_bits: u32 = 0;
+    var bit_writer = std.io.bitWriter(std.builtin.Endian.big, writer);
+    var run_len: u8 = 0;
+    for (min_code_idx..max_code_idx + 1) |encoding_idx| {
+        const encoding = encoding_table[encoding_idx];
+        if ((encoding.len == 0) and (run_len < longest_long_run)) {
+            run_len += 1;
+            continue;
+        }
+        if (run_len >= shortest_long_run) {
+            try bit_writer.writeBits(@as(u6, long_zerocode_run), 6);
+            try bit_writer.writeBits(run_len - shortest_long_run, 8);
+            num_bits += 14;
+        } else if (run_len >= 2) {
+            try bit_writer.writeBits(run_len - 2 + short_zerocode_run, 6);
+            num_bits += 6;
+        } else if (run_len == 1) {
+            try bit_writer.writeBits(@as(u6, 0), 6);
+            num_bits += 6;
+        }
+        run_len = 0;
+        try bit_writer.writeBits(encoding.len, 6);
+        num_bits += 6;
+    }
+    try bit_writer.flushBits();
+    return (num_bits + 7) / 8;
+}
+
+fn readEncodingTable(
+    reader: anytype,
+    min_code_idx: u32,
+    max_code_idx: u32,
+    allocator: std.mem.Allocator,
+) ![]Encoding {
+    const encoding_table = try allocator.alloc(Encoding, encoding_table_size);
+    @memset(encoding_table, .{ .val = 0, .len = 0 });
+    var bit_reader = std.io.bitReader(std.builtin.Endian.big, reader);
+    var encoding_idx = min_code_idx;
+    while (encoding_idx <= max_code_idx) {
+        const encoding_len = try bit_reader.readBitsNoEof(u6, 6);
+        if (encoding_len == long_zerocode_run) {
+            const run_len = try bit_reader.readBitsNoEof(u8, 8) + shortest_long_run;
+            if (encoding_idx + run_len > max_code_idx + 1) {
+                return error.EncodingTableTooLong;
+            }
+            encoding_idx += run_len;
+            continue;
+        }
+        if (encoding_len >= short_zerocode_run) {
+            const run_len = encoding_len - short_zerocode_run + 2;
+            if (encoding_idx + run_len > max_code_idx + 1) {
+                return error.EncodingTableTooLong;
+            }
+            encoding_idx += run_len;
+            continue;
+        }
+        encoding_table[encoding_idx].len = encoding_len;
+        encoding_idx += 1;
+    }
+    return encoding_table;
+}
+
+test "pack and unpack encoding table" {
+    const allocator = std.testing.allocator;
+    const data = [_]u16{
+        1, 1, 1, 2, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5,
+        5, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    };
+    // Build encoding table.
+    const frequencies = try countFrequencies(&data, allocator);
+    defer allocator.free(frequencies);
+    const result = try buildEncodingTable(frequencies, allocator);
+    defer allocator.free(result.encoding_table);
+    const encoding_table = result.encoding_table;
+    const min_code_idx = result.min_code_idx;
+    const max_code_idx = result.max_code_idx;
+    buildCanonicalEncodings(encoding_table);
+    // Pack encoding table.
+    var packed_table = std.ArrayList(u8).init(allocator);
+    defer packed_table.deinit();
+    const writer = packed_table.writer();
+    _ = try packEncodingTable(
+        encoding_table,
+        min_code_idx,
+        max_code_idx,
+        writer,
+    );
+    // Unpack encoding table.
+    var stream = std.io.fixedBufferStream(packed_table.items);
+    const reader = stream.reader();
+    const unpacked = try readEncodingTable(
+        reader,
+        min_code_idx,
+        max_code_idx,
+        allocator,
+    );
+    defer allocator.free(unpacked);
+    buildCanonicalEncodings(unpacked);
+    // The encoding tables should be identical.
+    for (encoding_table, unpacked) |e1, e2| {
+        try expect(std.meta.eql(e1, e2));
+    }
+}
+
+const EncodingMetadata = packed struct {
+    min_code_idx: u32,
+    max_code_idx: u32,
+    table_size: u32,
+    num_bits: u32,
+    padding: u32 = 0,
+};
+
+pub fn compress(
+    src: []const u16,
+    dst: *std.io.StreamSource,
+    allocator: std.mem.Allocator,
+) !void {
+    const frequencies = try countFrequencies(src, allocator);
+    defer allocator.free(frequencies);
+    const result = try buildEncodingTable(frequencies, allocator);
+    defer allocator.free(result.encoding_table);
+    const encoding_table = result.encoding_table;
+    const min_code_idx = result.min_code_idx;
+    const max_code_idx = result.max_code_idx;
+    buildCanonicalEncodings(encoding_table);
+
+    const metadata_pos = try dst.getPos();
+    try dst.seekBy(5 * @sizeOf(u32));
+    const table_size = try packEncodingTable(
+        encoding_table,
+        min_code_idx,
+        max_code_idx,
+        dst.writer(),
+    );
+    const num_bits = try encode(
+        src,
+        encoding_table,
+        max_code_idx,
+        dst.writer(),
+    );
+    try dst.seekTo(metadata_pos);
+    try dst.writer().writeInt(u32, min_code_idx, std.builtin.Endian.little);
+    try dst.writer().writeInt(u32, max_code_idx, std.builtin.Endian.little);
+    try dst.writer().writeInt(u32, table_size, std.builtin.Endian.little);
+    try dst.writer().writeInt(u32, num_bits, std.builtin.Endian.little);
+    try dst.writer().writeInt(u32, 0, std.builtin.Endian.little);
+}
+
+pub fn decompress(
+    reader: anytype,
+    writer: anytype,
+    allocator: std.mem.Allocator,
+) !void {
+    const min_code_idx = try reader.readInt(u32, std.builtin.Endian.little);
+    const max_code_idx = try reader.readInt(u32, std.builtin.Endian.little);
+    const table_size = try reader.readInt(u32, std.builtin.Endian.little);
+    const num_bits = try reader.readInt(u32, std.builtin.Endian.little);
+    const padding = try reader.readInt(u32, std.builtin.Endian.little);
+    _ = padding;
+    _ = table_size;
+    const unpacked = try readEncodingTable(
+        reader,
+        min_code_idx,
+        max_code_idx,
+        allocator,
+    );
+    defer allocator.free(unpacked);
+    buildCanonicalEncodings(unpacked);
+    const decoding_table = try buildDecodingTable(
+        unpacked,
+        min_code_idx,
+        max_code_idx,
+        allocator,
+    );
+    defer allocator.free(decoding_table);
+    const num_bytes = ((num_bits + 7) & 0xFFFFFFF8) >> 3;
+    var limited_reader = std.io.limitedReader(reader, num_bytes);
+    try decode(unpacked, decoding_table, limited_reader.reader(), writer, num_bits);
+}
+
+test "compress then decompress" {
+    const allocator = std.testing.allocator;
+    const src = [_]u16{
+        1, 1, 1, 2, 3, 3, 4, 4, 4, 4, 4, 5, 5, 5,
+        5, 5, 6, 7, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8,
+    };
+    const dst_buffer = try allocator.alloc(u8, 256);
+    defer allocator.free(dst_buffer);
+    @memset(dst_buffer, 0);
+    var dst = std.io.StreamSource{
+        .buffer = std.io.fixedBufferStream(dst_buffer),
+    };
+    try compress(&src, &dst, allocator);
+
+    try dst.seekTo(0);
+
+    const decoded_buffer = try allocator.alloc(u8, 256);
+    defer allocator.free(decoded_buffer);
+    @memset(decoded_buffer, 0);
+    var decoded = std.io.StreamSource{
+        .buffer = std.io.fixedBufferStream(decoded_buffer),
+    };
+    try decompress(
+        dst.reader(),
+        decoded.writer(),
+        allocator,
+    );
+    const output = @as([]u16, @alignCast(std.mem.bytesAsSlice(u16, decoded_buffer)));
+    try expect(std.mem.eql(u16, output[0..src.len], &src));
+}
diff --git a/src/root.zig b/src/root.zig
@@ -1,7 +1,8 @@
 const std = @import("std");
 const testing = std.testing;
 
-pub const piz = @import("piz/lut.zig");
+pub const lut = @import("piz/lut.zig");
+pub const huffman = @import("piz/huffman.zig");
 
 test {
     std.testing.refAllDecls(@This());

	exr-zig Use EXR images
	git clone git://git.electrosoup.com/exr-zig
	Log \| Files \| Refs

A	src/piz/huffman.zig	\|	861	+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
M	src/root.zig	\|	3	++-