-- tiny_lz4_like_bytes.lua
-- Small LZ4-like compressor/decompressor that works on tables of byte values (0-255).
|
|
-- Shortest match worth encoding; stored match lengths are biased by this value.
local MIN_MATCH = 3

-- Farthest back-reference distance searched (0xFFFF); offsets are written as
-- two bytes.
local MAX_OFFSET = 65535

-- Module table; the public functions are attached to it below.
local M = {}
|
|
|
|
--- Append an extended length to `out` as a run of 255-valued bytes closed by
-- one byte below 255 (the closing byte is 0 when `n` is a multiple of 255).
-- @param out table  sequence of byte values being built; modified in place
-- @param n   number non-negative length to encode
local function write_ext_len(out, n)
  local remaining = n
  while remaining >= 255 do
    out[#out + 1] = 255
    remaining = remaining - 255
  end
  -- The closing byte is always written so the reader knows where to stop.
  out[#out + 1] = remaining
end
|
|
|
|
--- Decode an extended length: sum successive bytes of `input` starting at
-- `pos`, stopping after the first byte below 255, and add the sum to `base`.
-- @param input table  sequence of byte values
-- @param pos   number index of the first length byte
-- @param base  number value the decoded sum is added to
-- @return number `base` plus the decoded sum
-- @return number index of the first byte after the length bytes
-- Raises an error when `input` ends before a closing byte (< 255) is found.
local function read_ext_len(input, pos, base)
  local add = 0
  local b
  repeat
    b = input[pos]
    if b == nil then
      -- Without this check a truncated stream fails with an opaque
      -- "attempt to perform arithmetic on a nil value".
      error("truncated input: unterminated length field")
    end
    pos = pos + 1
    add = add + b
  until b < 255
  return base + add, pos
end
|
|
|
|
--- Search the data before `pos` for the longest run equal to the data that
-- starts at `pos`.
--
-- Candidates are tried from the farthest allowed position towards `pos`; a
-- later candidate only replaces the current best when it is strictly longer.
-- A match may extend past `pos` (source and target overlap). The search stops
-- early once a match of at least 255 + MIN_MATCH bytes has been found.
--
-- @param data   table  sequence of byte values
-- @param pos    number index at which the match target starts
-- @param window number maximum distance to look back
-- @return number offset (distance back from `pos`), 0 when nothing was found
-- @return number match length, 0 when nothing of at least MIN_MATCH was found
local function find_longest_match(data, pos, window)
  local total = #data
  local reach = math.min(window, pos - 1)
  local remaining = total - pos + 1

  -- Too close to the end for any match of the minimum length.
  if remaining < MIN_MATCH then
    return 0, 0
  end

  local found_off, found_len = 0, 0

  for cand = pos - reach, pos - 1 do
    if data[cand] == data[pos] then
      local len = 0
      while len < remaining and data[cand + len] == data[pos + len] do
        len = len + 1
      end

      if len > found_len and len >= MIN_MATCH then
        found_len = len
        found_off = pos - cand
        if found_len >= 255 + MIN_MATCH then
          break
        end
      end
    end
  end

  return found_off, found_len
end
|
|
|
|
-- Write one token byte followed by the literal run bytes[first..last].
-- The token's high nibble holds the literal count (capped at 15, with the
-- remainder written as an extended length); `match_nib` fills the low nibble.
local function emit_literals(out, bytes, first, last, match_nib)
  local count = last - first + 1
  local lit_nib = count < 15 and count or 15
  out[#out + 1] = lit_nib * 16 + match_nib
  if count >= 15 then
    write_ext_len(out, count - 15)
  end
  for idx = first, last do
    out[#out + 1] = bytes[idx]
  end
end

--- Compress a table of byte values into an LZ4-like sequence stream.
--
-- Each sequence consists of: a token (literal count in the high nibble, match
-- length minus MIN_MATCH in the low nibble, both capped at 15), optional extra
-- literal-length bytes, the literals, a two-byte little-endian offset, and
-- optional extra match-length bytes. A final literals-only token (possibly
-- with zero literals) always closes the stream.
--
-- @param bytes table  sequence of byte values (0-255)
-- @return table       sequence of compressed byte values
function M.compress(bytes)
  assert(type(bytes) == "table", "compress expects table of bytes (0-255)")

  local total = #bytes
  local out = {}
  -- `cursor` is the position being examined; `lit_start` is the first byte
  -- not yet written to the output.
  local cursor, lit_start = 1, 1

  while cursor <= total do
    local off, match_len = find_longest_match(bytes, cursor, MAX_OFFSET)

    if match_len < MIN_MATCH then
      -- No usable match: this byte joins the pending literal run.
      cursor = cursor + 1
    else
      local extra = match_len - MIN_MATCH
      emit_literals(out, bytes, lit_start, cursor - 1, extra < 15 and extra or 15)

      -- Offset, low byte first.
      out[#out + 1] = off % 256
      out[#out + 1] = math.floor(off / 256) % 256

      if extra >= 15 then
        write_ext_len(out, extra - 15)
      end

      cursor = cursor + match_len
      lit_start = cursor
    end
  end

  -- Trailing literals (or an empty run) with no match part.
  emit_literals(out, bytes, lit_start, total, 0)

  return out
end
|
|
|
|
--- Decompress a byte table produced by `M.compress`.
--
-- The stream is a series of sequences, read until the input is exhausted:
--   token           high nibble = literal count, low nibble = match length
--                   minus MIN_MATCH
--   [length bytes]  present when the literal nibble is 15
--   literals        copied to the output unchanged
--   offset          two bytes, low byte first: distance back into the output
--   [length bytes]  present when the match nibble is 15
-- The last sequence ends right after its literals (no offset follows).
--
-- @param bytes table  sequence of compressed byte values
-- @return table       sequence of decompressed byte values
-- Raises an error when `bytes` is not a table, when a literal run or an
-- offset is cut short by the end of the input, or when an offset is zero or
-- points before the start of the output.
function M.decompress(bytes)
  assert(type(bytes) == "table", "decompress expects table of bytes (0-255)")

  local pos, n = 1, #bytes
  local out = {}

  while pos <= n do
    local token = bytes[pos]; pos = pos + 1
    if not token then break end

    local lit_len = math.floor(token / 16)
    local match_nib = token % 16

    if lit_len == 15 then
      lit_len, pos = read_ext_len(bytes, pos, 15)
    end

    -- Refuse to read literals past the end: that would append nil values and
    -- silently return a short result.
    if pos + lit_len - 1 > n then
      error("truncated input: literal run exceeds input")
    end
    for _ = 1, lit_len do
      out[#out + 1] = bytes[pos]
      pos = pos + 1
    end

    -- The final sequence carries literals only.
    if pos > n then break end

    local lo, hi = bytes[pos], bytes[pos + 1]
    if lo == nil or hi == nil then
      error("truncated input: incomplete offset")
    end
    local off = lo + hi * 256
    pos = pos + 2

    local match_len = match_nib + MIN_MATCH
    if match_nib == 15 then
      local extra
      extra, pos = read_ext_len(bytes, pos, 0)
      match_len = match_len + extra
    end

    -- Offset 0 would point just past the output and copy nil values, so it is
    -- rejected together with offsets that reach before the first byte.
    local match_start = #out - off + 1
    if off < 1 or match_start < 1 then error("invalid offset") end

    -- Copy one value at a time: the source may overlap the bytes being
    -- appended, in which case freshly written values are read back.
    for k = 0, match_len - 1 do
      out[#out + 1] = out[match_start + k]
    end
  end

  return out
end
|
|
|
|
-- Export the module table carrying `compress` and `decompress`.
return M
|