-
Notifications
You must be signed in to change notification settings - Fork 3
Open
Description
I'm trying to use this module on enwik5 data (10 000 bytes). But I encounter this error:
AssertionError: Low or high out of range
Are there any additional limitations in the implementation? Or do I do something wrong?
The script below works ok with enwik4 data (1 000 bytes).
I count statistics myself and then use StaticModel
, but I encounter either this Low or high out of range
error, or ValueError: Symbol has zero frequency
error.
fn = 'enwik5'
print(fn)
def read_bytes(path):
with open(path, 'rb') as f:
return list(f.read())
data = read_bytes(fn)
nsyms = 256
stats = [0] * nsyms
for c in data:
stats[c] += 1
from arithmetic_compressor import AECompressor
from arithmetic_compressor.models.base_adaptive_model import BaseFrequencyTable
from arithmetic_compressor.util import *
SCALE_FACTOR = 4096
class StaticModel:
"""A static model, which does not adapt to input data or statistics."""
def __init__(self, counts_dict):
#vals = (v for k, v in counts_dict.items())
#counts_sum = sum(vals)
#probability = {k: v / counts_sum for k, v in counts_dict.items()}
#print(probability)
probability = counts_dict
symbols = list(probability.keys())
self.name = "Static"
self.symbols = symbols
self.__prob = dict(probability)
# compute cdf from given probability
cdf = {}
prev_freq = 0
self.freq = freq = {sym: round(SCALE_FACTOR * prob)
for sym, prob in probability.items()}
for sym, freq in freq.items():
cdf[sym] = Range(prev_freq, prev_freq + freq)
prev_freq += freq
self.cdf_object = cdf
def cdf(self):
return self.cdf_object
def probability(self):
return self.__prob
def predict(self, symbol):
assert symbol in self.symbols
return self.probability()[symbol]
def update(self, symbol):
pass
def test_model(self, gen_random=True, N=10000, custom_data=None):
self.name = "Static Model"
return BaseFrequencyTable.test_model(self, gen_random, N, custom_data)
freq_map = {
sym: freq for sym, freq in enumerate(stats)
if freq > 0
}
model = StaticModel(freq_map)
coder = AECompressor(model)
N = len(data)
compressed = coder.compress(data)
Metadata
Metadata
Assignees
Labels
No labels