"""Gradio demo comparing zlib compression with LM-based arithmetic coding.

The page takes a text input and reports the token count of the input, the
size and raw bytes of its zlib-compressed form, and the size and raw output
of the ``gptzip`` arithmetic coder driven by a causal language model.
"""

import zlib

import gptzip
import gradio as gr
import transformers

# Model checkpoint shared by the language model and its tokenizer.
model = "EleutherAI/pythia-410m"
lm = transformers.AutoModelForCausalLM.from_pretrained(model)
tokenizer = transformers.AutoTokenizer.from_pretrained(model)
coder = gptzip.ArithmeticCoder(lm=lm, tokenizer=tokenizer)


def compress(string: str) -> str:
    """Compress *string* two ways and return a one-line comparison report.

    Args:
        string: The text to compress.

    Returns:
        A summary containing the number of tokens in *string*, the length
        and contents of the zlib-compressed bytes (labelled ``GZ``), and the
        length and contents of the arithmetic-coder output (labelled ``AC``).
    """
    # The coder is asked for the padding-bit count as well, so the result is
    # a pair; only the code itself is reported here.
    code, _num_padded_bits = coder.encode(
        string,
        return_num_padded_bits=True,
    )

    # NOTE: the report labels this "GZ", but zlib.compress emits a
    # zlib-wrapped DEFLATE stream rather than a gzip-framed one.
    zlib_bytes = zlib.compress(string.encode())

    num_tokens = len(tokenizer.encode(string))
    return (
        f"Num tokens {num_tokens} || "
        f"GZ: {len(zlib_bytes)} bytes: {zlib_bytes} || "
        f"AC: {len(code)} bytes: {code}"
    )


demo = gr.Interface(fn=compress, inputs="text", outputs="text")

# Only start the web server when executed as a script, so that importing this
# module (e.g. to reuse `compress` or `demo`) does not launch it.
if __name__ == "__main__":
    demo.launch()