File size: 709 Bytes
d0928f8
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
import zlib

import gradio as gr

import gptzip
import transformers

# Hugging Face checkpoint name; the same identifier is passed to both the model
# and the tokenizer loaders below so the two stay in sync.
model = "EleutherAI/pythia-410m"

# Both objects are created at module level, i.e. at import time, before the
# Gradio app further down is constructed.
lm = transformers.AutoModelForCausalLM.from_pretrained(model)
tokenizer = transformers.AutoTokenizer.from_pretrained(model)
# Arithmetic coder built from the language model and tokenizer above; `compress`
# reads it as a module-level global.
coder = gptzip.ArithmeticCoder(lm=lm, tokenizer=tokenizer)

def compress(string: str) -> str:
    """Build a one-line report comparing two compressions of ``string``.

    The text is encoded with the module-level arithmetic ``coder`` and,
    independently, with ``zlib.compress`` on its UTF-8 bytes.

    Args:
        string: The text to compress.

    Returns:
        A string with three ``" || "``-separated sections: the number of
        tokens ``tokenizer.encode`` yields, the zlib output length and the
        output itself, and the length of the arithmetic code and the code
        itself.
    """
    # The padded-bit count is requested from the coder but is not part of
    # the report, so it is deliberately ignored here.
    code, _padded_bits = coder.encode(string, return_num_padded_bits=True)

    # NOTE: the report labels this "GZ", but zlib.compress emits a
    # zlib-wrapped DEFLATE stream rather than a gzip container.
    deflated = zlib.compress(string.encode())

    token_count = len(tokenizer.encode(string))
    sections = (
        f"Num tokens {token_count}",
        # Formatting a bytes object in an f-string yields its repr (b'...').
        f"GZ: {len(deflated)} bytes: {deflated}",
        f"AC: {len(code)} bytes:  {code}",
    )
    return " || ".join(sections)

# Expose `compress` through a Gradio interface with one text input and one text
# output, then start the app. Both statements run at import time; there is no
# `__main__` guard.
demo = gr.Interface(fn=compress, inputs="text", outputs="text")
demo.launch()