jxm committed on
Commit
d0928f8
1 Parent(s): 9acc0af

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -0
app.py ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import zlib
2
+
3
+ import gradio as gr
4
+
5
+ import gptzip
6
+ import transformers
7
+
8
# Model identifier handed to both `from_pretrained` calls below, so the
# language model and its tokenizer always come from the same checkpoint.
model = "EleutherAI/pythia-410m"

# Loaded once at import time and shared by every call to `compress`.
lm = transformers.AutoModelForCausalLM.from_pretrained(model)
tokenizer = transformers.AutoTokenizer.from_pretrained(model)

# Arithmetic coder built on top of the language model and tokenizer above.
coder = gptzip.ArithmeticCoder(tokenizer=tokenizer, lm=lm)
13
+
14
def compress(string: str) -> str:
    """Compare zlib and the LM-based arithmetic coder on ``string``.

    Args:
        string: Text to compress.

    Returns:
        A single line with three ``" || "``-separated sections: the number
        of tokens produced by the module-level ``tokenizer``, the zlib
        output (labelled ``GZ``) with its byte length, and the arithmetic
        coder output (labelled ``AC``) with its length.
    """
    # The coder is asked for the padding-bit count as well, but only the
    # code itself is reported, so the second element is deliberately unused.
    code, _num_padded_bits = coder.encode(string, return_num_padded_bits=True)

    # zlib baseline over the encoded (UTF-8 by default) bytes of the input.
    deflated = zlib.compress(string.encode())

    token_count = len(tokenizer.encode(string))

    sections = (
        f"Num tokens {token_count}",
        f"GZ: {len(deflated)} bytes: {deflated}",
        f"AC: {len(code)} bytes: {code}",
    )
    return " || ".join(sections)
22
+
23
# Text-in / text-out Gradio interface around `compress`. The launch call is
# kept at module level so that running this file starts the app.
demo = gr.Interface(
    fn=compress,
    inputs="text",
    outputs="text",
)
demo.launch()