xzuyn committed
Commit bbc0512
1 Parent(s): f0bb904

Update app.py

Files changed (1): app.py (+21, -9)
app.py CHANGED
@@ -1,17 +1,29 @@
 from transformers import AutoTokenizer
 import gradio as gr
 
-
 def tokenize(input_text):
-    llama_tokens = llama_tokenizer(input_text, add_special_tokens=True)["input_ids"]
-    mistral_tokens = mistral_tokenizer(input_text, add_special_tokens=True)["input_ids"]
-    gpt2_tokens = gpt2_tokenizer(input_text, add_special_tokens=True)["input_ids"]
-    gpt_neox_tokens = gpt_neox_tokenizer(input_text, add_special_tokens=True)["input_ids"]
-    falcon_tokens = falcon_tokenizer(input_text, add_special_tokens=True)["input_ids"]
-    phi2_tokens = phi2_tokenizer(input_text, add_special_tokens=True)["input_ids"]
-    t5_tokens = t5_tokenizer(input_text, add_special_tokens=True)["input_ids"]
+    llama_tokens = len(llama_tokenizer(input_text, add_special_tokens=True)["input_ids"])
+    mistral_tokens = len(mistral_tokenizer(input_text, add_special_tokens=True)["input_ids"])
+    gpt2_tokens = len(gpt2_tokenizer(input_text, add_special_tokens=True)["input_ids"])
+    gpt_neox_tokens = len(gpt_neox_tokenizer(input_text, add_special_tokens=True)["input_ids"])
+    falcon_tokens = len(falcon_tokenizer(input_text, add_special_tokens=True)["input_ids"])
+    phi2_tokens = len(phi2_tokenizer(input_text, add_special_tokens=True)["input_ids"])
+    t5_tokens = len(t5_tokenizer(input_text, add_special_tokens=True)["input_ids"])
+
+    token_lengths = {
+        "LLaMa": llama_tokens,
+        "Mistral": mistral_tokens,
+        "GPT-2/GPT-J": gpt2_tokens,
+        "GPT-NeoX": gpt_neox_tokens,
+        "Falcon": falcon_tokens,
+        "Phi-2": phi2_tokens,
+        "T5": t5_tokens
+    }
+
+    sorted_tokens = sorted(token_lengths.items(), key=lambda x: x[1], reverse=True)
+    result = "\n".join([f"{name}: {length}" for name, length in sorted_tokens])
 
-    return f"LLaMa: {len(llama_tokens)}\nMistral: {len(mistral_tokens)}\nGPT-2/GPT-J: {len(gpt2_tokens)}\nGPT-NeoX: {len(gpt_neox_tokens)}\nFalcon: {len(falcon_tokens)}\nPhi-2: {len(phi2_tokens)}\nT5: {len(t5_tokens)}"
+    return result
 
 
 if __name__ == "__main__":
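
The hunk stops at the "if __name__" guard, so the tokenizer loading and the Gradio wiring live outside this diff. For orientation, a minimal sketch of what that surrounding scaffolding could look like; the checkpoint IDs and interface settings below are assumptions for illustration, not taken from this commit:

# Hypothetical scaffolding around the diffed tokenize() function.
# Checkpoint IDs are illustrative guesses; the Space may pin other repos.
from transformers import AutoTokenizer
import gradio as gr

llama_tokenizer = AutoTokenizer.from_pretrained("huggyllama/llama-7b")
mistral_tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
gpt_neox_tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
falcon_tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b")
phi2_tokenizer = AutoTokenizer.from_pretrained("microsoft/phi-2")
t5_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")

def tokenize(input_text):
    ...  # body as in the diff above: count tokens per tokenizer, sort descending

if __name__ == "__main__":
    # One text box in, one text box out; the function returns the counts
    # as "name: count" lines, largest first.
    iface = gr.Interface(fn=tokenize, inputs=gr.Textbox(lines=7), outputs="text")
    iface.launch()

The user-visible effect of the commit itself is the sorted() call with reverse=True: instead of the fixed tokenizer order hard-coded in the old f-string, the output now ranks tokenizers by token count, largest first, so the most verbose tokenizer for a given input is immediately apparent.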