xzuyn committed on
Commit
a17b6c0
1 Parent(s): f3369dd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +30 -9
app.py CHANGED
@@ -3,15 +3,36 @@ import gradio as gr
3
 
4
 
5
  def tokenize(input_text):
6
- llama_tokens = llama_tokenizer(input_text, add_special_tokens=True)["input_ids"]
7
- mistral_tokens = mistral_tokenizer(input_text, add_special_tokens=True)["input_ids"]
8
- gpt2_tokens = gpt2_tokenizer(input_text, add_special_tokens=True)["input_ids"]
9
- gpt_neox_tokens = gpt_neox_tokenizer(input_text, add_special_tokens=True)["input_ids"]
10
- falcon_tokens = falcon_tokenizer(input_text, add_special_tokens=True)["input_ids"]
11
- phi2_tokens = phi2_tokenizer(input_text, add_special_tokens=True)["input_ids"]
12
- t5_tokens = t5_tokenizer(input_text, add_special_tokens=True)["input_ids"]
13
-
14
- return f"LLaMa: {len(llama_tokens)}\nMistral: {len(mistral_tokens)}\nGPT-2/GPT-J: {len(gpt2_tokens)}\nGPT-NeoX: {len(gpt_neox_tokens)}\nFalcon: {len(falcon_tokens)}\nPhi-2: {len(phi2_tokens)}\nT5: {len(t5_tokens)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
15
 
16
 
17
  if __name__ == "__main__":
 
3
 
4
 
5
  def tokenize(input_text):
6
+ llama_tokens = len(
7
+ llama_tokenizer(input_text, add_special_tokens=True)["input_ids"]
8
+ )
9
+ mistral_tokens = len(
10
+ mistral_tokenizer(input_text, add_special_tokens=True)["input_ids"]
11
+ )
12
+ gpt2_tokens = len(gpt2_tokenizer(input_text, add_special_tokens=True)["input_ids"])
13
+ gpt_neox_tokens = len(
14
+ gpt_neox_tokenizer(input_text, add_special_tokens=True)["input_ids"]
15
+ )
16
+ falcon_tokens = len(
17
+ falcon_tokenizer(input_text, add_special_tokens=True)["input_ids"]
18
+ )
19
+ phi2_tokens = len(phi2_tokenizer(input_text, add_special_tokens=True)["input_ids"])
20
+ t5_tokens = len(t5_tokenizer(input_text, add_special_tokens=True)["input_ids"])
21
+
22
+ results = {
23
+ "LLaMa": llama_tokens,
24
+ "Mistral": mistral_tokens,
25
+ "GPT-2/GPT-J": gpt2_tokens,
26
+ "GPT-NeoX": gpt_neox_tokens,
27
+ "Falcon": falcon_tokens,
28
+ "Phi-2": phi2_tokens,
29
+ "T5": t5_tokens,
30
+ }
31
+
32
+ # Sort the results in descending order based on token length
33
+ sorted_results = sorted(results.items(), key=lambda x: x[1], reverse=True)
34
+
35
+ return "\n".join([f"{model}: {tokens}" for model, tokens in sorted_results])
36
 
37
 
38
  if __name__ == "__main__":