concedo committed on
Commit
0571571
1 Parent(s): 8377b08

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -24
app.py CHANGED
@@ -1,12 +1,11 @@
1
  from transformers import AutoTokenizer
2
  import gradio as gr
3
 
4
- def formatarr(input):
5
- return "["+",".join(str(x) for x in input)+"]"
6
 
7
  def tokenize(input_text):
8
- llama_tokens = llama_tokenizer(input_text, add_special_tokens=True)["input_ids"]
9
-
 
10
  llama3_tokens = len(
11
  llama3_tokenizer(input_text, add_special_tokens=True)["input_ids"]
12
  )
@@ -42,22 +41,7 @@ def tokenize(input_text):
42
  )
43
 
44
  results = {
45
- "LLaMa-1/LLaMa-2": len(llama_tokens),
46
- "LLaMa-3": llama3_tokens,
47
- "Mistral": mistral_tokens,
48
- "GPT-2/GPT-J": gpt2_tokens,
49
- "GPT-NeoX": gpt_neox_tokens,
50
- "Falcon": falcon_tokens,
51
- "Phi-1/Phi-2": phi2_tokens,
52
- "T5": t5_tokens,
53
- "Gemma": gemma_tokens,
54
- "Command-R": command_r_tokens,
55
- "Qwen/Qwen1.5": qwen_tokens,
56
- "CodeQwen": codeqwen_tokens,
57
- }
58
-
59
- results2 = {
60
- "LLaMa-1/LLaMa-2": formatarr(llama_tokens),
61
  "LLaMa-3": llama3_tokens,
62
  "Mistral": mistral_tokens,
63
  "GPT-2/GPT-J": gpt2_tokens,
@@ -74,9 +58,7 @@ def tokenize(input_text):
74
  # Sort the results in descending order based on token length
75
  sorted_results = sorted(results.items(), key=lambda x: x[1], reverse=True)
76
 
77
- lens = "\n".join([f"{model}: {tokens}" for model, tokens in sorted_results])
78
- toks = "\n".join([f"{model}: {tokens}" for model, tokens in results2])
79
- return lens + "\n" + toks
80
 
81
 
82
  if __name__ == "__main__":
@@ -120,4 +102,4 @@ if __name__ == "__main__":
120
  iface = gr.Interface(
121
  fn=tokenize, inputs=gr.Textbox(label="Input Text", lines=12), outputs="text"
122
  )
123
- iface.launch()
 
1
  from transformers import AutoTokenizer
2
  import gradio as gr
3
 
 
 
4
 
5
  def tokenize(input_text):
6
+ llama_tokens = len(
7
+ llama_tokenizer(input_text, add_special_tokens=True)["input_ids"]
8
+ )
9
  llama3_tokens = len(
10
  llama3_tokenizer(input_text, add_special_tokens=True)["input_ids"]
11
  )
 
41
  )
42
 
43
  results = {
44
+ "LLaMa-1/LLaMa-2": llama_tokens,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  "LLaMa-3": llama3_tokens,
46
  "Mistral": mistral_tokens,
47
  "GPT-2/GPT-J": gpt2_tokens,
 
58
  # Sort the results in descending order based on token length
59
  sorted_results = sorted(results.items(), key=lambda x: x[1], reverse=True)
60
 
61
+ return "\n".join([f"{model}: {tokens}" for model, tokens in sorted_results])
 
 
62
 
63
 
64
  if __name__ == "__main__":
 
102
  iface = gr.Interface(
103
  fn=tokenize, inputs=gr.Textbox(label="Input Text", lines=12), outputs="text"
104
  )
105
+ iface.launch()