concedo committed on
Commit
24c122a
1 Parent(s): 0571571

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +24 -6
app.py CHANGED
@@ -1,11 +1,12 @@
1
  from transformers import AutoTokenizer
2
  import gradio as gr
3
 
 
 
4
 
5
  def tokenize(input_text):
6
- llama_tokens = len(
7
- llama_tokenizer(input_text, add_special_tokens=True)["input_ids"]
8
- )
9
  llama3_tokens = len(
10
  llama3_tokenizer(input_text, add_special_tokens=True)["input_ids"]
11
  )
@@ -41,7 +42,22 @@ def tokenize(input_text):
41
  )
42
 
43
  results = {
44
- "LLaMa-1/LLaMa-2": llama_tokens,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  "LLaMa-3": llama3_tokens,
46
  "Mistral": mistral_tokens,
47
  "GPT-2/GPT-J": gpt2_tokens,
@@ -58,7 +74,9 @@ def tokenize(input_text):
58
  # Sort the results in descending order based on token length
59
  sorted_results = sorted(results.items(), key=lambda x: x[1], reverse=True)
60
 
61
- return "\n".join([f"{model}: {tokens}" for model, tokens in sorted_results])
 
 
62
 
63
 
64
  if __name__ == "__main__":
@@ -102,4 +120,4 @@ if __name__ == "__main__":
102
  iface = gr.Interface(
103
  fn=tokenize, inputs=gr.Textbox(label="Input Text", lines=12), outputs="text"
104
  )
105
- iface.launch()
 
1
  from transformers import AutoTokenizer
2
  import gradio as gr
3
 
4
+ def formatarr(input):
5
+ return "["+",".join(str(x) for x in input)+"]"
6
 
7
  def tokenize(input_text):
8
+ llama_tokens = llama_tokenizer(input_text, add_special_tokens=True)["input_ids"]
9
+
 
10
  llama3_tokens = len(
11
  llama3_tokenizer(input_text, add_special_tokens=True)["input_ids"]
12
  )
 
42
  )
43
 
44
  results = {
45
+ "LLaMa-1/LLaMa-2": len(llama_tokens),
46
+ "LLaMa-3": llama3_tokens,
47
+ "Mistral": mistral_tokens,
48
+ "GPT-2/GPT-J": gpt2_tokens,
49
+ "GPT-NeoX": gpt_neox_tokens,
50
+ "Falcon": falcon_tokens,
51
+ "Phi-1/Phi-2": phi2_tokens,
52
+ "T5": t5_tokens,
53
+ "Gemma": gemma_tokens,
54
+ "Command-R": command_r_tokens,
55
+ "Qwen/Qwen1.5": qwen_tokens,
56
+ "CodeQwen": codeqwen_tokens,
57
+ }
58
+
59
+ results2 = {
60
+ "LLaMa-1/LLaMa-2": formatarr(llama_tokens),
61
  "LLaMa-3": llama3_tokens,
62
  "Mistral": mistral_tokens,
63
  "GPT-2/GPT-J": gpt2_tokens,
 
74
  # Sort the results in descending order based on token length
75
  sorted_results = sorted(results.items(), key=lambda x: x[1], reverse=True)
76
 
77
+ lens = "\n".join([f"{model}: {tokens}" for model, tokens in sorted_results])
78
+ toks = "\n".join([f"{model}: {tokens}" for model, tokens in results2])
79
+ return lens + "\n" + toks
80
 
81
 
82
  if __name__ == "__main__":
 
120
  iface = gr.Interface(
121
  fn=tokenize, inputs=gr.Textbox(label="Input Text", lines=12), outputs="text"
122
  )
123
+ iface.launch()