xzuyn committed on
Commit
920307b
1 Parent(s): bcee706

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -5
app.py CHANGED
@@ -13,7 +13,6 @@ def tokenize(input_text):
13
  phi3_tokens = len(phi3_tokenizer(input_text, add_special_tokens=True)["input_ids"])
14
  t5_tokens = len(t5_tokenizer(input_text, add_special_tokens=True)["input_ids"])
15
  gemma_tokens = len(gemma_tokenizer(input_text, add_special_tokens=True)["input_ids"])
16
- gemma2_tokens = len(gemma2_tokenizer(input_text, add_special_tokens=True)["input_ids"])
17
  command_r_tokens = len(command_r_tokenizer(input_text, add_special_tokens=True)["input_ids"])
18
  qwen_tokens = len(qwen_tokenizer(input_text, add_special_tokens=True)["input_ids"])
19
  codeqwen_tokens = len(codeqwen_tokenizer(input_text, add_special_tokens=True)["input_ids"])
@@ -33,8 +32,7 @@ def tokenize(input_text):
33
  "Phi-1/Phi-2": phi2_tokens,
34
  "Phi-3": phi3_tokens,
35
  "T5": t5_tokens,
36
- "Gemma": gemma_tokens,
37
- "Gemma-2": gemma2_tokens,
38
  "Command-R": command_r_tokens,
39
  "Qwen/Qwen1.5": qwen_tokens,
40
  "CodeQwen": codeqwen_tokens,
@@ -62,7 +60,6 @@ if __name__ == "__main__":
62
  phi3_tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
63
  t5_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-xxl")
64
  gemma_tokenizer = AutoTokenizer.from_pretrained("alpindale/gemma-2b")
65
- gemma2_tokenizer = AutoTokenizer.from_pretrained("UCLA-AGI/Gemma-2-9B-It-SPPO-Iter3")
66
  command_r_tokenizer = AutoTokenizer.from_pretrained("PJMixers/CohereForAI_c4ai-command-r-plus-tokenizer")
67
  qwen_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-7B")
68
  codeqwen_tokenizer = AutoTokenizer.from_pretrained("Qwen/CodeQwen1.5-7B")
@@ -72,5 +69,5 @@ if __name__ == "__main__":
72
  internlm_tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-20b", trust_remote_code=True)
73
  internlm2_tokenizer = AutoTokenizer.from_pretrained("internlm/internlm2-20b", trust_remote_code=True)
74
 
75
- iface = gr.Interface(fn=tokenize, inputs=gr.Textbox(label="Input Text", lines=20), outputs="text")
76
  iface.launch()
 
13
  phi3_tokens = len(phi3_tokenizer(input_text, add_special_tokens=True)["input_ids"])
14
  t5_tokens = len(t5_tokenizer(input_text, add_special_tokens=True)["input_ids"])
15
  gemma_tokens = len(gemma_tokenizer(input_text, add_special_tokens=True)["input_ids"])
 
16
  command_r_tokens = len(command_r_tokenizer(input_text, add_special_tokens=True)["input_ids"])
17
  qwen_tokens = len(qwen_tokenizer(input_text, add_special_tokens=True)["input_ids"])
18
  codeqwen_tokens = len(codeqwen_tokenizer(input_text, add_special_tokens=True)["input_ids"])
 
32
  "Phi-1/Phi-2": phi2_tokens,
33
  "Phi-3": phi3_tokens,
34
  "T5": t5_tokens,
35
+ "Gemma/Gemma-2": gemma_tokens,
 
36
  "Command-R": command_r_tokens,
37
  "Qwen/Qwen1.5": qwen_tokens,
38
  "CodeQwen": codeqwen_tokens,
 
60
  phi3_tokenizer = AutoTokenizer.from_pretrained("microsoft/Phi-3-mini-4k-instruct")
61
  t5_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-xxl")
62
  gemma_tokenizer = AutoTokenizer.from_pretrained("alpindale/gemma-2b")
 
63
  command_r_tokenizer = AutoTokenizer.from_pretrained("PJMixers/CohereForAI_c4ai-command-r-plus-tokenizer")
64
  qwen_tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen1.5-7B")
65
  codeqwen_tokenizer = AutoTokenizer.from_pretrained("Qwen/CodeQwen1.5-7B")
 
69
  internlm_tokenizer = AutoTokenizer.from_pretrained("internlm/internlm-20b", trust_remote_code=True)
70
  internlm2_tokenizer = AutoTokenizer.from_pretrained("internlm/internlm2-20b", trust_remote_code=True)
71
 
72
+ iface = gr.Interface(fn=tokenize, inputs=gr.Textbox(label="Input Text", lines=19), outputs="text")
73
  iface.launch()