xzuyn committed on
Commit 72a73bd
1 Parent(s): 9f66134

Update app.py

Files changed (1):
  app.py +6 -1
app.py CHANGED
@@ -6,6 +6,9 @@ def tokenize(input_text):
     llama_tokens = len(
         llama_tokenizer(input_text, add_special_tokens=True)["input_ids"]
     )
+    llama3_tokens = len(
+        llama3_tokenizer(input_text, add_special_tokens=True)["input_ids"]
+    )
     mistral_tokens = len(
         mistral_tokenizer(input_text, add_special_tokens=True)["input_ids"]
     )
@@ -22,6 +25,7 @@ def tokenize(input_text):
 
     results = {
         "LLaMa": llama_tokens,
+        "LLaMa-3": llama3_tokens,
         "Mistral": mistral_tokens,
         "GPT-2/GPT-J": gpt2_tokens,
         "GPT-NeoX": gpt_neox_tokens,
@@ -39,6 +43,7 @@ def tokenize(input_text):
 
 if __name__ == "__main__":
     llama_tokenizer = AutoTokenizer.from_pretrained("TheBloke/Llama-2-7B-fp16")
+    llama3_tokenizer = AutoTokenizer.from_pretrained("unsloth/llama-3-8b")
     mistral_tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
     gpt2_tokenizer = AutoTokenizer.from_pretrained("gpt2")
     gpt_neox_tokenizer = AutoTokenizer.from_pretrained("EleutherAI/gpt-neox-20b")
@@ -47,5 +52,5 @@ if __name__ == "__main__":
     t5_tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-xxl")
     gemma_tokenizer = AutoTokenizer.from_pretrained("alpindale/gemma-2b")
 
-    iface = gr.Interface(fn=tokenize, inputs=gr.Textbox(lines=8), outputs="text")
+    iface = gr.Interface(fn=tokenize, inputs=gr.Textbox(lines=9), outputs="text")
     iface.launch()
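
For context, the count added by this commit can be reproduced outside the Gradio app. The snippet below is a minimal standalone sketch, not part of the commit: it assumes the transformers package is installed and that the unsloth/llama-3-8b tokenizer files can be downloaded from the Hub; the variable names simply mirror those used in app.py.

# Minimal sketch: reproduce the new LLaMa-3 token count from app.py.
# Assumes transformers is installed and "unsloth/llama-3-8b" is reachable.
from transformers import AutoTokenizer

llama3_tokenizer = AutoTokenizer.from_pretrained("unsloth/llama-3-8b")

text = "Tokenizer vocabularies differ, so the same text yields different counts."
llama3_tokens = len(
    llama3_tokenizer(text, add_special_tokens=True)["input_ids"]
)
print(f"LLaMa-3: {llama3_tokens} tokens")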