helenai committed
Commit 2acbb98
1 Parent(s): 257df9a

Update tokenizer list

Files changed (1)
  1. app.py +22 -16
app.py CHANGED
@@ -9,21 +9,26 @@ from datasets import load_dataset
 from PIL import Image
 from transformers import AutoTokenizer
 
-tokenizers = [
-    "google/gemma-7b",
-    "meta-llama/Llama-2-7b",
-    "mistralai/Mistral-7B-v0.1",
-    "facebook/opt-2.7b",
-    "microsoft/phi-2",
-    "THUDM/chatglm3-6b",
-    "Qwen/Qwen1.5-7B-Chat",
-    "bigscience/bloom-560m",
-    "ise-uiuc/Magicoder-S-DS-6.7B",
-    "google/flan-t5-base",
-    "TinyLlama/TinyLlama-1.1B-Chat-v1.0",
-    "google-bert/bert-base-uncased"
-]
+tokenizers = {
+    "bert": "google-bert/bert-base-uncased",
+    "bloom": "bigscience/bloom-560m",
+    "gemma": "fxmarty/tiny-random-GemmaForCausalLM",
+    "chatglm3": "THUDM/chatglm3-6b",
+    "falcon": "tiiuae/falcon-7b",
+    "gpt-neox": "EleutherAI/gpt-neox-20b",
+    "llama": "TinyLlama/TinyLlama-1.1B-Chat-v0.6",
+    "magicoder": "ise-uiuc/Magicoder-S-DS-6.7B",
+    "mistral": "mistralai/Mistral-7B-v0.1",
+    "opt": "facebook/opt-2.7b",
+    "phi-2": "microsoft/phi-2",
+    "pythia": "EleutherAI/pythia-1.4b-deduped",
+    "roberta": "FacebookAI/roberta-base",
+    "qwen": "Qwen/Qwen1.5-7B-Chat",
+    "starcoder": "bigcode/starcoder2-7b",
+    "t5": "google-t5/t5-base",
+}
 
+tokenizers = list(tokenizers.values())
 
 def plot_histogram(data):
     plt.hist(data)
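The hunk above replaces the flat checkpoint list with a dict keyed by model family, then flattens it back, so downstream code that iterates a list of checkpoint names is unchanged. A minimal standalone sketch of that pattern (not part of app.py; the assert just illustrates that dicts preserve insertion order in Python 3.7+):

# Sketch (not app.py): family-name keys document each entry and prevent
# accidental duplicates; list(tokenizers.values()) restores the flat list
# the rest of the app consumes.
tokenizers = {
    "bert": "google-bert/bert-base-uncased",
    "mistral": "mistralai/Mistral-7B-v0.1",
}
tokenizers = list(tokenizers.values())
assert tokenizers == ["google-bert/bert-base-uncased", "mistralai/Mistral-7B-v0.1"]

Note that repos with custom tokenizer code, such as THUDM/chatglm3-6b, typically need trust_remote_code=True when loaded via AutoTokenizer.from_pretrained.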
@@ -86,11 +91,12 @@ demo = gr.Interface(
     examples=[
         ["mistralai/Mistral-7B-v0.1", "gsarti/flores_101", "eng", "dev", "sentence"],
         ["mistralai/Mistral-7B-v0.1", "Muennighoff/flores200", "eng_Latn", "dev", "sentence"],
-        ["mistralai/Mistral-7B-v0.1", "wikitext", "wikitext-2-v1", "validation", "text"],
         ["mistralai/Mistral-7B-v0.1", "hails/mmlu_no_train", "elementary_mathematics", "test", "question"],
-        ["mistralai/Mistral-7B-v0.1", "imdb", "", "test", "text"],
         ["mistralai/Mistral-7B-v0.1", "gsm8k", "main", "test", "question"],
         ["mistralai/Mistral-7B-v0.1", "locuslab/TOFU", "world_facts", "train", "question"],
+        ["mistralai/Mistral-7B-v0.1", "imdb", "", "test", "text"],
+        ["mistralai/Mistral-7B-v0.1", "wikitext", "wikitext-2-v1", "validation", "text"],
+        ["mistralai/Mistral-7B-v0.1", "zeroshot/twitter-financial-news-sentiment", "", "validation", "text"],
     ],
     cache_examples=True
 )
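For context, a hedged sketch of how example rows like these plug into gr.Interface; the count_tokens stub and the input widgets below are illustrative assumptions, not app.py's actual code. Each example row supplies one value per input component, and cache_examples=True has Gradio compute the outputs for the example rows once and reuse them.

import gradio as gr

def count_tokens(tokenizer_name, dataset_name, config, split, column):
    # Stub: the real app loads `split` of `dataset_name` and tokenizes `column`.
    return f"{tokenizer_name} on {dataset_name}/{config} [{split}].{column}"

demo = gr.Interface(
    fn=count_tokens,
    inputs=[gr.Textbox(label=name) for name in
            ["tokenizer", "dataset", "config", "split", "column"]],
    outputs="text",
    examples=[
        ["mistralai/Mistral-7B-v0.1", "imdb", "", "test", "text"],
    ],
    cache_examples=True,  # example outputs are precomputed, so clicking one is instant
)
# demo.launch()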