HeshamHaroon committed on
Commit
176f915
1 Parent(s): c296fab

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +23 -4
app.py CHANGED
@@ -2,14 +2,29 @@ from gradio import Interface
2
  import gradio as gr
3
  import aranizer
4
  from aranizer import aranizer_bpe50k, aranizer_bpe64k, aranizer_bpe86k, aranizer_sp32k, aranizer_sp50k, aranizer_sp64k, aranizer_sp86k
5
- from transformers import AutoTokenizer
 
 
 
 
 
 
 
 
6
 
7
  # Load additional tokenizers from transformers
8
  gpt_13b_tokenizer = AutoTokenizer.from_pretrained("FreedomIntelligence/AceGPT-13B")
9
  gpt_7b_tokenizer = AutoTokenizer.from_pretrained("FreedomIntelligence/AceGPT-7B")
10
  jais_13b_tokenizer = AutoTokenizer.from_pretrained("inception-mbzuai/jais-13b")
11
  arabert_tokenizer = AutoTokenizer.from_pretrained("aubmindlab/bert-base-arabertv2")
12
- meta_llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B")
 
 
 
 
 
 
 
13
  cohere_command_r_v01_tokenizer = AutoTokenizer.from_pretrained("CohereForAI/c4ai-command-r-v01")
14
  cohere_command_r_plus_tokenizer = AutoTokenizer.from_pretrained("CohereForAI/c4ai-command-r-plus")
15
 
@@ -21,11 +36,13 @@ tokenizer_options = [
21
  "FreedomIntelligence/AceGPT-7B",
22
  "inception-mbzuai/jais-13b",
23
  "aubmindlab/bert-base-arabertv2",
24
- "meta-llama/Meta-Llama-3-8B",
25
  "CohereForAI/c4ai-command-r-v01",
26
  "CohereForAI/c4ai-command-r-plus"
27
  ]
28
 
 
 
 
29
  tokenizers = {
30
  "aranizer_bpe50k": aranizer_bpe50k.get_tokenizer,
31
  "aranizer_bpe64k": aranizer_bpe64k.get_tokenizer,
@@ -38,11 +55,13 @@ tokenizers = {
38
  "FreedomIntelligence/AceGPT-7B": lambda: gpt_7b_tokenizer,
39
  "inception-mbzuai/jais-13b": lambda: jais_13b_tokenizer,
40
  "aubmindlab/bert-base-arabertv2": lambda: arabert_tokenizer,
41
- "meta-llama/Meta-Llama-3-8B": lambda: meta_llama_tokenizer,
42
  "CohereForAI/c4ai-command-r-v01": lambda: cohere_command_r_v01_tokenizer,
43
  "CohereForAI/c4ai-command-r-plus": lambda: cohere_command_r_plus_tokenizer
44
  }
45
 
 
 
 
46
  def compare_tokenizers(tokenizer_name, text):
47
  # Handle the transformer tokenizers separately due to API differences
48
  if tokenizer_name in [
 
2
  import gradio as gr
3
  import aranizer
4
  from aranizer import aranizer_bpe50k, aranizer_bpe64k, aranizer_bpe86k, aranizer_sp32k, aranizer_sp50k, aranizer_sp64k, aranizer_sp86k
5
+ from transformers import AutoTokenizer, HfFolder, logging
6
+ import os
7
+
8
+ # Retrieve your Hugging Face token from the environment variable
9
+ HF_TOKEN = os.getenv('HF_TOKEN')
10
+
11
+ if HF_TOKEN:
12
+ HfFolder.save_token(HF_TOKEN)
13
+ os.environ["HUGGINGFACEHUB_API_TOKEN"] = HF_TOKEN
14
 
15
  # Load additional tokenizers from transformers
16
  gpt_13b_tokenizer = AutoTokenizer.from_pretrained("FreedomIntelligence/AceGPT-13B")
17
  gpt_7b_tokenizer = AutoTokenizer.from_pretrained("FreedomIntelligence/AceGPT-7B")
18
  jais_13b_tokenizer = AutoTokenizer.from_pretrained("inception-mbzuai/jais-13b")
19
  arabert_tokenizer = AutoTokenizer.from_pretrained("aubmindlab/bert-base-arabertv2")
20
+
21
+ # Try to load the gated tokenizer
22
+ try:
23
+ meta_llama_tokenizer = AutoTokenizer.from_pretrained("meta-llama/Meta-Llama-3-8B")
24
+ except Exception as e:
25
+ meta_llama_tokenizer = None
26
+ logging.warning(f"Could not load meta-llama/Meta-Llama-3-8B tokenizer: {e}")
27
+
28
  cohere_command_r_v01_tokenizer = AutoTokenizer.from_pretrained("CohereForAI/c4ai-command-r-v01")
29
  cohere_command_r_plus_tokenizer = AutoTokenizer.from_pretrained("CohereForAI/c4ai-command-r-plus")
30
 
 
36
  "FreedomIntelligence/AceGPT-7B",
37
  "inception-mbzuai/jais-13b",
38
  "aubmindlab/bert-base-arabertv2",
 
39
  "CohereForAI/c4ai-command-r-v01",
40
  "CohereForAI/c4ai-command-r-plus"
41
  ]
42
 
43
+ if meta_llama_tokenizer:
44
+ tokenizer_options.append("meta-llama/Meta-Llama-3-8B")
45
+
46
  tokenizers = {
47
  "aranizer_bpe50k": aranizer_bpe50k.get_tokenizer,
48
  "aranizer_bpe64k": aranizer_bpe64k.get_tokenizer,
 
55
  "FreedomIntelligence/AceGPT-7B": lambda: gpt_7b_tokenizer,
56
  "inception-mbzuai/jais-13b": lambda: jais_13b_tokenizer,
57
  "aubmindlab/bert-base-arabertv2": lambda: arabert_tokenizer,
 
58
  "CohereForAI/c4ai-command-r-v01": lambda: cohere_command_r_v01_tokenizer,
59
  "CohereForAI/c4ai-command-r-plus": lambda: cohere_command_r_plus_tokenizer
60
  }
61
 
62
+ if meta_llama_tokenizer:
63
+ tokenizers["meta-llama/Meta-Llama-3-8B"] = lambda: meta_llama_tokenizer
64
+
65
  def compare_tokenizers(tokenizer_name, text):
66
  # Handle the transformer tokenizers separately due to API differences
67
  if tokenizer_name in [