HeshamHaroon committed on
Commit
ca4c188
1 Parent(s): 855a35b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +12 -6
app.py CHANGED
@@ -1,14 +1,20 @@
1
  import gradio as gr
2
  from random import random
 
3
 
4
- # Assuming `aranizer` is a library that provides these tokenizer classes or functions (pseudo-code for demonstration only)
5
- # Please implement actual imports and tokenizer initialization logic
6
def get_tokenizer(tokenizer_choice):
    """Look up a tokenizer by name (placeholder implementation).

    Every registered choice currently maps to None until the real
    tokenizer loading logic is wired in; unknown choices also yield None.
    """
    # Placeholder registry — swap each None for a real instance,
    # e.g. aranizer_bpe32k.get_tokenizer().
    known_tokenizers = {
        "aranizer_bpe32k": None,
        # Add other tokenizers here
    }
    if tokenizer_choice in known_tokenizers:
        return known_tokenizers[tokenizer_choice]
    return None
13
 
14
  def tokenize_and_encode_and_embed(text, tokenizer_choice):
 
1
  import gradio as gr
2
  from random import random
3
+ from aranizer import aranizer_bpe32k, aranizer_bpe50k, aranizer_bpe64k, aranizer_bpe86k, aranizer_sp32k, aranizer_sp50k, aranizer_sp64k, aranizer_sp86k
4
 
5
def load_tokenizer(tokenizer_choice):
    """Return the aranizer tokenizer selected by *tokenizer_choice*.

    Parameters:
        tokenizer_choice: Name of the tokenizer, e.g. "aranizer_bpe32k".

    Returns:
        The tokenizer instance for the chosen name, or None when the
        name is not recognized.
    """
    # Map each choice to its module rather than to a constructed
    # tokenizer: the original built all eight tokenizers on every call
    # and discarded seven of them.  Here only the requested one is
    # instantiated.
    tokenizer_modules = {
        "aranizer_bpe32k": aranizer_bpe32k,
        "aranizer_bpe50k": aranizer_bpe50k,
        "aranizer_bpe64k": aranizer_bpe64k,
        "aranizer_bpe86k": aranizer_bpe86k,
        "aranizer_sp32k": aranizer_sp32k,
        "aranizer_sp50k": aranizer_sp50k,
        "aranizer_sp64k": aranizer_sp64k,
        "aranizer_sp86k": aranizer_sp86k,
    }
    module = tokenizer_modules.get(tokenizer_choice)
    return module.get_tokenizer() if module is not None else None
19
 
20
  def tokenize_and_encode_and_embed(text, tokenizer_choice):