HeshamHaroon committed on
Commit
7db01f9
1 Parent(s): 8e747fd

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -8
app.py CHANGED
@@ -1,14 +1,16 @@
1
- from gradio.inputs import Textbox
2
- from gradio.outputs import Textbox, Table
3
- from gradio import Interface
4
  import aranizer
5
-
6
- # Load your tokenizers
7
  tokenizers = {
 
8
  "aranizer_bpe50k": aranizer.aranizer_bpe50k.get_tokenizer(),
9
  "aranizer_bpe64k": aranizer.aranizer_bpe64k.get_tokenizer(),
 
10
  "aranizer_sp32k": aranizer.aranizer_sp32k.get_tokenizer(),
11
- # Add more tokenizers as needed
 
 
12
  }
13
 
14
  def compare_tokenizers(text):
@@ -20,8 +22,8 @@ def compare_tokenizers(text):
20
  results.append((name, tokens, encoded_output, decoded_text))
21
  return results
22
 
23
- inputs = Textbox(label="Enter Arabic text")
24
- outputs = Table(label="Results", columns=["Tokenizer", "Tokens", "Encoded Output", "Decoded Text"])
25
 
26
  iface = Interface(fn=compare_tokenizers, inputs=inputs, outputs=outputs)
27
 
 
1
+ from gradio import inputs, outputs, Interface
 
 
2
  import aranizer
3
+ from aranizer import aranizer_bpe32k, aranizer_bpe50k, aranizer_bpe64k, aranizer_bpe86k, aranizer_sp32k,aranizer_sp50k,aranizer_sp64k, aranizer_sp86k
4
+ # Load all available tokenizers
5
  tokenizers = {
6
+ "aranizer_bpe32k": aranizer.aranizer_bpe32k.get_tokenizer(),
7
  "aranizer_bpe50k": aranizer.aranizer_bpe50k.get_tokenizer(),
8
  "aranizer_bpe64k": aranizer.aranizer_bpe64k.get_tokenizer(),
9
+ "aranizer_bpe86k": aranizer.aranizer_bpe86k.get_tokenizer(),
10
  "aranizer_sp32k": aranizer.aranizer_sp32k.get_tokenizer(),
11
+ "aranizer_sp50k": aranizer.aranizer_sp50k.get_tokenizer(),
12
+ "aranizer_sp64k": aranizer.aranizer_sp64k.get_tokenizer(),
13
+ "aranizer_sp86k": aranizer.aranizer_sp86k.get_tokenizer(),
14
  }
15
 
16
  def compare_tokenizers(text):
 
22
  results.append((name, tokens, encoded_output, decoded_text))
23
  return results
24
 
25
+ inputs = inputs.Textbox(label="Enter Arabic text")
26
+ outputs = outputs.Table(label="Results", columns=["Tokenizer", "Tokens", "Encoded Output", "Decoded Text"])
27
 
28
  iface = Interface(fn=compare_tokenizers, inputs=inputs, outputs=outputs)
29