HeshamHaroon committed on
Commit
4c91389
1 Parent(s): 3b0ce68

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -1
app.py CHANGED
@@ -36,6 +36,7 @@ def compare_tokenizers(tokenizer_name, text):
36
  if tokenizer_name in ["FreedomIntelligence/AceGPT-13B", "FreedomIntelligence/AceGPT-7B", "inception-mbzuai/jais-13b"]:
37
  tokenizer = tokenizers[tokenizer_name]()
38
  tokens = tokenizer.tokenize(text)
 
39
  encoded_output = tokenizer.encode(text, add_special_tokens=True, return_tensors="pt")
40
  decoded_text = tokenizer.decode(encoded_output[0], skip_special_tokens=True)
41
  else:
@@ -57,7 +58,7 @@ inputs_component = [
57
  ]
58
 
59
  outputs_component = gr.Dataframe(
60
- headers=["Tokenizer", "Tokens", "Encoded Output", "Decoded Text"],
61
  label="Results",
62
  type="pandas"
63
  )
 
36
  if tokenizer_name in ["FreedomIntelligence/AceGPT-13B", "FreedomIntelligence/AceGPT-7B", "inception-mbzuai/jais-13b"]:
37
  tokenizer = tokenizers[tokenizer_name]()
38
  tokens = tokenizer.tokenize(text)
39
+ tokens_arabic = [token.encode('utf-8').decode('utf-8') for token in tokens]
40
  encoded_output = tokenizer.encode(text, add_special_tokens=True, return_tensors="pt")
41
  decoded_text = tokenizer.decode(encoded_output[0], skip_special_tokens=True)
42
  else:
 
58
  ]
59
 
60
  outputs_component = gr.Dataframe(
61
+ headers=["Tokenizer", "Tokens", "Encoded", "Decoded"],
62
  label="Results",
63
  type="pandas"
64
  )