HeshamHaroon committed on
Commit
5ad6fed
1 Parent(s): 7760bbc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +2 -0
app.py CHANGED
@@ -42,6 +42,8 @@ def compare_tokenizers(tokenizer_name, text):
42
  # AraNizer tokenizers
43
  tokenizer = tokenizers[tokenizer_name]()
44
  tokens = tokenizer.tokenize(text)
 
 
45
  encoded_output = tokenizer.encode(text, add_special_tokens=True)
46
  decoded_text = tokenizer.decode(encoded_output)
47
 
 
42
  # AraNizer tokenizers
43
  tokenizer = tokenizers[tokenizer_name]()
44
  tokens = tokenizer.tokenize(text)
45
+ for token in tokens:
46
+ print(token.encode('utf-8').decode('utf-8'))
47
  encoded_output = tokenizer.encode(text, add_special_tokens=True)
48
  decoded_text = tokenizer.decode(encoded_output)
49