HeshamHaroon committed
Commit: 31687bc (parent: b9f9278)

Update app.py

Files changed (1): app.py (+3, -3)
app.py CHANGED
@@ -40,8 +40,8 @@ def compare_tokenizers(tokenizer_name, text):
         tokenizer = tokenizers[tokenizer_name]()
         tokens = tokenizer.tokenize(text)
         tokens_arabic = [token.encode('utf-8').decode('utf-8') for token in tokens]
-        encoded_output = tokenizer.encode(text, add_special_tokens=True, return_tensors="pt")
-        decoded_text = tokenizer.decode(encoded_output[0], skip_special_tokens=True)
+        encoded_output = tokenizer.encode(text, add_special_tokens=True)
+        decoded_text = tokenizer.decode(encoded_output, skip_special_tokens=True)
     else:
         # AraNizer tokenizers
         tokenizer = tokenizers[tokenizer_name]()
@@ -51,7 +51,7 @@ def compare_tokenizers(tokenizer_name, text):
         tokens_arabic = [token.encode('utf-8').decode('utf-8') for token in tokens]
 
     # Prepare the results to be displayed
-    results = [(tokenizer_name, tokens_arabic, encoded_output.tolist(), decoded_text)]
+    results = [(tokenizer_name, tokens_arabic, encoded_output, decoded_text)]
     return results
 
 # Define the Gradio interface components with a dropdown for model selection
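
The change removes return_tensors="pt" from tokenizer.encode, so encoded_output becomes a plain Python list of token ids instead of a PyTorch tensor of shape (1, seq_len). A plain list can be passed to tokenizer.decode and returned to Gradio directly, which is why the [0] indexing and the .tolist() conversion also disappear. A minimal sketch of the difference, assuming a standard Hugging Face transformers tokenizer (the checkpoint name below is a placeholder for illustration, not taken from this app):

from transformers import AutoTokenizer

# Placeholder checkpoint for illustration; the app selects its tokenizers at runtime.
tok = AutoTokenizer.from_pretrained("aubmindlab/bert-base-arabertv02")
text = "مرحبا بالعالم"

# Old behavior: return_tensors="pt" yields a torch.Tensor of shape (1, seq_len),
# so decoding needs [0] and JSON-style display needs .tolist().
pt_ids = tok.encode(text, add_special_tokens=True, return_tensors="pt")
print(tok.decode(pt_ids[0], skip_special_tokens=True))
print(pt_ids.tolist())

# New behavior: omitting return_tensors gives a plain list[int] that decodes
# and serializes as-is.
ids = tok.encode(text, add_special_tokens=True)
print(tok.decode(ids, skip_special_tokens=True))
print(ids)

Both forms round-trip the same token ids; the list form simply avoids a torch tensor in the display path.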