HuggingFace Space status: Runtime error — the app below fails to start on current
Gradio releases because it uses the removed `gr.inputs` / `gr.outputs` component API.
import gradio as gr
import aranizer


def load_tokenizers():
    """Instantiate every known aranizer tokenizer and return them by name.

    Each entry like ``aranizer_bpe32k`` maps to the factory
    ``aranizer.aranizer_bpe.get_tokenizer_32k``; factories that are missing
    from the installed package are skipped with a printed warning.

    Returns:
        dict: method name -> tokenizer instance (only successfully loaded ones).
    """
    names = [
        "aranizer_bpe32k", "aranizer_bpe50k", "aranizer_bpe64k", "aranizer_bpe86k",
        "aranizer_sp32k", "aranizer_sp50k", "aranizer_sp64k", "aranizer_sp86k",
    ]
    loaded = {}
    for name in names:
        # Split "aranizer_bpe32k" into submodule "aranizer_bpe" and size "32k".
        prefix, size = name.rsplit("_", 1)
        try:
            submodule = getattr(aranizer, prefix)
            factory = getattr(submodule, f"get_tokenizer_{size}")
            loaded[name] = factory()
        except AttributeError as e:
            print(f"Error loading tokenizer {name}: {e}")
    return loaded


# Build the tokenizer registry once, at import time.
tokenizers = load_tokenizers()
def compare_tokenizers(text, tokenizer_map=None):
    """Run *text* through each tokenizer and collect the results for display.

    Args:
        text: Input string (Arabic text from the UI textbox).
        tokenizer_map: Optional mapping of name -> tokenizer. Defaults to the
            module-level ``tokenizers`` registry loaded at import time; the
            parameter exists so the function can be exercised without the
            aranizer package installed.

    Returns:
        list[tuple]: one ``(name, tokens, encoded_output, decoded_text)`` row
        per tokenizer. If a tokenizer raises, its row carries the error string
        in the tokens column so the failure is visible in the results table.
    """
    if tokenizer_map is None:
        tokenizer_map = tokenizers
    results = []
    for name, tokenizer in tokenizer_map.items():
        try:
            tokens = tokenizer.tokenize(text)
            encoded_output = tokenizer.encode(text, add_special_tokens=True)
            decoded_text = tokenizer.decode(encoded_output)
            results.append((name, tokens, encoded_output, decoded_text))
        except Exception as e:
            # Keep a row for the failed tokenizer instead of aborting the run.
            results.append((name, f"Error: {e}", "", ""))
    return results
# NOTE: gr.inputs.Textbox / gr.outputs.Table were removed in Gradio 3.x+ and
# raise AttributeError at startup (the Space's "Runtime error"). Use the
# current top-level components instead; gr.Dataframe renders the tabular
# results the old gr.outputs.Table produced.
inputs_component = gr.Textbox(lines=2, placeholder="Enter Arabic text here...", label="Input Text")
outputs_component = gr.Dataframe(
    headers=["Tokenizer", "Tokens", "Encoded Output", "Decoded Text"],
    label="Results",
)
iface = gr.Interface(
    fn=compare_tokenizers,
    inputs=inputs_component,
    outputs=outputs_component,
    title="Tokenizer Comparison Tool",
)
iface.launch()