"""Gradio demo that tokenizes text with a selectable Hugging Face tokenizer."""

from functools import lru_cache

import gradio as gr
from transformers import AutoTokenizer


@lru_cache(maxsize=8)
def _load_tokenizer(tokenizer_name: str):
    """Load the tokenizer for *tokenizer_name*, reusing it on later calls.

    Memoized so that repeated requests for the same checkpoint do not call
    ``AutoTokenizer.from_pretrained`` again. The cache is bounded so it
    cannot grow without limit if unexpected names are passed in.
    """
    return AutoTokenizer.from_pretrained(tokenizer_name)


# Define a function to tokenize text with a selected tokenizer
def tokenize_text(text: str, tokenizer_name: str) -> str:
    """Tokenize *text* with the chosen tokenizer and describe the result.

    Args:
        text: The text entered by the user. ``None`` is treated as an
            empty string.
        tokenizer_name: Identifier passed to
            ``AutoTokenizer.from_pretrained`` (one of ``tokenizer_names``
            when called from the interface).

    Returns:
        A two-line string: the list of tokens, then the list of their ids.

    Raises:
        gr.Error: If no tokenizer has been selected; Gradio shows the
            message to the user instead of a stack trace.
    """
    # The dropdown has no default value, so an untouched form submits None.
    if not tokenizer_name:
        raise gr.Error("Please select a tokenizer.")

    tokenizer = _load_tokenizer(tokenizer_name)
    tokenized_text = tokenizer.tokenize(text or "")
    input_ids = tokenizer.convert_tokens_to_ids(tokenized_text)
    return f"Tokenized Text: {tokenized_text}\nInput IDs: {input_ids}"


# Define available tokenizers
tokenizer_names = [
    "riotu-lab/ArabianGPT-01B",
    "riotu-lab/ArabianGPT-03B",
    "riotu-lab/ArabianGPT-08B",
    # Add more tokenizers here
]

# Create the Gradio interface
iface = gr.Interface(
    fn=tokenize_text,
    inputs=[
        gr.Textbox(label="Enter Text"),
        gr.Dropdown(choices=tokenizer_names, label="Select Tokenizer"),
    ],
    outputs="text",
    title="Hugging Face Tokenizer Demo",
    description="Try different tokenizers and see the tokenized form with input IDs.",
)

# Launch the app only when run as a script, so importing this module
# (e.g. to reuse ``tokenize_text``) does not start a web server.
if __name__ == "__main__":
    iface.launch()