import gradio as gr
from transformers import AutoTokenizer

# Define a function to tokenize text with a selected tokenizer
def tokenize_text(text, tokenizer_name):
    tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
    tokenized_text = tokenizer.tokenize(text)
    input_ids = tokenizer.convert_tokens_to_ids(tokenized_text)
    decoded_text = tokenizer.decode(input_ids)  # Decode the input IDs
    return f"Tokenized Text: {tokenized_text}\nInput IDs: {input_ids}\nDecoded Text: {decoded_text}"

# Define available tokenizers
tokenizer_names = [
    "riotu-lab/ArabianGPT-01B",
    "riotu-lab/ArabianGPT-03B",
    "riotu-lab/ArabianGPT-08B",
    "FreedomIntelligence/AceGPT-13B",
    "FreedomIntelligence/AceGPT-7B",
    "inception-mbzuai/jais-13b",
    "aubmindlab/aragpt2-base",
    "aubmindlab/aragpt2-medium",
    "aubmindlab/aragpt2-large",
    "aubmindlab/aragpt2-mega"
]

# Create the Gradio interface
iface = gr.Interface(
    fn=tokenize_text,
    inputs=[
        gr.Textbox(label="Enter Text"),
        gr.Dropdown(choices=tokenizer_names, label="Select Tokenizer"),
    ],
    outputs="text",
    title="Hugging Face Tokenizer Demo",
    description="Try different tokenizers and see the tokenized form with input IDs.",
)

# Launch the app
iface.launch()
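
# Optional sketch (not part of the original app): because tokenize_text calls
# AutoTokenizer.from_pretrained on every request, each submission reloads the
# selected tokenizer. One way to avoid that is to cache instances per model name.
# The `load_tokenizer` helper below is a hypothetical name used for illustration.
from functools import lru_cache

@lru_cache(maxsize=None)
def load_tokenizer(tokenizer_name):
    # Reuse one tokenizer instance per model name across requests
    return AutoTokenizer.from_pretrained(tokenizer_name)

# With this helper, tokenize_text could call `tokenizer = load_tokenizer(tokenizer_name)`
# instead of AutoTokenizer.from_pretrained, keeping the rest of the logic unchanged.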