"""Gradio app that reports T5 and CLIP token counts for a piece of text."""

import functools

import gradio as gr
from transformers import CLIPTokenizer, T5TokenizerFast


@functools.lru_cache(maxsize=1)
def _load_tokenizers():
    """Load the T5 and CLIP tokenizers once and reuse them on later calls.

    Loading is deferred until the first request (rather than done at import
    time) and memoized, so ``from_pretrained`` is not re-run for every
    submitted text.

    Returns:
        A ``(t5_tokenizer, clip_tokenizer)`` tuple.
    """
    t5_tokenizer = T5TokenizerFast.from_pretrained("google/t5-v1_1-xxl", legacy=False)
    clip_tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-base-patch32")
    return t5_tokenizer, clip_tokenizer


def count_tokens(text):
    """Count the tokens in *text* with the T5 and CLIP tokenizers.

    Args:
        text: The text to tokenize.

    Returns:
        A pair of display strings, ``("T5: <n> tokens", "CLIP: <n> tokens")``,
        one per output textbox of the interface.
    """
    t5_tokenizer, clip_tokenizer = _load_tokenizers()

    # Get token counts directly using the encode method.
    # NOTE(review): encode() is called with its default arguments, so the
    # counts presumably include each tokenizer's special tokens — confirm
    # against the transformers documentation if raw counts are wanted.
    t5_count = len(t5_tokenizer.encode(text))
    clip_count = len(clip_tokenizer.encode(text))

    return f"T5: {t5_count} tokens", f"CLIP: {clip_count} tokens"


# Create a Gradio interface: one text input, one output box per tokenizer.
iface = gr.Interface(
    fn=count_tokens,
    inputs=[
        gr.Textbox(label="Text", placeholder="Enter text here..."),
    ],
    outputs=[
        gr.Textbox(label="T5 Tokenizer"),
        gr.Textbox(label="CLIP Tokenizer"),
    ],
    title="Common Diffusion Model Token Counter",
    description="Enter text to count tokens using T5 and CLIP tokenizers, commonly used in diffusion models.",
)

# Launch the app
iface.launch()