# Spaces: Sleeping
import gradio as gr | |
from transformers import AutoTokenizer | |
def _render_chat_template_preview(tokenizer):
    """Render a fixed two-message sample chat through the tokenizer's chat template.

    Returns the rendered prompt text, or a short "(unavailable: ...)" note
    when the template cannot be applied.
    """
    messages = [
        {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
        {"role": "user", "content": "Who are you?"},
    ]
    try:
        # tokenize=False asks for the rendered prompt text rather than token ids.
        return tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=False,
        )
    except Exception as exc:
        # Best effort: a tokenizer without a usable chat template (or one that
        # rejects the sample's system message) should not hide the remaining
        # details, so the failure is reported inline instead of propagating.
        return f"(unavailable: {exc})"


def load_tokenizer(repo_path):
    """Load a tokenizer and describe it as display-ready text.

    Args:
        repo_path: Repository path typed by the user (e.g. ``user/repo``).
            Surrounding whitespace is ignored; ``None`` is treated as empty.

    Returns:
        One ``key: value`` line per detail (name, vocabulary size, max
        length, special tokens, rendered chat template). If the path is
        empty, a prompt to enter one. If loading fails, the error message
        text. This function never raises for ordinary errors, because its
        return value is shown directly in the UI.
    """
    repo_path = (repo_path or "").strip()
    if not repo_path:
        return "Please enter a Hugging Face repository path (e.g., user/repo)."

    try:
        # NOTE(review): trust_remote_code=True allows the target repository to
        # run its own Python code while loading. The path comes from user
        # input, so confirm this is acceptable for the deployment.
        tokenizer = AutoTokenizer.from_pretrained(repo_path, trust_remote_code=True)

        # Extract relevant details about the tokenizer and chat template
        details = {
            "Tokenizer Name": tokenizer.name_or_path,
            "Vocabulary Size": tokenizer.vocab_size,
            "Model Max Length": tokenizer.model_max_length,
            "Special Tokens": tokenizer.all_special_tokens,
            "Chat Template": _render_chat_template_preview(tokenizer),
        }

        # Convert details to a formatted string for display
        return "\n".join(f"{key}: {value}" for key, value in details.items())
    except Exception as e:
        # UI boundary: show the failure text in the output box.
        return str(e)
# Text box where the user types the repository path.
repo_input = gr.Textbox(label="Hugging Face Repository Path (e.g., user/repo)")
# Text box that shows whatever text load_tokenizer returns.
details_output = gr.Textbox(label="Tokenizer Details")

# Wire the loader function to the input and output components.
iface = gr.Interface(
    fn=load_tokenizer,
    inputs=repo_input,
    outputs=details_output,
    title="Hugging Face Tokenizer Loader",
    description="Enter the Hugging Face repository path to load the tokenizer and view its details.",
)

# Start serving the interface.
iface.launch()