Spaces:

thesven
/

Show-Me-Your-Chat-Template

Sleeping

Update app.py

ac0ed37 about 1 year ago

1.59 kB

	import gradio as gr
	from transformers import AutoTokenizer

	def load_tokenizer(repo_path: str) -> str:
	    """Load a tokenizer and summarize it, with a sample chat prompt, as text.

	    The summary lists the tokenizer's name, vocabulary size, maximum model
	    length and special tokens, followed by a fixed two-message conversation
	    rendered through the tokenizer's chat template.

	    Args:
	        repo_path: Repository path as typed by the user (e.g. ``user/repo``).
	            Surrounding whitespace is ignored.

	    Returns:
	        One ``key: value`` line per detail. Nothing is raised to the caller:
	        a blank path yields a prompt to enter one, and any loading error is
	        returned as its message text so it can be displayed directly.
	    """
	    # A blank textbox would otherwise surface an opaque library error.
	    repo_path = (repo_path or "").strip()
	    if not repo_path:
	        return "Please enter a Hugging Face repository path (e.g., user/repo)."

	    try:
	        # NOTE(security): trust_remote_code=True permits code shipped in the
	        # repository to run in this process, and the path is user-supplied.
	        # Presumably kept so tokenizers with custom code can load -- review
	        # before exposing this publicly.
	        tokenizer = AutoTokenizer.from_pretrained(repo_path, trust_remote_code=True)

	        messages = [
	            {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
	            {"role": "user", "content": "Who are you?"},
	        ]

	        try:
	            # tokenize=False asks for the rendered prompt text, not token ids.
	            rendered_prompt = tokenizer.apply_chat_template(
	                messages,
	                add_generation_prompt=True,
	                tokenize=False,
	            )
	        except Exception as e:
	            # Rendering can fail on its own (for example when no chat template
	            # is available); keep the remaining details visible regardless.
	            rendered_prompt = f"(unavailable: {e})"

	        details = {
	            "Tokenizer Name": tokenizer.name_or_path,
	            "Vocabulary Size": tokenizer.vocab_size,
	            "Model Max Length": tokenizer.model_max_length,
	            "Special Tokens": tokenizer.all_special_tokens,
	            "Chat Template": rendered_prompt,
	        }

	        # One "key: value" line per detail for the output textbox.
	        return "\n".join(f"{key}: {value}" for key, value in details.items())
	    except Exception as e:
	        # UI boundary: show the failure to the user instead of crashing.
	        return str(e)

	# Build the UI pieces first: one textbox for the repository path going in,
	# one textbox for the tokenizer summary coming out.
	repo_path_box = gr.Textbox(label="Hugging Face Repository Path (e.g., user/repo)")
	details_box = gr.Textbox(label="Tokenizer Details")

	# Wire the textboxes to load_tokenizer.
	iface = gr.Interface(
	    fn=load_tokenizer,
	    inputs=repo_path_box,
	    outputs=details_box,
	    title="Hugging Face Tokenizer Loader",
	    description="Enter the Hugging Face repository path to load the tokenizer and view its details.",
	)

	# Start serving the interface when the script runs.
	iface.launch()