File size: 1,592 Bytes
0e62613
ac0ed37
0e62613
ac0ed37
 
 
 
0e62613
ac0ed37
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
import gradio as gr
from transformers import AutoTokenizer

def _render_example_prompt(tokenizer):
    """Render a fixed sample conversation with the tokenizer's chat template.

    Returns the rendered prompt text. If the template cannot be applied, a
    parenthesised explanation is returned instead, so that a failing template
    does not hide the rest of the tokenizer report.
    """
    messages = [
        {"role": "system", "content": "You are a pirate chatbot who always responds in pirate speak!"},
        {"role": "user", "content": "Who are you?"},
    ]
    try:
        # tokenize=False requests the formatted prompt text instead of token ids.
        return tokenizer.apply_chat_template(
            messages,
            add_generation_prompt=True,
            tokenize=False,
        )
    except Exception as exc:  # best-effort: keep the remaining details visible
        return f"(chat template could not be rendered: {exc})"


def load_tokenizer(repo_path):
    """Load a tokenizer and describe it as a block of ``key: value`` lines.

    Args:
        repo_path: Repository identifier (e.g. ``user/repo``) handed to
            ``AutoTokenizer.from_pretrained``. Surrounding whitespace in a
            string value is ignored.

    Returns:
        A newline-separated report containing the tokenizer name, vocabulary
        size, model max length, special tokens and a sample conversation
        rendered with the chat template. If ``repo_path`` is empty, a short
        prompt asking for a path is returned. If loading or inspecting the
        tokenizer fails, the exception message is returned instead. This
        function never raises, because its result is shown directly in the UI.
    """
    if isinstance(repo_path, str):
        repo_path = repo_path.strip()
    if not repo_path:
        # An empty textbox would otherwise surface an obscure library error.
        return "Please enter a Hugging Face repository path (e.g., user/repo)."

    try:
        # NOTE(security): trust_remote_code=True allows code shipped in the
        # repository to execute on this machine while loading, and repo_path
        # comes straight from user input. Kept for compatibility with
        # tokenizers that need custom code; review before exposing publicly.
        tokenizer = AutoTokenizer.from_pretrained(repo_path, trust_remote_code=True)

        # Extract relevant details about the tokenizer and chat template
        details = {
            "Tokenizer Name": tokenizer.name_or_path,
            "Vocabulary Size": tokenizer.vocab_size,
            "Model Max Length": tokenizer.model_max_length,
            "Special Tokens": tokenizer.all_special_tokens,
            "Chat Template": _render_example_prompt(tokenizer),
        }

        # Convert details to a formatted string for display
        return "\n".join(f"{key}: {value}" for key, value in details.items())
    except Exception as e:
        return str(e)

# Wire load_tokenizer into a one-textbox-in, one-textbox-out Gradio UI.
iface = gr.Interface(
    load_tokenizer,
    gr.Textbox(label="Hugging Face Repository Path (e.g., user/repo)"),
    gr.Textbox(label="Tokenizer Details"),
    description="Enter the Hugging Face repository path to load the tokenizer and view its details.",
    title="Hugging Face Tokenizer Loader",
)

# Start serving the interface using Gradio's default launch options.
iface.launch()