# Hugging Face Space: ProCreations/smollm3 (Running on Zero)
# File size: 15,442 bytes
# Commit: da40d52

import gradio as gr
import spaces
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load model and tokenizer once at import time; both chat functions below
# reuse these module-level objects on every request.
model_name = "HuggingFaceTB/SmolLM3-3B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# Weights are loaded in bfloat16; device placement is delegated to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.bfloat16, device_map="auto")

@spaces.GPU
def chat_with_smollm3(message, history, system_prompt="", enable_thinking=True, temperature=0.6, top_p=0.95, max_tokens=32768):
    """
    Generate one SmolLM3-3B reply for a standard chat turn.

    Args:
        message: The new user message.
        history: Previous turns as ``(user, assistant)`` pairs. ``None`` is
            treated as an empty conversation.
        system_prompt: Optional system instructions. If it already contains
            ``/think`` or ``/no_think`` it is sent unchanged; otherwise the
            flag matching ``enable_thinking`` is appended.
        enable_thinking: Whether extended thinking is requested.
        temperature: Sampling temperature. Values <= 0 switch to greedy
            decoding instead of sampling.
        top_p: Nucleus-sampling threshold; only used when sampling.
        max_tokens: Maximum number of new tokens to generate.

    Returns:
        The decoded reply (prompt tokens removed, special tokens skipped).
    """
    system_prompt = system_prompt or ""
    history = history or []

    # Prepare messages
    messages = []

    if system_prompt.strip():
        # A flag the user typed into the system prompt is left alone; only add
        # one when neither flag is present. ("/think" is not a substring of
        # "/no_think", so the two checks are independent.)
        if "/think" not in system_prompt and "/no_think" not in system_prompt:
            system_prompt += "/think" if enable_thinking else "/no_think"
        messages.append({"role": "system", "content": system_prompt})
    elif not enable_thinking:
        # No system prompt: a bare flag message disables thinking.
        messages.append({"role": "system", "content": "/no_think"})

    # Add conversation history; a turn with no assistant reply yet
    # contributes only its user message.
    for human_msg, assistant_msg in history:
        messages.append({"role": "user", "content": human_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add current message
    messages.append({"role": "user", "content": message})

    # Apply chat template. Always pass a real boolean rather than None.
    # NOTE(review): this assumes the SmolLM3 template lets a flag in the
    # system message take precedence over the kwarg — confirm against the
    # model card.
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=bool(enable_thinking),
    )

    # Tokenize input
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    generation_kwargs = {
        # UI sliders may deliver floats; generate() needs a positive int here.
        "max_new_tokens": max(1, int(max_tokens)),
        "pad_token_id": tokenizer.eos_token_id,
    }
    if temperature is not None and temperature > 0:
        generation_kwargs.update(do_sample=True, temperature=temperature, top_p=top_p)
    else:
        # Sampling needs a strictly positive temperature; the UI allows 0,
        # so treat it as a request for greedy decoding.
        generation_kwargs["do_sample"] = False

    # Generate response
    with torch.no_grad():
        generated_ids = model.generate(**model_inputs, **generation_kwargs)

    # Decode only the newly generated tokens (everything after the prompt).
    prompt_length = model_inputs.input_ids.shape[1]
    output_ids = generated_ids[0][prompt_length:]
    return tokenizer.decode(output_ids, skip_special_tokens=True)

@spaces.GPU
def chat_with_tools(message, history, tools_json="", system_prompt="", enable_thinking=False, temperature=0.6, top_p=0.95, max_tokens=32768):
    """
    Generate one SmolLM3-3B reply with tool definitions exposed to the model.

    Args:
        message: The new user message.
        history: Previous turns as ``(user, assistant)`` pairs. ``None`` is
            treated as an empty conversation.
        tools_json: JSON text holding an array of tool definitions. Empty or
            whitespace-only text means "no tools".
        system_prompt: Optional system instructions, sent unchanged.
        enable_thinking: Whether extended thinking is requested.
        temperature: Sampling temperature. Values <= 0 switch to greedy
            decoding instead of sampling.
        top_p: Nucleus-sampling threshold; only used when sampling.
        max_tokens: Maximum number of new tokens to generate.

    Returns:
        The decoded reply, or a string starting with ``"Error:"`` when
        ``tools_json`` cannot be used.
    """
    import json

    tools_json = tools_json or ""
    system_prompt = system_prompt or ""
    history = history or []

    # Parse tools if provided
    tools = []
    if tools_json.strip():
        try:
            tools = json.loads(tools_json)
        except ValueError:
            # json.JSONDecodeError is a ValueError; anything else is a real
            # bug and should surface instead of being reported as bad JSON.
            return "Error: Invalid JSON format for tools"
        if not isinstance(tools, list):
            # Valid JSON that is not an array would reach the chat template
            # as something other than a list of tool definitions.
            return "Error: Tools JSON must be an array of tool definitions"

    # Prepare messages
    messages = []

    # Add system prompt if provided
    if system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt})

    # Add conversation history; a turn with no assistant reply yet
    # contributes only its user message.
    for human_msg, assistant_msg in history:
        messages.append({"role": "user", "content": human_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add current message
    messages.append({"role": "user", "content": message})

    # Apply chat template with tools (None when no tools were supplied)
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=bool(enable_thinking),
        xml_tools=tools if tools else None,
    )

    # Tokenize input
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    generation_kwargs = {
        # UI sliders may deliver floats; generate() needs a positive int here.
        "max_new_tokens": max(1, int(max_tokens)),
        "pad_token_id": tokenizer.eos_token_id,
    }
    if temperature is not None and temperature > 0:
        generation_kwargs.update(do_sample=True, temperature=temperature, top_p=top_p)
    else:
        # Sampling needs a strictly positive temperature; the UI allows 0,
        # so treat it as a request for greedy decoding.
        generation_kwargs["do_sample"] = False

    # Generate response
    with torch.no_grad():
        generated_ids = model.generate(**model_inputs, **generation_kwargs)

    # Decode only the newly generated tokens (everything after the prompt).
    prompt_length = model_inputs.input_ids.shape[1]
    output_ids = generated_ids[0][prompt_length:]
    return tokenizer.decode(output_ids, skip_special_tokens=True)

# Example tools for demonstration.
# JSON text (not a parsed object) used as the initial value of the
# "Tools JSON" textbox; it describes two tools, `get_weather` and `calculate`.
example_tools = """[
    {
        "name": "get_weather",
        "description": "Get the weather in a city",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {
                    "type": "string",
                    "description": "The city to get the weather for"
                }
            }
        }
    },
    {
        "name": "calculate",
        "description": "Perform basic mathematical calculations",
        "parameters": {
            "type": "object",
            "properties": {
                "expression": {
                    "type": "string",
                    "description": "Mathematical expression to evaluate"
                }
            }
        }
    }
]"""

# Create Gradio interface with dark theme and mobile support.
# Layout: a "Standard Chat" tab and a "Tool Calling" tab, each with a chatbot,
# a message box, Send/Clear/Settings buttons and a collapsible settings panel,
# followed by a static model-information section.
with gr.Blocks(
    title="SmolLM3-3B Chat",
    theme=gr.themes.Base().set(
        background_fill_primary="#1a1a1a",
        background_fill_secondary="#2d2d2d",
        border_color_primary="#404040",
        button_primary_background_fill="#4a9eff",
        button_primary_background_fill_hover="#5aa3ff",
        button_primary_text_color="#ffffff",
        block_background_fill="#2d2d2d",
        block_border_color="#404040",
        input_background_fill="#3a3a3a",
        input_border_color="#404040",
        slider_color="#4a9eff",
    ),
    css="""
    /* Mobile-first responsive design */
    @media (max-width: 768px) {
        .gradio-container {
            padding: 8px !important;
        }
        .gr-row {
            flex-direction: column !important;
        }
        .gr-column {
            width: 100% !important;
            min-width: 0 !important;
        }
        .gr-tabs {
            font-size: 14px !important;
        }
        .gr-button {
            width: 100% !important;
            margin: 2px 0 !important;
        }
        .gr-textbox {
            font-size: 16px !important;
        }
        .gr-chatbot {
            height: 400px !important;
        }
        .gr-markdown {
            font-size: 14px !important;
        }
        .gr-slider {
            width: 100% !important;
        }
        .settings-panel {
            margin-top: 20px !important;
        }
    }
    
    /* Settings panel styling */
    .settings-panel {
        background-color: #2d2d2d !important;
        border: 1px solid #404040 !important;
        border-radius: 8px !important;
        padding: 16px !important;
        margin-top: 12px !important;
    }
    
    .settings-button {
        background-color: #3a3a3a !important;
        border: 1px solid #404040 !important;
        color: #ffffff !important;
        padding: 8px 16px !important;
        border-radius: 6px !important;
        cursor: pointer !important;
        font-size: 14px !important;
        margin-bottom: 8px !important;
    }
    
    .settings-button:hover {
        background-color: #4a4a4a !important;
    }
    
    /* Dark mode improvements */
    .gr-chatbot {
        background-color: #2d2d2d !important;
    }
    
    .gr-chatbot .message {
        background-color: #3a3a3a !important;
        border: 1px solid #404040 !important;
        border-radius: 8px !important;
        margin: 4px 0 !important;
        padding: 8px !important;
    }
    
    .gr-chatbot .message.user {
        background-color: #4a9eff !important;
        color: white !important;
    }
    
    .gr-chatbot .message.bot {
        background-color: #3a3a3a !important;
        color: #ffffff !important;
    }
    
    /* Better mobile touch targets */
    @media (max-width: 768px) {
        .gr-button {
            min-height: 44px !important;
            padding: 12px !important;
        }
        .gr-slider input {
            min-height: 44px !important;
        }
        .gr-checkbox {
            min-height: 44px !important;
        }
    }
    
    /* Improve readability */
    .gr-markdown h1, .gr-markdown h2, .gr-markdown h3 {
        color: #ffffff !important;
    }
    
    .gr-markdown p, .gr-markdown li {
        color: #e0e0e0 !important;
    }
    
    /* Tab styling */
    .gr-tabs .gr-tab {
        background-color: #3a3a3a !important;
        color: #ffffff !important;
        border-color: #404040 !important;
    }
    
    .gr-tabs .gr-tab.selected {
        background-color: #4a9eff !important;
        color: #ffffff !important;
    }
    """
) as demo:
    gr.Markdown("# 🤖 SmolLM3-3B Chat Interface")
    gr.Markdown("Chat with SmolLM3-3B, a 3B parameter model with advanced reasoning, long context support, and tool calling capabilities.")

    with gr.Tabs():
        with gr.TabItem("💬 Standard Chat"):
            chatbot = gr.Chatbot(height=500, label="Chat with SmolLM3-3B")
            msg = gr.Textbox(label="Your message", placeholder="Type your message here...")

            with gr.Row():
                submit = gr.Button("Send", variant="primary")
                clear = gr.Button("Clear")
                settings_btn = gr.Button("⚙️ Settings", size="sm")

            # Per-session visibility of the settings panel. The component's own
            # `.visible` attribute only holds the value passed at construction,
            # so it cannot be used to toggle.
            settings_visible = gr.State(False)

            with gr.Column(visible=False, elem_classes="settings-panel") as settings_panel:
                gr.Markdown("### ⚙️ Advanced Settings")
                system_prompt = gr.Textbox(
                    label="System Prompt",
                    placeholder="Enter system instructions (optional)",
                    lines=3,
                    value="You are an AI assistant trained by HuggingFace. You are helpful, harmless, and honest."
                )
                enable_thinking = gr.Checkbox(
                    label="Enable Extended Thinking",
                    value=True,
                    info="Enable reasoning traces for better responses"
                )
                temperature = gr.Slider(
                    minimum=0.0,
                    maximum=2.0,
                    value=0.6,
                    step=0.1,
                    label="Temperature"
                )
                top_p = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-p"
                )
                max_tokens = gr.Slider(
                    minimum=1,
                    maximum=32768,
                    value=32768,
                    step=1,
                    label="Max Tokens"
                )

            def respond(message, history, sys_prompt, thinking, temp, top_p_val, max_tok):
                """Run one standard-chat turn; return the cleared textbox value and updated history."""
                history = list(history or [])
                # Nothing to send: leave the conversation untouched.
                if not message or not message.strip():
                    return "", history
                response = chat_with_smollm3(message, history, sys_prompt, thinking, temp, top_p_val, max_tok)
                history.append((message, response))
                return "", history

            def toggle_settings(is_visible):
                """Flip the settings panel; return the panel update and the new state."""
                is_visible = not is_visible
                return gr.update(visible=is_visible), is_visible

            # Send button and Enter key share the same handler and wiring.
            chat_inputs = [msg, chatbot, system_prompt, enable_thinking, temperature, top_p, max_tokens]
            chat_outputs = [msg, chatbot]
            submit.click(respond, chat_inputs, chat_outputs)
            msg.submit(respond, chat_inputs, chat_outputs)
            clear.click(lambda: ([], ""), outputs=[chatbot, msg])
            settings_btn.click(toggle_settings, inputs=[settings_visible], outputs=[settings_panel, settings_visible])

        with gr.TabItem("🛠️ Tool Calling"):
            tool_chatbot = gr.Chatbot(height=500, label="Chat with Tools")
            tool_msg = gr.Textbox(label="Your message", placeholder="Ask me to use tools...")

            with gr.Row():
                tool_submit = gr.Button("Send", variant="primary")
                tool_clear = gr.Button("Clear")
                tool_settings_btn = gr.Button("⚙️ Settings", size="sm")

            # Per-session visibility of the tool settings panel (see the note
            # on the standard-chat panel: `.visible` is not live state).
            tool_settings_visible = gr.State(False)

            with gr.Column(visible=False, elem_classes="settings-panel") as tool_settings_panel:
                gr.Markdown("### 🛠️ Tool Settings")
                tools_json = gr.Textbox(
                    label="Tools JSON",
                    placeholder="Enter tools as JSON array",
                    lines=10,
                    value=example_tools
                )
                tool_system_prompt = gr.Textbox(
                    label="System Prompt",
                    placeholder="Enter system instructions (optional)",
                    lines=2,
                    value="You are an AI assistant trained by HuggingFace. You are helpful, harmless, and honest."
                )
                tool_thinking = gr.Checkbox(
                    label="Enable Extended Thinking",
                    value=False,
                    info="Enable reasoning traces for tool usage"
                )
                tool_temperature = gr.Slider(
                    minimum=0.0,
                    maximum=2.0,
                    value=0.6,
                    step=0.1,
                    label="Temperature"
                )
                tool_top_p = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-p"
                )
                tool_max_tokens = gr.Slider(
                    minimum=1,
                    maximum=32768,
                    value=32768,
                    step=1,
                    label="Max Tokens"
                )

            def tool_respond(message, history, tools, sys_prompt, thinking, temp, top_p_val, max_tok):
                """Run one tool-calling turn; return the cleared textbox value and updated history."""
                history = list(history or [])
                # Nothing to send: leave the conversation untouched.
                if not message or not message.strip():
                    return "", history
                response = chat_with_tools(message, history, tools, sys_prompt, thinking, temp, top_p_val, max_tok)
                history.append((message, response))
                return "", history

            def toggle_tool_settings(is_visible):
                """Flip the tool settings panel; return the panel update and the new state."""
                is_visible = not is_visible
                return gr.update(visible=is_visible), is_visible

            # Send button and Enter key share the same handler and wiring.
            tool_inputs = [tool_msg, tool_chatbot, tools_json, tool_system_prompt, tool_thinking, tool_temperature, tool_top_p, tool_max_tokens]
            tool_outputs = [tool_msg, tool_chatbot]
            tool_submit.click(tool_respond, tool_inputs, tool_outputs)
            tool_msg.submit(tool_respond, tool_inputs, tool_outputs)
            tool_clear.click(lambda: ([], ""), outputs=[tool_chatbot, tool_msg])
            tool_settings_btn.click(toggle_tool_settings, inputs=[tool_settings_visible], outputs=[tool_settings_panel, tool_settings_visible])

    gr.Markdown("""
    ### 📚 Model Information
    - **Model**: HuggingFaceTB/SmolLM3-3B
    - **Features**: Advanced reasoning, long context (up to 128k tokens), multilingual support
    - **Languages**: English, French, Spanish, German, Italian, Portuguese (+ Arabic, Chinese, Russian)
    - **Extended Thinking**: Provides reasoning traces for better responses
    - **Tool Calling**: Supports XML-based tool calling for agentic workflows
    
    ### 💡 Usage Tips
    - Use Extended Thinking for complex reasoning tasks
    - Adjust temperature (0.6 recommended) for response creativity
    - Try different system prompts for specialized behaviors
    - Use tool calling for function-based interactions
    """)

# Start the Gradio server only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    demo.launch()