smollm3 / app.py
ProCreations's picture
Update app.py
b16f80c verified
import gradio as gr
import spaces
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
# Checkpoint served by this demo; the tokenizer and the weights are both
# resolved from this single identifier.
model_name = "HuggingFaceTB/SmolLM3-3B"

tokenizer = AutoTokenizer.from_pretrained(model_name)

# Weights are loaded once at import time, in bfloat16, with placement
# delegated to device_map="auto".
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
@spaces.GPU
def chat_with_smollm3(message, history, system_prompt="", enable_thinking=True, temperature=0.6, top_p=0.95, max_tokens=32768):
    """
    Generate one SmolLM3-3B reply for the standard chat tab.

    Args:
        message: The new user message.
        history: Earlier turns as (user_message, assistant_message) pairs.
            A turn whose assistant message is empty contributes only its
            user message. ``None`` is treated as an empty history.
        system_prompt: Optional system instructions. When it contains
            neither "/think" nor "/no_think", the flag matching
            ``enable_thinking`` is appended; a flag the user already wrote
            is left untouched.
        enable_thinking: Whether extended thinking is requested.
        temperature: Sampling temperature. Values <= 0 select greedy
            decoding instead of sampling.
        top_p: Nucleus-sampling threshold; only used when sampling.
        max_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        The decoded reply text with special tokens removed.
    """
    system_prompt = system_prompt or ""
    has_system_prompt = bool(system_prompt.strip())

    # Prepare messages
    messages = []
    if has_system_prompt:
        # Only add a thinking flag when the prompt does not already carry one,
        # so an explicit "/think" or "/no_think" typed by the user is kept.
        if "/think" not in system_prompt and "/no_think" not in system_prompt:
            system_prompt += "/think" if enable_thinking else "/no_think"
        messages.append({"role": "system", "content": system_prompt})
    elif not enable_thinking:
        # No system prompt: a bare flag message turns thinking off.
        messages.append({"role": "system", "content": "/no_think"})

    # Add conversation history
    for human_msg, assistant_msg in history or []:
        messages.append({"role": "user", "content": human_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add current message
    messages.append({"role": "user", "content": message})

    # Apply chat template. With a system prompt present the thinking mode is
    # carried by the flag inside the prompt, so the keyword is passed as None.
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=None if has_system_prompt else enable_thinking,
    )

    # Tokenize input
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    # UI sliders may deliver floats; generate() needs an integer token budget.
    generation_kwargs = {
        "max_new_tokens": max(1, int(max_tokens)),
        "pad_token_id": tokenizer.eos_token_id,
    }
    if temperature is not None and temperature > 0:
        generation_kwargs.update(
            do_sample=True,
            temperature=float(temperature),
            top_p=float(top_p),
        )
    else:
        # Sampling with a non-positive temperature is rejected by
        # transformers; temperature 0 is served as greedy decoding instead.
        generation_kwargs["do_sample"] = False

    # Generate response
    with torch.no_grad():
        generated_ids = model.generate(**model_inputs, **generation_kwargs)

    # Decode only the tokens produced after the prompt
    prompt_length = model_inputs.input_ids.shape[1]
    output_ids = generated_ids[0][prompt_length:]
    return tokenizer.decode(output_ids, skip_special_tokens=True)
@spaces.GPU
def chat_with_tools(message, history, tools_json="", system_prompt="", enable_thinking=False, temperature=0.6, top_p=0.95, max_tokens=32768):
    """
    Generate one SmolLM3-3B reply with tool definitions exposed to the model.

    Args:
        message: The new user message.
        history: Earlier turns as (user_message, assistant_message) pairs.
            ``None`` is treated as an empty history.
        tools_json: JSON text describing the available tools, normally an
            array of tool objects. A single JSON object is accepted and
            treated as a one-element array. Blank text means "no tools".
        system_prompt: Optional system instructions, passed through as-is.
        enable_thinking: Forwarded to the chat template.
        temperature: Sampling temperature. Values <= 0 select greedy
            decoding instead of sampling.
        top_p: Nucleus-sampling threshold; only used when sampling.
        max_tokens: Upper bound on the number of newly generated tokens.

    Returns:
        The decoded reply text, or the string
        "Error: Invalid JSON format for tools" when ``tools_json`` cannot be
        parsed into a list of tools.
    """
    import json

    # Parse tools if provided
    tools = []
    if tools_json and tools_json.strip():
        try:
            tools = json.loads(tools_json)
        except ValueError:
            # json.JSONDecodeError is a ValueError; anything else is a real
            # bug and should surface instead of being hidden.
            return "Error: Invalid JSON format for tools"
        if isinstance(tools, dict):
            # A lone tool object is accepted as a one-element list.
            tools = [tools]
        elif not isinstance(tools, list):
            return "Error: Invalid JSON format for tools"

    # Prepare messages
    messages = []

    # Add system prompt if provided
    if system_prompt and system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt})

    # Add conversation history
    for human_msg, assistant_msg in history or []:
        messages.append({"role": "user", "content": human_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add current message
    messages.append({"role": "user", "content": message})

    # Apply chat template with tools
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=enable_thinking,
        xml_tools=tools if tools else None,
    )

    # Tokenize input
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    # UI sliders may deliver floats; generate() needs an integer token budget.
    generation_kwargs = {
        "max_new_tokens": max(1, int(max_tokens)),
        "pad_token_id": tokenizer.eos_token_id,
    }
    if temperature is not None and temperature > 0:
        generation_kwargs.update(
            do_sample=True,
            temperature=float(temperature),
            top_p=float(top_p),
        )
    else:
        # Sampling with a non-positive temperature is rejected by
        # transformers; temperature 0 is served as greedy decoding instead.
        generation_kwargs["do_sample"] = False

    # Generate response
    with torch.no_grad():
        generated_ids = model.generate(**model_inputs, **generation_kwargs)

    # Decode only the tokens produced after the prompt
    prompt_length = model_inputs.input_ids.shape[1]
    output_ids = generated_ids[0][prompt_length:]
    return tokenizer.decode(output_ids, skip_special_tokens=True)
# Example tools for demonstration.
# Kept as JSON *text* (not a parsed object) because it is used as the default
# value of the "Tools JSON" textbox. It is a JSON array with two entries,
# "get_weather" and "calculate"; each has a name, a description and a
# "parameters" object describing one string argument.
example_tools = """[
{
"name": "get_weather",
"description": "Get the weather in a city",
"parameters": {
"type": "object",
"properties": {
"city": {
"type": "string",
"description": "The city to get the weather for"
}
}
}
},
{
"name": "calculate",
"description": "Perform basic mathematical calculations",
"parameters": {
"type": "object",
"properties": {
"expression": {
"type": "string",
"description": "Mathematical expression to evaluate"
}
}
}
}
]"""
# Default system prompt pre-filled in both tabs.
DEFAULT_SYSTEM_PROMPT = "You are an AI assistant trained by HuggingFace. You are helpful, harmless, and honest."

# Custom stylesheet: mobile layout tweaks plus dark-mode colours.
CUSTOM_CSS = """
/* Mobile-first responsive design */
@media (max-width: 768px) {
.gradio-container {
padding: 8px !important;
}
.gr-row {
flex-direction: column !important;
}
.gr-column {
width: 100% !important;
min-width: 0 !important;
}
.gr-tabs {
font-size: 14px !important;
}
.gr-button {
width: 100% !important;
margin: 2px 0 !important;
}
.gr-textbox {
font-size: 16px !important;
}
.gr-chatbot {
height: 400px !important;
}
.gr-markdown {
font-size: 14px !important;
}
.gr-slider {
width: 100% !important;
}
.settings-panel {
margin-top: 20px !important;
}
}
/* Settings panel styling */
.settings-panel {
background-color: #2d2d2d !important;
border: 1px solid #404040 !important;
border-radius: 8px !important;
padding: 16px !important;
margin-top: 12px !important;
}
.settings-button {
background-color: #3a3a3a !important;
border: 1px solid #404040 !important;
color: #ffffff !important;
padding: 8px 16px !important;
border-radius: 6px !important;
cursor: pointer !important;
font-size: 14px !important;
margin-bottom: 8px !important;
}
.settings-button:hover {
background-color: #4a4a4a !important;
}
/* Dark mode improvements */
.gr-chatbot {
background-color: #2d2d2d !important;
}
.gr-chatbot .message {
background-color: #3a3a3a !important;
border: 1px solid #404040 !important;
border-radius: 8px !important;
margin: 4px 0 !important;
padding: 8px !important;
}
.gr-chatbot .message.user {
background-color: #4a9eff !important;
color: white !important;
}
.gr-chatbot .message.bot {
background-color: #3a3a3a !important;
color: #ffffff !important;
}
/* Better mobile touch targets */
@media (max-width: 768px) {
.gr-button {
min-height: 44px !important;
padding: 12px !important;
}
.gr-slider input {
min-height: 44px !important;
}
.gr-checkbox {
min-height: 44px !important;
}
}
/* Improve readability */
.gr-markdown h1, .gr-markdown h2, .gr-markdown h3 {
color: #ffffff !important;
}
.gr-markdown p, .gr-markdown li {
color: #e0e0e0 !important;
}
/* Tab styling */
.gr-tabs .gr-tab {
background-color: #3a3a3a !important;
color: #ffffff !important;
border-color: #404040 !important;
}
.gr-tabs .gr-tab.selected {
background-color: #4a9eff !important;
color: #ffffff !important;
}
"""

# Footer text shown below the tabs.
MODEL_INFO_MARKDOWN = """
### 📚 Model Information
- **Model**: HuggingFaceTB/SmolLM3-3B
- **Features**: Advanced reasoning, long context (up to 128k tokens), multilingual support
- **Languages**: English, French, Spanish, German, Italian, Portuguese (+ Arabic, Chinese, Russian)
- **Extended Thinking**: Provides reasoning traces for better responses
- **Tool Calling**: Supports XML-based tool calling for agentic workflows
### 💡 Usage Tips
- Use Extended Thinking for complex reasoning tasks
- Adjust temperature (0.6 recommended) for response creativity
- Try different system prompts for specialized behaviors
- Use tool calling for function-based interactions
"""

# Create Gradio interface with dark theme and mobile support
with gr.Blocks(
    title="SmolLM3-3B Chat",
    theme=gr.themes.Base().set(
        background_fill_primary="#1a1a1a",
        background_fill_secondary="#2d2d2d",
        border_color_primary="#404040",
        button_primary_background_fill="#4a9eff",
        button_primary_background_fill_hover="#5aa3ff",
        button_primary_text_color="#ffffff",
        block_background_fill="#2d2d2d",
        block_border_color="#404040",
        input_background_fill="#3a3a3a",
        input_border_color="#404040",
        slider_color="#4a9eff",
    ),
    css=CUSTOM_CSS,
) as demo:
    gr.Markdown("# 🤖 SmolLM3-3B Chat Interface")
    gr.Markdown("Chat with SmolLM3-3B, a 3B parameter model with advanced reasoning, long context support, and tool calling capabilities.")

    with gr.Tabs():
        # ------------------------------------------------------------------
        # Tab 1: plain chat
        # ------------------------------------------------------------------
        with gr.TabItem("💬 Standard Chat"):
            chatbot = gr.Chatbot(height=500, label="Chat with SmolLM3-3B")
            msg = gr.Textbox(label="Your message", placeholder="Type your message here...")
            with gr.Row():
                submit = gr.Button("Send", variant="primary")
                clear = gr.Button("Clear")
            settings_btn = gr.Button("⚙️ Settings", size="sm")
            # Per-session open/closed flag for the settings panel. The
            # component's own `.visible` attribute is fixed at build time and
            # cannot be used to toggle.
            settings_open = gr.State(False)

            with gr.Column(visible=False, elem_classes="settings-panel") as settings_panel:
                gr.Markdown("### ⚙️ Advanced Settings")
                system_prompt = gr.Textbox(
                    label="System Prompt",
                    placeholder="Enter system instructions (optional)",
                    lines=3,
                    value=DEFAULT_SYSTEM_PROMPT,
                )
                enable_thinking = gr.Checkbox(
                    label="Enable Extended Thinking",
                    value=True,
                    info="Enable reasoning traces for better responses",
                )
                temperature = gr.Slider(
                    minimum=0.0,
                    maximum=2.0,
                    value=0.6,
                    step=0.1,
                    label="Temperature",
                )
                top_p = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-p",
                )
                max_tokens = gr.Slider(
                    minimum=1,
                    maximum=32768,
                    value=32768,
                    step=1,
                    label="Max Tokens",
                )

            def respond(message, history, sys_prompt, thinking, temp, top_p_val, max_tok):
                """Run one chat turn; return the cleared textbox value and the updated history."""
                history = history or []
                # Ignore empty submissions instead of sending a blank user turn.
                if not message or not message.strip():
                    return "", history
                response = chat_with_smollm3(message, history, sys_prompt, thinking, temp, top_p_val, max_tok)
                return "", history + [(message, response)]

            def toggle_settings(is_open):
                """Flip the settings panel and remember the new state."""
                is_open = not is_open
                return gr.update(visible=is_open), is_open

            chat_inputs = [msg, chatbot, system_prompt, enable_thinking, temperature, top_p, max_tokens]
            submit.click(respond, chat_inputs, [msg, chatbot])
            msg.submit(respond, chat_inputs, [msg, chatbot])
            clear.click(lambda: ([], ""), outputs=[chatbot, msg])
            settings_btn.click(toggle_settings, inputs=[settings_open], outputs=[settings_panel, settings_open])

        # ------------------------------------------------------------------
        # Tab 2: chat with tool definitions
        # ------------------------------------------------------------------
        with gr.TabItem("🛠️ Tool Calling"):
            tool_chatbot = gr.Chatbot(height=500, label="Chat with Tools")
            tool_msg = gr.Textbox(label="Your message", placeholder="Ask me to use tools...")
            with gr.Row():
                tool_submit = gr.Button("Send", variant="primary")
                tool_clear = gr.Button("Clear")
            tool_settings_btn = gr.Button("⚙️ Settings", size="sm")
            # Same toggle-state approach as the standard chat tab.
            tool_settings_open = gr.State(False)

            with gr.Column(visible=False, elem_classes="settings-panel") as tool_settings_panel:
                gr.Markdown("### 🛠️ Tool Settings")
                tools_json = gr.Textbox(
                    label="Tools JSON",
                    placeholder="Enter tools as JSON array",
                    lines=10,
                    value=example_tools,
                )
                tool_system_prompt = gr.Textbox(
                    label="System Prompt",
                    placeholder="Enter system instructions (optional)",
                    lines=2,
                    value=DEFAULT_SYSTEM_PROMPT,
                )
                tool_thinking = gr.Checkbox(
                    label="Enable Extended Thinking",
                    value=False,
                    info="Enable reasoning traces for tool usage",
                )
                tool_temperature = gr.Slider(
                    minimum=0.0,
                    maximum=2.0,
                    value=0.6,
                    step=0.1,
                    label="Temperature",
                )
                tool_top_p = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-p",
                )
                tool_max_tokens = gr.Slider(
                    minimum=1,
                    maximum=32768,
                    value=32768,
                    step=1,
                    label="Max Tokens",
                )

            def tool_respond(message, history, tools, sys_prompt, thinking, temp, top_p_val, max_tok):
                """Run one tool-chat turn; return the cleared textbox value and the updated history."""
                history = history or []
                # Ignore empty submissions instead of sending a blank user turn.
                if not message or not message.strip():
                    return "", history
                response = chat_with_tools(message, history, tools, sys_prompt, thinking, temp, top_p_val, max_tok)
                return "", history + [(message, response)]

            def toggle_tool_settings(is_open):
                """Flip the tool settings panel and remember the new state."""
                is_open = not is_open
                return gr.update(visible=is_open), is_open

            tool_inputs = [tool_msg, tool_chatbot, tools_json, tool_system_prompt, tool_thinking, tool_temperature, tool_top_p, tool_max_tokens]
            tool_submit.click(tool_respond, tool_inputs, [tool_msg, tool_chatbot])
            tool_msg.submit(tool_respond, tool_inputs, [tool_msg, tool_chatbot])
            tool_clear.click(lambda: ([], ""), outputs=[tool_chatbot, tool_msg])
            tool_settings_btn.click(toggle_tool_settings, inputs=[tool_settings_open], outputs=[tool_settings_panel, tool_settings_open])

    gr.Markdown(MODEL_INFO_MARKDOWN)
# Start the Gradio app only when this file is run as a script, not on import.
if __name__ == "__main__":
    demo.launch()