# SmolLM3-3B Chat: a Hugging Face Space running on ZeroGPU ("Running on Zero")
import json

import gradio as gr
import spaces
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load model and tokenizer once at startup; device_map="auto" places the
# weights on the GPU whenever one is attached to the process.
model_name = "HuggingFaceTB/SmolLM3-3B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)
@spaces.GPU  # ZeroGPU: allocate a GPU for the duration of each call
def chat_with_smollm3(message, history, system_prompt="", enable_thinking=True,
                      temperature=0.6, top_p=0.95, max_tokens=32768):
    """Chat with the SmolLM3-3B model, with full feature support."""
    # Prepare messages
    messages = []

    # Add the system prompt if provided, appending SmolLM3's thinking-mode
    # flag (/think or /no_think) when the prompt does not already carry one
    if system_prompt.strip():
        if enable_thinking and "/no_think" not in system_prompt:
            if "/think" not in system_prompt:
                system_prompt += "/think"
        elif not enable_thinking and "/think" not in system_prompt:
            if "/no_think" not in system_prompt:
                system_prompt += "/no_think"
        messages.append({"role": "system", "content": system_prompt})
    else:
        # With no system prompt, rely on the enable_thinking template flag,
        # plus an explicit /no_think system message when thinking is off
        if not enable_thinking:
            messages.append({"role": "system", "content": "/no_think"})
    # Add conversation history
    for human_msg, assistant_msg in history:
        messages.append({"role": "user", "content": human_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add the current message
    messages.append({"role": "user", "content": message})

    # Apply the chat template; pass enable_thinking only when no system prompt
    # already carries a mode flag
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=enable_thinking if not system_prompt.strip() else None,
    )
    # Tokenize input
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    # Generate response
    with torch.no_grad():
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens (skip the prompt)
    output_ids = generated_ids[0][len(model_inputs.input_ids[0]):]
    response = tokenizer.decode(output_ids, skip_special_tokens=True)
    return response
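
# Optional post-processing sketch, not wired into the UI. Assumption: with
# extended thinking enabled, SmolLM3 wraps its reasoning trace in
# <think>...</think> tags; strip them if only the final answer should be shown.
import re

def strip_thinking(response: str) -> str:
    """Remove any <think>...</think> reasoning trace from a model response."""
    return re.sub(r"<think>.*?</think>", "", response, flags=re.DOTALL).strip()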
@spaces.GPU  # ZeroGPU: allocate a GPU for the duration of each call
def chat_with_tools(message, history, tools_json="", system_prompt="", enable_thinking=False,
                    temperature=0.6, top_p=0.95, max_tokens=32768):
    """Chat with SmolLM3-3B using its tool-calling capabilities."""
    # Parse tools if provided
    tools = []
    if tools_json.strip():
        try:
            tools = json.loads(tools_json)
        except json.JSONDecodeError:
            return "Error: Invalid JSON format for tools"
    # Prepare messages
    messages = []

    # Add system prompt if provided
    if system_prompt.strip():
        messages.append({"role": "system", "content": system_prompt})

    # Add conversation history
    for human_msg, assistant_msg in history:
        messages.append({"role": "user", "content": human_msg})
        if assistant_msg:
            messages.append({"role": "assistant", "content": assistant_msg})

    # Add the current message
    messages.append({"role": "user", "content": message})

    # Apply the chat template, passing the tool definitions via SmolLM3's
    # xml_tools template argument
    text = tokenizer.apply_chat_template(
        messages,
        tokenize=False,
        add_generation_prompt=True,
        enable_thinking=enable_thinking,
        xml_tools=tools if tools else None,
    )
    # Tokenize input
    model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

    # Generate response
    with torch.no_grad():
        generated_ids = model.generate(
            **model_inputs,
            max_new_tokens=max_tokens,
            temperature=temperature,
            top_p=top_p,
            do_sample=True,
            pad_token_id=tokenizer.eos_token_id,
        )

    # Decode only the newly generated tokens (skip the prompt)
    output_ids = generated_ids[0][len(model_inputs.input_ids[0]):]
    response = tokenizer.decode(output_ids, skip_special_tokens=True)
    return response
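
# Hedged helper sketch, not wired into the UI. Assumption: SmolLM3 emits tool
# calls as <tool_call>...</tool_call> blocks containing JSON; the tag name and
# payload format here are illustrative, so verify them against the model's
# actual output before relying on this.
import re

def extract_tool_calls(response: str):
    """Return the JSON payload of each tool call found in a model response."""
    calls = []
    for block in re.findall(r"<tool_call>(.*?)</tool_call>", response, re.DOTALL):
        try:
            calls.append(json.loads(block))
        except json.JSONDecodeError:
            continue  # skip malformed blocks rather than failing the turn
    return calls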
# Example tools for demonstration
example_tools = """[
    {
        "name": "get_weather",
        "description": "Get the weather in a city",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {
                    "type": "string",
                    "description": "The city to get the weather for"
                }
            }
        }
    },
    {
        "name": "calculate",
        "description": "Perform basic mathematical calculations",
        "parameters": {
            "type": "object",
            "properties": {
                "expression": {
                    "type": "string",
                    "description": "Mathematical expression to evaluate"
                }
            }
        }
    }
]"""
# Create the Gradio interface with a dark theme and mobile support
with gr.Blocks(
    title="SmolLM3-3B Chat",
    theme=gr.themes.Base().set(
        background_fill_primary="#1a1a1a",
        background_fill_secondary="#2d2d2d",
        border_color_primary="#404040",
        button_primary_background_fill="#4a9eff",
        button_primary_background_fill_hover="#5aa3ff",
        button_primary_text_color="#ffffff",
        block_background_fill="#2d2d2d",
        block_border_color="#404040",
        input_background_fill="#3a3a3a",
        input_border_color="#404040",
        slider_color="#4a9eff",
    ),
    css="""
    /* Mobile-first responsive design */
    @media (max-width: 768px) {
        .gradio-container { padding: 8px !important; }
        .gr-row { flex-direction: column !important; }
        .gr-column { width: 100% !important; min-width: 0 !important; }
        .gr-tabs { font-size: 14px !important; }
        .gr-button { width: 100% !important; margin: 2px 0 !important; }
        .gr-textbox { font-size: 16px !important; }
        .gr-chatbot { height: 400px !important; }
        .gr-markdown { font-size: 14px !important; }
        .gr-slider { width: 100% !important; }
        .settings-panel { margin-top: 20px !important; }
    }
    /* Settings panel styling */
    .settings-panel {
        background-color: #2d2d2d !important;
        border: 1px solid #404040 !important;
        border-radius: 8px !important;
        padding: 16px !important;
        margin-top: 12px !important;
    }
    .settings-button {
        background-color: #3a3a3a !important;
        border: 1px solid #404040 !important;
        color: #ffffff !important;
        padding: 8px 16px !important;
        border-radius: 6px !important;
        cursor: pointer !important;
        font-size: 14px !important;
        margin-bottom: 8px !important;
    }
    .settings-button:hover { background-color: #4a4a4a !important; }
    /* Dark mode improvements */
    .gr-chatbot { background-color: #2d2d2d !important; }
    .gr-chatbot .message {
        background-color: #3a3a3a !important;
        border: 1px solid #404040 !important;
        border-radius: 8px !important;
        margin: 4px 0 !important;
        padding: 8px !important;
    }
    .gr-chatbot .message.user { background-color: #4a9eff !important; color: white !important; }
    .gr-chatbot .message.bot { background-color: #3a3a3a !important; color: #ffffff !important; }
    /* Better mobile touch targets */
    @media (max-width: 768px) {
        .gr-button { min-height: 44px !important; padding: 12px !important; }
        .gr-slider input { min-height: 44px !important; }
        .gr-checkbox { min-height: 44px !important; }
    }
    /* Improve readability */
    .gr-markdown h1, .gr-markdown h2, .gr-markdown h3 { color: #ffffff !important; }
    .gr-markdown p, .gr-markdown li { color: #e0e0e0 !important; }
    /* Tab styling */
    .gr-tabs .gr-tab {
        background-color: #3a3a3a !important;
        color: #ffffff !important;
        border-color: #404040 !important;
    }
    .gr-tabs .gr-tab.selected { background-color: #4a9eff !important; color: #ffffff !important; }
    """,
) as demo:
gr.Markdown("# π€ SmolLM3-3B Chat Interface") | |
gr.Markdown("Chat with SmolLM3-3B, a 3B parameter model with advanced reasoning, long context support, and tool calling capabilities.") | |
with gr.Tabs(): | |
with gr.TabItem("π¬ Standard Chat"): | |
chatbot = gr.Chatbot(height=500, label="Chat with SmolLM3-3B") | |
msg = gr.Textbox(label="Your message", placeholder="Type your message here...") | |
with gr.Row(): | |
submit = gr.Button("Send", variant="primary") | |
clear = gr.Button("Clear") | |
settings_btn = gr.Button("βοΈ Settings", size="sm") | |
with gr.Column(visible=False, elem_classes="settings-panel") as settings_panel: | |
gr.Markdown("### βοΈ Advanced Settings") | |
system_prompt = gr.Textbox( | |
label="System Prompt", | |
placeholder="Enter system instructions (optional)", | |
lines=3, | |
value="You are an AI assistant trained by HuggingFace. You are helpful, harmless, and honest." | |
) | |
enable_thinking = gr.Checkbox( | |
label="Enable Extended Thinking", | |
value=True, | |
info="Enable reasoning traces for better responses" | |
) | |
temperature = gr.Slider( | |
minimum=0.0, | |
maximum=2.0, | |
value=0.6, | |
step=0.1, | |
label="Temperature" | |
) | |
top_p = gr.Slider( | |
minimum=0.0, | |
maximum=1.0, | |
value=0.95, | |
step=0.05, | |
label="Top-p" | |
) | |
max_tokens = gr.Slider( | |
minimum=1, | |
maximum=32768, | |
value=32768, | |
step=1, | |
label="Max Tokens" | |
) | |
            settings_open = gr.State(False)

            def respond(message, history, sys_prompt, thinking, temp, top_p_val, max_tok):
                response = chat_with_smollm3(message, history, sys_prompt, thinking, temp, top_p_val, max_tok)
                history.append((message, response))
                return "", history

            def toggle_settings(is_open):
                # Track the panel state in gr.State: reading settings_panel.visible
                # at runtime only returns its initial value, so the panel would
                # open once and never close again.
                return gr.update(visible=not is_open), not is_open

            submit.click(respond, [msg, chatbot, system_prompt, enable_thinking, temperature, top_p, max_tokens], [msg, chatbot])
            msg.submit(respond, [msg, chatbot, system_prompt, enable_thinking, temperature, top_p, max_tokens], [msg, chatbot])
            clear.click(lambda: ([], ""), outputs=[chatbot, msg])
            settings_btn.click(toggle_settings, inputs=[settings_open], outputs=[settings_panel, settings_open])
        with gr.TabItem("🛠️ Tool Calling"):
            tool_chatbot = gr.Chatbot(height=500, label="Chat with Tools")
            tool_msg = gr.Textbox(label="Your message", placeholder="Ask me to use tools...")

            with gr.Row():
                tool_submit = gr.Button("Send", variant="primary")
                tool_clear = gr.Button("Clear")
                tool_settings_btn = gr.Button("⚙️ Settings", size="sm")

            with gr.Column(visible=False, elem_classes="settings-panel") as tool_settings_panel:
                gr.Markdown("### 🛠️ Tool Settings")
                tools_json = gr.Textbox(
                    label="Tools JSON",
                    placeholder="Enter tools as JSON array",
                    lines=10,
                    value=example_tools,
                )
                tool_system_prompt = gr.Textbox(
                    label="System Prompt",
                    placeholder="Enter system instructions (optional)",
                    lines=2,
                    value="You are an AI assistant trained by HuggingFace. You are helpful, harmless, and honest.",
                )
                tool_thinking = gr.Checkbox(
                    label="Enable Extended Thinking",
                    value=False,
                    info="Enable reasoning traces for tool usage",
                )
                tool_temperature = gr.Slider(
                    minimum=0.0,
                    maximum=2.0,
                    value=0.6,
                    step=0.1,
                    label="Temperature",
                )
                tool_top_p = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.95,
                    step=0.05,
                    label="Top-p",
                )
                tool_max_tokens = gr.Slider(
                    minimum=1,
                    maximum=32768,
                    value=32768,
                    step=1,
                    label="Max Tokens",
                )
            tool_settings_open = gr.State(False)

            def tool_respond(message, history, tools, sys_prompt, thinking, temp, top_p_val, max_tok):
                response = chat_with_tools(message, history, tools, sys_prompt, thinking, temp, top_p_val, max_tok)
                history.append((message, response))
                return "", history

            def toggle_tool_settings(is_open):
                # Same gr.State pattern as the standard chat tab above
                return gr.update(visible=not is_open), not is_open

            tool_submit.click(tool_respond, [tool_msg, tool_chatbot, tools_json, tool_system_prompt, tool_thinking, tool_temperature, tool_top_p, tool_max_tokens], [tool_msg, tool_chatbot])
            tool_msg.submit(tool_respond, [tool_msg, tool_chatbot, tools_json, tool_system_prompt, tool_thinking, tool_temperature, tool_top_p, tool_max_tokens], [tool_msg, tool_chatbot])
            tool_clear.click(lambda: ([], ""), outputs=[tool_chatbot, tool_msg])
            tool_settings_btn.click(toggle_tool_settings, inputs=[tool_settings_open], outputs=[tool_settings_panel, tool_settings_open])
gr.Markdown(""" | |
### π Model Information | |
- **Model**: HuggingFaceTB/SmolLM3-3B | |
- **Features**: Advanced reasoning, long context (up to 128k tokens), multilingual support | |
- **Languages**: English, French, Spanish, German, Italian, Portuguese (+ Arabic, Chinese, Russian) | |
- **Extended Thinking**: Provides reasoning traces for better responses | |
- **Tool Calling**: Supports XML-based tool calling for agentic workflows | |
### π‘ Usage Tips | |
- Use Extended Thinking for complex reasoning tasks | |
- Adjust temperature (0.6 recommended) for response creativity | |
- Try different system prompts for specialized behaviors | |
- Use tool calling for function-based interactions | |
""") | |
if __name__ == "__main__":
    demo.launch()