# smolLM / app.py
# Anselm
# update app.py
# 51910c5
import torch
import gradio as gr
import os
from transformers import AutoModelForCausalLM, AutoTokenizer
# Identifier of the checkpoint handed to `from_pretrained` below.
model_name = "HuggingFaceTB/SmolLM3-3B"
# Access token read from the environment; None when HF_TOKEN is not set.
TOKEN = os.getenv("HF_TOKEN")
# Run on the GPU when one is available, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load the tokenizer and the model. `token` is used instead of the
# deprecated `use_auth_token` keyword of `from_pretrained`.
tokenizer = AutoTokenizer.from_pretrained(model_name, token=TOKEN)
model = AutoModelForCausalLM.from_pretrained(model_name, token=TOKEN).to(device)
def generate_text(prompt, max_length=512, temperature=0.6, top_p=0.95):
    """Generate text for *prompt* with the module-level tokenizer and model.

    Args:
        prompt: Text passed to the tokenizer as-is (no chat template is
            applied here).
        max_length: Upper bound on the number of newly generated tokens;
            forwarded to ``model.generate`` as ``max_new_tokens``.
        temperature: Sampling temperature. ``None`` or a value <= 0 selects
            greedy decoding instead of sampling.
        top_p: Nucleus-sampling threshold, used only when sampling.

    Returns:
        The first generated sequence decoded to a string with special
        tokens skipped. The sequence is decoded in full; prompt tokens are
        not sliced off in this function.
    """
    # The input tensors have to live on the same device as the model.
    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

    generation_kwargs = {
        # UI widgets may deliver floats; the token budget must be an int.
        "max_new_tokens": int(max_length),
        "pad_token_id": tokenizer.eos_token_id,
    }
    if temperature is not None and temperature > 0:
        # temperature/top_p only take effect when sampling is switched on.
        generation_kwargs.update(
            do_sample=True,
            temperature=float(temperature),
            top_p=float(top_p),
        )
    else:
        # A non-positive temperature is not a valid sampling setting, so
        # decode greedily rather than passing it through.
        generation_kwargs["do_sample"] = False

    outputs = model.generate(**inputs, **generation_kwargs)
    return tokenizer.decode(outputs[0], skip_special_tokens=True)
# Example run executed at module load: send one user message through the
# chat template, generate a reply and print it.
prompt = "Give me a brief explanation of gravity in simple terms."
messages_think = [{"role": "user", "content": prompt}]

# Render the conversation as a string (tokenize=False) that ends with the
# generation prompt, then tokenize it as a batch of one on the model's device.
text = tokenizer.apply_chat_template(
    messages_think, tokenize=False, add_generation_prompt=True
)
model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

# Generate the output.
generated_ids = model.generate(**model_inputs, max_new_tokens=32768)

# Skip the leading tokens that correspond to the input and decode the rest.
prompt_token_count = len(model_inputs.input_ids[0])
output_ids = generated_ids[0][prompt_token_count:]
print(tokenizer.decode(output_ids, skip_special_tokens=True))
# Build the Gradio interface. Gradio passes one value per input component
# to `fn`, so there is one component for each generate_text parameter, in
# signature order: prompt, max_length, temperature, top_p.
interface = gr.Interface(
    fn=generate_text,
    inputs=[
        gr.Textbox(label="Prompt", lines=4),
        # precision=0 makes the value arrive as an int (a token count).
        gr.Number(label="Max new tokens", value=512, precision=0),
        gr.Slider(
            label="Temperature", minimum=0.1, maximum=2.0, value=0.6, step=0.05
        ),
        gr.Slider(
            label="Top-p", minimum=0.05, maximum=1.0, value=0.95, step=0.05
        ),
    ],
    outputs="text",  # single text-out box
    title="SmolLM3-3B Demo",
    description="Type your prompt and hit Submit",
)

if __name__ == "__main__":
    interface.launch()  # on Spaces this will auto-bind correctly