Spaces:

slemiu
/

smolLM

Runtime error

smolLM / app.py

Anselm

update app.py

51910c5 3 days ago

1.77 kB

	import torch
	import gradio as gr
	import os
	from transformers import AutoModelForCausalLM, AutoTokenizer

	# Hub checkpoint served by this demo.
	model_name = "HuggingFaceTB/SmolLM3-3B"
	# Hub access token read from the environment; None when HF_TOKEN is unset.
	TOKEN = os.getenv("HF_TOKEN")
	# Use the GPU when torch can see one, otherwise fall back to the CPU.
	device = "cuda" if torch.cuda.is_available() else "cpu"

	# Load the tokenizer and the model. `token` is the current keyword for Hub
	# authentication; the older `use_auth_token` spelling is deprecated.
	tokenizer = AutoTokenizer.from_pretrained(model_name, token=TOKEN)
	model = AutoModelForCausalLM.from_pretrained(model_name, token=TOKEN).to(device)

	def generate_text(prompt, max_length=256, temperature=0.6, top_p=0.95):
	    """Generate a continuation of *prompt* with the module-level model.

	    Args:
	        prompt: Text to feed to the model.
	        max_length: Upper bound on the number of newly generated tokens.
	            Cast to ``int`` so numeric UI widgets that emit floats work.
	        temperature: Sampling temperature. A value that is ``None`` or not
	            greater than zero switches to greedy decoding.
	        top_p: Nucleus-sampling probability mass; only used when sampling.

	    Returns:
	        The decoded first output sequence (prompt included, special tokens
	        stripped).
	    """
	    # Keep the input tensors on the same device as the model weights.
	    inputs = tokenizer(prompt, return_tensors="pt").to(model.device)

	    sampling = temperature is not None and temperature > 0
	    generation_args = {
	        "max_new_tokens": int(max_length),
	        "do_sample": sampling,
	        "pad_token_id": tokenizer.eos_token_id,
	    }
	    if sampling:
	        # Only pass sampling knobs when sampling is actually enabled.
	        generation_args["temperature"] = float(temperature)
	        generation_args["top_p"] = float(top_p)

	    outputs = model.generate(**inputs, **generation_args)
	    return tokenizer.decode(outputs[0], skip_special_tokens=True)

	# One-off sample generation executed while the module is being loaded.
	# NOTE(review): this runs before the Gradio interface is built and allows
	# up to 32768 new tokens; confirm it is wanted on the app start-up path.
	prompt = "Give me a brief explanation of gravity in simple terms."
	messages_think = [{"role": "user", "content": prompt}]

	# Render the chat messages to a single prompt string (not token ids) that
	# ends with the generation prompt, then tokenize it on the model's device.
	text = tokenizer.apply_chat_template(
	    messages_think,
	    add_generation_prompt=True,
	    tokenize=False,
	)
	model_inputs = tokenizer([text], return_tensors="pt").to(model.device)

	# Run generation on the single-item batch.
	generated_ids = model.generate(max_new_tokens=32768, **model_inputs)

	# Slice off the prompt tokens so only the newly generated part is decoded.
	prompt_token_count = len(model_inputs.input_ids[0])
	output_ids = generated_ids[0][prompt_token_count:]
	decoded_reply = tokenizer.decode(output_ids, skip_special_tokens=True)
	print(decoded_reply)

	# Build the Gradio interface. One input component is declared per
	# parameter of generate_text (prompt, max_length, temperature, top_p) so
	# the callback receives every argument it expects; the defaults mirror the
	# values that were previously hard-coded in the generation call.
	interface = gr.Interface(
	    fn=generate_text,
	    inputs=[
	        gr.Textbox(label="Prompt", lines=4),
	        # precision=0 makes the component hand an int to the callback.
	        gr.Number(label="Max new tokens", value=256, precision=0),
	        gr.Slider(minimum=0.1, maximum=1.5, value=0.6, step=0.05, label="Temperature"),
	        gr.Slider(minimum=0.05, maximum=1.0, value=0.95, step=0.05, label="Top-p"),
	    ],
	    outputs="text",  # single text-out box
	    title="SmolLM3-3B Demo",
	    description="Type your prompt and hit Submit",
	)

	if __name__ == "__main__":
	    # Start the web server only when the file is run as a script.
	    interface.launch()