Runtime error
import gradio as gr
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

# Load the Korean Gemma 2B model and its tokenizer
model_name = "beomi/gemma-ko-2b"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float32)

# Run on the GPU when one is available, otherwise fall back to CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

def chatbot(prompt):
    with torch.no_grad():
        tokens = tokenizer(prompt, return_tensors="pt").to(device)
        # Unpack the encoding so input_ids and attention_mask are passed as
        # keyword arguments; passing the BatchEncoding positionally fails.
        # max_new_tokens caps the generated tokens rather than the total length.
        gen_tokens = model.generate(**tokens, do_sample=True, temperature=0.8, max_new_tokens=64)
        return tokenizer.decode(gen_tokens[0], skip_special_tokens=True)

iface = gr.Interface(fn=chatbot, inputs="text", outputs="text")
# Spaces expects Gradio on the port it provides (7860 via GRADIO_SERVER_PORT);
# hard-coding server_port=8080 keeps the Space from coming up, and share=True
# is unnecessary there, so a plain launch() is enough.
iface.launch()
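
As a quick sanity check of the fixed generation call outside Gradio, the standalone sketch below reproduces just the tokenize-and-generate step before redeploying the Space; the file name and prompt string are only examples, and the sampling settings mirror the app above.

# sanity_check.py (hypothetical helper): confirm that unpacking the tokenizer
# output into generate() produces text, independent of the Gradio UI.
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM

model_name = "beomi/gemma-ko-2b"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype=torch.float32)
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

prompt = "안녕하세요"  # example prompt ("Hello" in Korean)
tokens = tokenizer(prompt, return_tensors="pt").to(device)
with torch.no_grad():
    out = model.generate(**tokens, do_sample=True, temperature=0.8, max_new_tokens=64)
print(tokenizer.decode(out[0], skip_special_tokens=True))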