# Spaces: Running
from transformers import AutoTokenizer, AutoModelForCausalLM
import transformers
import torch
import gradio as gr

# Checkpoint to load and the precision its weights are loaded in.
model_id = "gg-hf/gemma-7b-it"
dtype = torch.bfloat16

tokenizer = AutoTokenizer.from_pretrained(model_id)
# device_map="cuda" places the model on the GPU, so a CUDA device is required.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="cuda",
    torch_dtype=dtype,
)

# A single-turn conversation in the role/content message format.
chat = [
    {"role": "user", "content": "Write a hello world program"},
]
# tokenize=False returns the rendered prompt as a string rather than token ids;
# add_generation_prompt=True ends it with the cue for the model's reply.
prompt = tokenizer.apply_chat_template(chat, tokenize=False, add_generation_prompt=True)