# NOTE(review): the three lines previously here ("Spaces:" / "Runtime error")
# were Hugging Face Spaces UI status text pasted into the source, not code.
import torch
from fastapi import FastAPI
from transformers import AutoTokenizer, AutoModelForCausalLM
# --- One-time initialization at import: load Llama Guard 2 and create the app ---
# (Fix: stripped the " | |" paste artifacts that made these lines invalid Python.)

# Gated model — requires approved Hugging Face access and a CUDA device.
model_id = "meta-llama/Meta-Llama-Guard-2-8B"
device = "cuda"
dtype = torch.bfloat16

tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=dtype, device_map=device)

app = FastAPI()
def moderate(chat):
    """Run the Llama Guard moderation model over a chat and return its verdict text.

    Args:
        chat: list of ``{"role": ..., "content": ...}`` message dicts in the
            shape expected by ``tokenizer.apply_chat_template``.

    Returns:
        str: the decoded text of the newly generated tokens only (the echoed
        prompt is sliced off; special tokens are skipped).
    """
    input_ids = tokenizer.apply_chat_template(chat, return_tensors="pt").to(device)
    # pad_token_id=0 is hard-coded — presumably to silence the "no pad token"
    # warning; TODO(review) confirm 0 matches this tokenizer's pad/eos id.
    output = model.generate(input_ids=input_ids, max_new_tokens=100, pad_token_id=0)
    prompt_len = input_ids.shape[-1]
    # Decode only tokens generated after the prompt.
    return tokenizer.decode(output[0][prompt_len:], skip_special_tokens=True)
def greet_json():
    """Return the canonical hello-world JSON payload.

    NOTE(review): this looks like it was meant to be a FastAPI route
    (e.g. ``@app.get("/")``) but no decorator is visible in this chunk —
    confirm against the full file.
    """
    payload = dict(Hello="World!")
    return payload
def helloName(input):
    """Moderate user-supplied text with Llama Guard and wrap the verdict in JSON.

    Args:
        input: user-supplied text to classify. (The name shadows the ``input``
            builtin; kept unchanged for interface compatibility.)

    Returns:
        dict: ``{"hello": <moderation verdict string>}``.
    """
    # BUG FIX: the original used "()".format(...) — "()" contains no "{}"
    # placeholder, so .format() ignored its argument and the function always
    # sent the literal string "()" to the model and returned {"hello": "()"}.
    result = moderate([
        {"role": "user", "content": str(input)},
    ])
    return {"hello": str(result)}