# Spaces: Runtime error
import torch
from fastapi import FastAPI
from pydantic import BaseModel, Field
from transformers import AutoModelForCausalLM, AutoTokenizer
# Application object for this service.
app = FastAPI()

# Load your model
model_name = "tiiuae/falcon-7b-instruct"  # replace with your own model
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Half precision when CUDA is available, full precision otherwise.
if torch.cuda.is_available():
    _dtype = torch.float16
else:
    _dtype = torch.float32

model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=_dtype,
    device_map="auto",
)
class PromptRequest(BaseModel):
    """Request body for a text-generation call.

    Out-of-range numeric values are rejected when the request is validated
    instead of failing later inside generation.
    """

    # Text passed to the tokenizer as the generation prompt.
    prompt: str
    # Maximum number of newly generated tokens; must be at least 1.
    # NOTE(review): there is no upper cap -- consider adding one if callers
    # are untrusted, since large values make a single request very expensive.
    max_tokens: int = Field(200, ge=1)
    # Sampling temperature; negative values are rejected.
    temperature: float = Field(0.7, ge=0.0)
# NOTE(review): the handler was never registered on ``app``, so the service
# exposed no endpoint. The "/generate" path is an assumption -- confirm it
# against whatever clients call this service.
@app.post("/generate")
def generate_text(data: PromptRequest):
    """Generate text for ``data.prompt`` using the module-level model.

    Args:
        data: Request carrying the prompt, the maximum number of new tokens,
            and the sampling temperature.

    Returns:
        A dict ``{"response": text}`` where ``text`` is the first output
        sequence decoded with special tokens skipped.
        NOTE(review): for a causal LM the output sequence presumably starts
        with the prompt tokens, so the prompt is echoed back -- confirm that
        this is what callers expect.
    """
    inputs = tokenizer(data.prompt, return_tensors="pt").to(model.device)

    # Sampling requires a strictly positive temperature; treat a
    # non-positive value as a request for deterministic (greedy) decoding
    # instead of letting generation raise.
    if data.temperature > 0:
        decoding_kwargs = {
            "do_sample": True,
            "temperature": data.temperature,
            "top_p": 0.95,
            "top_k": 50,
        }
    else:
        decoding_kwargs = {"do_sample": False}

    output = model.generate(
        **inputs,
        max_new_tokens=data.max_tokens,
        **decoding_kwargs,
    )
    result = tokenizer.decode(output[0], skip_special_tokens=True)
    return {"response": result}