from fastapi import FastAPI
from pydantic import BaseModel
from transformers import AutoModelForCausalLM, AutoTokenizer
import torch
app = FastAPI()
# Load your model
model_name = "tiiuae/falcon-7b-instruct" # replace with your own model
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",
)

class PromptRequest(BaseModel):
    prompt: str
    max_tokens: int = 200
    temperature: float = 0.7

@app.post("/generate")
def generate_text(data: PromptRequest):
    # Tokenize the prompt and move the tensors to the model's device
    inputs = tokenizer(data.prompt, return_tensors="pt").to(model.device)
    output = model.generate(
        **inputs,
        max_new_tokens=data.max_tokens,
        temperature=data.temperature,
        do_sample=True,
        top_p=0.95,
        top_k=50,
    )
    # Decode the generated token IDs back into text
    result = tokenizer.decode(output[0], skip_special_tokens=True)
    return {"response": result}
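
# A minimal sketch of how to serve and query this app locally, assuming the file
# is saved as app.py and uvicorn is installed (the port and prompt below are
# illustrative, not part of the original file):
#
#   uvicorn app:app --host 0.0.0.0 --port 7860
#
#   curl -X POST http://localhost:7860/generate \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Explain FastAPI in one sentence.", "max_tokens": 64}'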