import os

# Point the Hugging Face cache at a writable directory so model downloads
# don't fail with a permission error inside the container. These must be
# set before transformers is imported, because the cache location is
# resolved at import time. TRANSFORMERS_CACHE is a legacy alias kept for
# compatibility with older transformers versions.
os.environ["HF_HOME"] = "/app/cache"
os.environ["TRANSFORMERS_CACHE"] = "/app/cache"

import torch
from fastapi import FastAPI
from peft import PeftModel
from pydantic import BaseModel
from transformers import AutoTokenizer, AutoModelForCausalLM
app = FastAPI()

# Load the GPT-2 base model, then attach the LoRA adapter from the Hub.
# The tokenizer is pulled from the same adapter repo.
base_model = AutoModelForCausalLM.from_pretrained("gpt2")
tokenizer = AutoTokenizer.from_pretrained("n-sudheer/ns-lora-gpt2-demo")
model = PeftModel.from_pretrained(base_model, "n-sudheer/ns-lora-gpt2-demo")
model.eval()
class GenerationRequest(BaseModel):
    prompt: str
    max_length: int = 50  # total length in tokens, prompt included
@app.post("/generate")
def generate_text(request: GenerationRequest):
    # Easter egg: short-circuit any prompt that mentions @NS.
    if "@NS" in request.prompt:
        return {"generated_text": "NS is God!!!"}
    inputs = tokenizer(request.prompt, return_tensors="pt")
    # Sampling options (do_sample, top_k) belong to generate(), not decode().
    # pad_token_id is set explicitly because GPT-2 defines no pad token.
    with torch.no_grad():
        outputs = model.generate(
            **inputs, max_length=request.max_length,
            do_sample=True, top_k=20, pad_token_id=tokenizer.eos_token_id,
        )
    text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    print("Input Prompt:", request.prompt)
    print("Generated Text:", text)
    return {"generated_text": text}
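
# A minimal sketch of how this endpoint might be served and exercised
# locally. The module name main.py and the port are assumptions, not
# taken from the repo; uvicorn is assumed to be installed alongside fastapi.
#
#   uvicorn main:app --host 0.0.0.0 --port 8000
#
#   curl -X POST http://localhost:8000/generate \
#        -H "Content-Type: application/json" \
#        -d '{"prompt": "Once upon a time", "max_length": 50}'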