# phi4 / app.py
from fastapi import FastAPI
from pydantic import BaseModel
from llama_cpp import Llama

app = FastAPI()

# Load the quantized Phi-4 GGUF model via llama-cpp-python
llm = Llama.from_pretrained(
    repo_id="unsloth/phi-4-GGUF",
    filename="phi-4-Q4_K_M.gguf",
    n_ctx=16384,  # context window size in tokens
)
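# Note: from_pretrained fetches the GGUF file from the Hub on the first run and
# caches it locally, so later startups skip the download.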

# Define request model
class ChatRequest(BaseModel):
    system_prompt: str
    query: str
@app.post("/chat-p4q4")
async def chat(request: ChatRequest):
try:
response = llm.create_chat_completion(
messages=[
{"role": "system", "content": request.system_prompt},
{"role": "user", "content": request.query},
]
)
return {"response": response}
except Exception as e:
# Log the error or print it for debugging
print("Error during model inference:", e)
return {"error": str(e)}