from fastapi import FastAPI
from pydantic import BaseModel
from llama_cpp import Llama
app = FastAPI()

# Load the Phi-4 GGUF model from the Hugging Face Hub (downloaded and cached on
# first run); n_ctx sets the context window size in tokens.
llm = Llama.from_pretrained(
    repo_id="unsloth/phi-4-GGUF",
    filename="phi-4-Q4_K_M.gguf",
    n_ctx=16384,
)


# Define the request model: a system prompt plus the user's query.
class ChatRequest(BaseModel):
    system_prompt: str
    query: str


@app.post("/chat-p4q4")
async def chat(request: ChatRequest):
    try:
        response = llm.create_chat_completion(
            messages=[
                {"role": "system", "content": request.system_prompt},
                {"role": "user", "content": request.query},
            ]
        )
        return {"response": response}
    except Exception as e:
        # Print the error for debugging; swap in proper logging in production
        print("Error during model inference:", e)
        return {"error": str(e)}