# Minimal FastAPI server that exposes a quantized Phi-4 GGUF model
# (loaded with llama-cpp-python) behind a single chat endpoint.
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from llama_cpp import Llama

app = FastAPI()

# Download (if not already cached) and load the model from the Hugging Face Hub.
# n_ctx sets the context window in tokens; a larger window increases memory use,
# so lower it if RAM is tight.
llm = Llama.from_pretrained(
    repo_id="unsloth/phi-4-GGUF",
    filename="phi-4-Q4_K_M.gguf",
    n_ctx=16384,
)

# Define request model
class ChatRequest(BaseModel):
    system_prompt: str
    query: str

@app.post("/chat-p4q4")
def chat(request: ChatRequest):
    # Declared as a plain `def` rather than `async def`: create_chat_completion
    # is a blocking call, and FastAPI runs sync endpoints in a worker thread
    # pool, which keeps the event loop responsive during inference.
    try:
        response = llm.create_chat_completion(
            messages=[
                {"role": "system", "content": request.system_prompt},
                {"role": "user", "content": request.query},
            ]
        )
        return {"response": response}
    except Exception as e:
        # Log the failure and surface a proper HTTP error instead of
        # returning a 200 response with an error payload.
        print("Error during model inference:", e)
        raise HTTPException(status_code=500, detail=str(e))