SharmaGroups07 committed on
Commit
23e0603
·
verified ·
1 Parent(s): ee7bc70

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +41 -0
app.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""FastAPI service that serves a local GGUF Qwen2.5-Coder model via llama.cpp."""
from fastapi import FastAPI
from pydantic import BaseModel
from llama_cpp import Llama
from huggingface_hub import hf_hub_download

app = FastAPI()

# Hugging Face repo and quantized weight file to serve.
MODEL_REPO = "bartowski/Qwen2.5-Coder-1.5B-Instruct-GGUF"
MODEL_FILE = "Qwen2.5-Coder-1.5B-Instruct-Q4_K_M.gguf"

# Download the GGUF weights at startup (reuses the local HF cache if present).
model_path = hf_hub_download(repo_id=MODEL_REPO, filename=MODEL_FILE)

# Load the model once at import time so every request shares the same instance.
llm = Llama(model_path=model_path, n_ctx=2048, n_threads=2)
21
+
22
class ChatRequest(BaseModel):
    """Request body for POST /chat."""

    # The user's chat message to send to the model.
    message: str
24
+
25
@app.get("/")
def root():
    """Health-check endpoint: confirms the API process is running."""
    payload = {"status": "AI engine running"}
    return payload
28
+
29
@app.post("/chat")
def chat(req: ChatRequest):
    """Generate a single-turn completion for the user's message.

    Args:
        req: Parsed request body carrying the user's message.

    Returns:
        dict with a single "reply" key holding the model's generated text.
    """
    # Qwen2.5 models are trained on the ChatML template
    # (<|im_start|>role ... <|im_end|>); the previous <|user|>/<|assistant|>
    # tags and "<|end|>" stop token are not part of this model's vocabulary,
    # so generation quality suffered and the stop sequence never matched.
    prompt = (
        "<|im_start|>user\n"
        f"{req.message}<|im_end|>\n"
        "<|im_start|>assistant\n"
    )
    output = llm(
        prompt,
        max_tokens=300,
        stop=["<|im_end|>"],
    )
    return {"reply": output["choices"][0]["text"]}
37
+
38
if __name__ == "__main__":
    # Launch the ASGI server when executed directly; 7860 is the
    # conventional Hugging Face Spaces port.
    import uvicorn

    uvicorn.run(app, host="0.0.0.0", port=7860)