Update app.py
app.py
CHANGED
@@ -6,8 +6,7 @@ from llama_cpp import Llama
 
 REPO_ID = "bartowski/Llama-3.2-3B-Instruct-GGUF"
 FILENAME = "Llama-3.2-3B-Instruct-Q4_K_M.gguf"
-
-CACHE_DIR = os.environ.get("HF_HOME", "/tmp/hf_cache")
+CACHE_DIR = "/app/models"  # matches Dockerfile pre-download
 os.makedirs(CACHE_DIR, exist_ok=True)
 
 app = FastAPI()
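The new CACHE_DIR assumes the GGUF file is already baked into the image at /app/models. The Dockerfile itself is not part of this commit; a minimal sketch of the build-time pre-download step it presumably runs (repo and filename come from this diff, everything else is assumed):

# predownload.py -- hypothetical build-time script invoked from the Dockerfile,
# so the Space never has to fetch the model at request time.
from huggingface_hub import hf_hub_download

hf_hub_download(
    repo_id="bartowski/Llama-3.2-3B-Instruct-GGUF",
    filename="Llama-3.2-3B-Instruct-Q4_K_M.gguf",
    cache_dir="/app/models",  # same directory app.py now reads from
)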
@@ -17,6 +16,7 @@ def get_model():
     global _model
     if _model is not None:
         return _model
+
     local_path = hf_hub_download(
         repo_id=REPO_ID,
         filename=FILENAME,
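The hunk cuts off after filename=FILENAME,; the rest of get_model is unchanged by this commit and not shown. For context, a plausible continuation under the assumptions that the download targets CACHE_DIR and the path is handed to llama_cpp (only the names above are confirmed by the diff):

# Hypothetical tail of get_model() -- not shown in this commit.
    local_path = hf_hub_download(
        repo_id=REPO_ID,
        filename=FILENAME,
        cache_dir=CACHE_DIR,  # assumed; keeps reads inside the pre-baked dir
    )
    _model = Llama(model_path=local_path, n_ctx=2048)  # n_ctx is a guess
    return _model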
@@ -50,3 +50,11 @@ def generate_text(req: PromptRequest):
         return {"ok": True, "response": output["choices"][0]["text"]}
     except Exception as e:
         raise HTTPException(status_code=500, detail=str(e))
+
+@app.get("/health")
+def health():
+    try:
+        _ = get_model()
+        return {"ok": True}
+    except Exception as e:
+        return {"ok": False, "error": str(e)}
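The new /health route forces the lazy model load and reports either success or the load error. A quick smoke test once the Space is running (base URL and port are assumptions; Spaces conventionally serve on 7860):

# Hypothetical check against the running Space.
import requests

# First call may be slow: it triggers the initial model load.
r = requests.get("http://localhost:7860/health", timeout=120)
print(r.json())  # {"ok": True} or {"ok": False, "error": "..."}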