Spaces:

rayymaxx
/

DirectEd-AI-LoRA-API

Sleeping

App Files Files Community

rayymaxx committed on Aug 23

Commit

29e3684

1 Parent(s): 156ac82

Fixed error

Browse files

Files changed (2) hide show

app..py +0 -7
app.py +71 -0

app..py DELETED Viewed

@@ -1,7 +0,0 @@
-# app.py (temporary test)
-from fastapi import FastAPI
-app = FastAPI()
-@app.get("/")
-def root():
-    return {"status": "ok", "message": "minimal app works"}

app.py ADDED Viewed

	@@ -0,0 +1,71 @@

+# app.py — robust pattern (use this as your main app)
+import os
+from fastapi import FastAPI, HTTPException
+from pydantic import BaseModel
+import logging
+# Configure writable cache for HF libs (prevents '/.cache' PermissionError in Spaces)
+CACHE_DIR = "/app/cache"
+os.makedirs(CACHE_DIR, exist_ok=True)
+os.environ["TRANSFORMERS_CACHE"] = CACHE_DIR
+os.environ["HF_HOME"] = CACHE_DIR
+os.environ["HF_DATASETS_CACHE"] = CACHE_DIR
+os.environ["HF_METRICS_CACHE"] = CACHE_DIR
+app = FastAPI(title="DirectEd LoRA API (safe startup)")
+# lightweight health endpoint
+@app.get("/health")
+def health():
+    return {"ok": True}
+class Request(BaseModel):  # JSON body schema for POST /generate
+    prompt: str  # text passed to the generation pipeline as its input
+    max_new_tokens: int = 150  # forwarded to the pipeline as max_new_tokens
+    temperature: float = 0.7  # forwarded to the pipeline as temperature
+# global placeholders set at startup
+model = None
+tokenizer = None
+pipe = None
+# delayed import & model load on startup
+@app.on_event("startup")
+def load_model():
+    global model, tokenizer, pipe
+    try:
+        # import heavy libraries here so import(app) is lightweight
+        from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+        from peft import PeftModel
+        BASE_MODEL = "unsloth/llama-3-8b-Instruct-bnb-4bit"
+        ADAPTER_REPO = "rayymaxx/DirectEd-AI-LoRA"  # replace
+        # load tokenizer and base model (wrap in try/except to catch runtime issues)
+        tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
+        base_model = AutoModelForCausalLM.from_pretrained(
+            BASE_MODEL, device_map="auto", torch_dtype="auto"
+        )
+        model = PeftModel.from_pretrained(base_model, ADAPTER_REPO)
+        pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device_map="auto")
+        logging.info("Model and adapter loaded successfully.")
+    except Exception as e:
+        # Log the exception — the Space logs will show this traceback
+        logging.exception("Failed to load model at startup: %s", e)
+        # keep app importable; /generate will return 500 until model loads
+        model = None
+        tokenizer = None
+        pipe = None
+@app.post("/generate")
+def generate(req: Request):
+    if pipe is None:
+        raise HTTPException(status_code=503, detail="Model not loaded yet. Check logs.")
+    try:
+        out = pipe(req.prompt, max_new_tokens=req.max_new_tokens, temperature=req.temperature, do_sample=True)
+        return {"response": out[0]["generated_text"]}
+    except Exception as e:
+        logging.exception("Generation failed: %s", e)
+        raise HTTPException(status_code=500, detail=str(e))