rayymaxx committed
Commit 29e3684 · 1 Parent(s): 156ac82

Fixed error

Files changed (2)
  1. app..py +0 -7
  2. app.py +71 -0
app..py DELETED
@@ -1,7 +0,0 @@
- # app.py (temporary test)
- from fastapi import FastAPI
- app = FastAPI()
-
- @app.get("/")
- def root():
-     return {"status": "ok", "message": "minimal app works"}
app.py ADDED
@@ -0,0 +1,71 @@
+ # app.py — robust pattern (use this as your main app)
+ import os
+ from fastapi import FastAPI, HTTPException
+ from pydantic import BaseModel
+ import logging
+
+ # Configure writable cache for HF libs (prevents '/.cache' PermissionError in Spaces)
+ CACHE_DIR = "/app/cache"
+ os.makedirs(CACHE_DIR, exist_ok=True)
+ os.environ["TRANSFORMERS_CACHE"] = CACHE_DIR
+ os.environ["HF_HOME"] = CACHE_DIR
+ os.environ["HF_DATASETS_CACHE"] = CACHE_DIR
+ os.environ["HF_METRICS_CACHE"] = CACHE_DIR
+
+ app = FastAPI(title="DirectEd LoRA API (safe startup)")
+
+ # lightweight health endpoint
+ @app.get("/health")
+ def health():
+     return {"ok": True}
+
+ class Request(BaseModel):
+     prompt: str
+     max_new_tokens: int = 150
+     temperature: float = 0.7
+
+ # global placeholders set at startup
+ model = None
+ tokenizer = None
+ pipe = None
+
+ # delayed import & model load on startup
+ @app.on_event("startup")
+ def load_model():
+     global model, tokenizer, pipe
+     try:
+         # import heavy libraries here so importing the app stays lightweight
+         from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+         from peft import PeftModel
+
+         BASE_MODEL = "unsloth/llama-3-8b-Instruct-bnb-4bit"
+         ADAPTER_REPO = "rayymaxx/DirectEd-AI-LoRA"  # replace
+
+         # load tokenizer and base model (wrap in try/except to catch runtime issues)
+         tokenizer = AutoTokenizer.from_pretrained(BASE_MODEL)
+         base_model = AutoModelForCausalLM.from_pretrained(
+             BASE_MODEL, device_map="auto", torch_dtype="auto"
+         )
+         model = PeftModel.from_pretrained(base_model, ADAPTER_REPO)
+
+         pipe = pipeline("text-generation", model=model, tokenizer=tokenizer, device_map="auto")
+
+         logging.info("Model and adapter loaded successfully.")
+     except Exception as e:
+         # Log the exception — the Space logs will show this traceback
+         logging.exception("Failed to load model at startup: %s", e)
+         # keep app importable; /generate will return 503 until the model loads
+         model = None
+         tokenizer = None
+         pipe = None
+
+ @app.post("/generate")
+ def generate(req: Request):
+     if pipe is None:
+         raise HTTPException(status_code=503, detail="Model not loaded yet. Check logs.")
+     try:
+         out = pipe(req.prompt, max_new_tokens=req.max_new_tokens, temperature=req.temperature, do_sample=True)
+         return {"response": out[0]["generated_text"]}
+     except Exception as e:
+         logging.exception("Generation failed: %s", e)
+         raise HTTPException(status_code=500, detail=str(e))
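
A note on the startup hook used above: @app.on_event("startup") still works but is deprecated in recent FastAPI releases in favor of a lifespan handler. A minimal sketch of the equivalent structure, reusing the load_model function from the commit (not part of this commit):

from contextlib import asynccontextmanager
from fastapi import FastAPI

@asynccontextmanager
async def lifespan(app: FastAPI):
    load_model()  # run the same loader once, before requests are served
    yield         # the application handles traffic while suspended here
    # optional cleanup on shutdown would go after the yield

app = FastAPI(title="DirectEd LoRA API (safe startup)", lifespan=lifespan)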
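
Once the Space is running, both endpoints can be exercised from any HTTP client. A sketch using requests (the base URL is a placeholder for the real Space endpoint):

import requests

BASE_URL = "https://your-space.hf.space"  # placeholder; substitute the actual Space URL

# /health responds as soon as the app imports, even before the model loads
print(requests.get(f"{BASE_URL}/health").json())

# /generate returns 503 until the startup loader has finished
payload = {"prompt": "Explain LoRA in one sentence.", "max_new_tokens": 80}
resp = requests.post(f"{BASE_URL}/generate", json=payload, timeout=120)
print(resp.status_code, resp.json())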