Spaces:
Running
Running
github-actions[bot] committed on
Commit ·
36e0175
1
Parent(s): 483b9cf
🚀 Auto-deploy backend from GitHub (c322c5f)
Browse files
- rag/firebase_storage_loader.py +8 -2
- scripts/download_vectorstore_from_firebase.py +53 -11
- startup.sh +14 -9
rag/firebase_storage_loader.py
CHANGED
|
@@ -17,6 +17,7 @@ _FIREBASE_INITIALIZED = False
|
|
| 17 |
|
| 18 |
def _init_firebase_storage() -> Tuple[any, any]:
|
| 19 |
global _FIREBASE_INITIALIZED
|
|
|
|
| 20 |
if _FIREBASE_INITIALIZED:
|
| 21 |
try:
|
| 22 |
from firebase_admin import storage as fb_storage
|
|
@@ -24,6 +25,7 @@ def _init_firebase_storage() -> Tuple[any, any]:
|
|
| 24 |
return fb_storage, bucket
|
| 25 |
except Exception as e:
|
| 26 |
logger.warning("Firebase storage unavailable: %s", e)
|
|
|
|
| 27 |
return None, None
|
| 28 |
|
| 29 |
try:
|
|
@@ -35,8 +37,12 @@ def _init_firebase_storage() -> Tuple[any, any]:
|
|
| 35 |
|
| 36 |
if firebase_admin._apps:
|
| 37 |
_FIREBASE_INITIALIZED = True
|
| 38 |
-
|
| 39 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
|
| 41 |
sa_json = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")
|
| 42 |
sa_file = os.getenv("FIREBASE_SERVICE_ACCOUNT_FILE")
|
|
|
|
| 17 |
|
| 18 |
def _init_firebase_storage() -> Tuple[any, any]:
|
| 19 |
global _FIREBASE_INITIALIZED
|
| 20 |
+
|
| 21 |
if _FIREBASE_INITIALIZED:
|
| 22 |
try:
|
| 23 |
from firebase_admin import storage as fb_storage
|
|
|
|
| 25 |
return fb_storage, bucket
|
| 26 |
except Exception as e:
|
| 27 |
logger.warning("Firebase storage unavailable: %s", e)
|
| 28 |
+
_FIREBASE_INITIALIZED = False
|
| 29 |
return None, None
|
| 30 |
|
| 31 |
try:
|
|
|
|
| 37 |
|
| 38 |
if firebase_admin._apps:
|
| 39 |
_FIREBASE_INITIALIZED = True
|
| 40 |
+
try:
|
| 41 |
+
bucket = storage.bucket()
|
| 42 |
+
return storage, bucket
|
| 43 |
+
except Exception as e:
|
| 44 |
+
logger.warning("Firebase storage bucket unavailable: %s", e)
|
| 45 |
+
return None, None
|
| 46 |
|
| 47 |
sa_json = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")
|
| 48 |
sa_file = os.getenv("FIREBASE_SERVICE_ACCOUNT_FILE")
|
scripts/download_vectorstore_from_firebase.py
CHANGED
|
@@ -1,10 +1,11 @@
|
|
| 1 |
"""
|
| 2 |
Download vectorstore directory from Firebase Storage at container startup.
|
| 3 |
-
Run: python
|
| 4 |
"""
|
| 5 |
|
| 6 |
from __future__ import annotations
|
| 7 |
|
|
|
|
| 8 |
import logging
|
| 9 |
import os
|
| 10 |
import sys
|
|
@@ -12,11 +13,55 @@ from pathlib import Path
|
|
| 12 |
|
| 13 |
logger = logging.getLogger("mathpulse.download_vectorstore")
|
| 14 |
|
| 15 |
-
|
|
|
|
| 16 |
|
| 17 |
-
from backend.rag.firebase_storage_loader import _init_firebase_storage
|
| 18 |
|
| 19 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
|
| 22 |
def _resolve_dest_dir() -> Path:
|
|
@@ -27,8 +72,7 @@ def _resolve_dest_dir() -> Path:
|
|
| 27 |
|
| 28 |
|
| 29 |
def download_vectorstore(dest_dir: Path, prefix: str = REMOTE_PREFIX):
|
| 30 |
-
|
| 31 |
-
_, bucket = _init_firebase_storage()
|
| 32 |
if bucket is None:
|
| 33 |
logger.warning("Firebase Storage not available, vectorstore download skipped")
|
| 34 |
return False
|
|
@@ -41,6 +85,7 @@ def download_vectorstore(dest_dir: Path, prefix: str = REMOTE_PREFIX):
|
|
| 41 |
return False
|
| 42 |
|
| 43 |
downloaded = 0
|
|
|
|
| 44 |
errors = 0
|
| 45 |
|
| 46 |
for blob in blobs:
|
|
@@ -54,6 +99,7 @@ def download_vectorstore(dest_dir: Path, prefix: str = REMOTE_PREFIX):
|
|
| 54 |
try:
|
| 55 |
if local_path.exists() and blob.size is not None and local_path.stat().st_size == blob.size:
|
| 56 |
logger.info("Skipped (already up-to-date): %s", blob.name)
|
|
|
|
| 57 |
continue
|
| 58 |
blob.download_to_filename(str(local_path))
|
| 59 |
logger.info("Downloaded: %s (%d bytes)", blob.name, blob.size or 0)
|
|
@@ -62,15 +108,11 @@ def download_vectorstore(dest_dir: Path, prefix: str = REMOTE_PREFIX):
|
|
| 62 |
logger.error("Failed to download %s: %s", blob.name, e)
|
| 63 |
errors += 1
|
| 64 |
|
| 65 |
-
logger.info("Download complete: %d
|
| 66 |
return errors == 0
|
| 67 |
|
| 68 |
|
| 69 |
if __name__ == "__main__":
|
| 70 |
-
import firebase_admin
|
| 71 |
-
print("DEBUG: firebase_admin location:", firebase_admin.__file__)
|
| 72 |
-
print("DEBUG: firebase_admin apps:", firebase_admin._apps)
|
| 73 |
-
print("DEBUG: FIREBASE_SERVICE_ACCOUNT_JSON set:", bool(firebase_admin._GLOBAL_APP is None))
|
| 74 |
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
|
| 75 |
dest_dir = _resolve_dest_dir()
|
| 76 |
print(f"INFO: Using vectorstore destination: {dest_dir}")
|
|
|
|
| 1 |
"""
|
| 2 |
Download vectorstore directory from Firebase Storage at container startup.
|
| 3 |
+
Run: python /app/scripts/download_vectorstore_from_firebase.py
|
| 4 |
"""
|
| 5 |
|
| 6 |
from __future__ import annotations
|
| 7 |
|
| 8 |
+
import json
|
| 9 |
import logging
|
| 10 |
import os
|
| 11 |
import sys
|
|
|
|
| 13 |
|
| 14 |
logger = logging.getLogger("mathpulse.download_vectorstore")
|
| 15 |
|
| 16 |
+
REMOTE_PREFIX = "vectorstore/"
|
| 17 |
+
_FIREBASE_INITIALIZED = False
|
| 18 |
|
|
|
|
| 19 |
|
| 20 |
+
def _init_firebase() -> any | None:
    """Return the default Firebase Storage bucket, initializing firebase_admin if needed.

    Credentials are resolved in this order: inline JSON from
    ``FIREBASE_SERVICE_ACCOUNT_JSON``, a key file named by
    ``FIREBASE_SERVICE_ACCOUNT_FILE`` (only if that path exists), then
    application-default credentials. The bucket name is read from
    ``FIREBASE_STORAGE_BUCKET`` and is applied only when this function
    creates the default app itself.

    Returns:
        The object returned by ``firebase_admin.storage.bucket()``, or ``None``
        when ``firebase_admin`` cannot be imported, app initialization fails,
        or the bucket cannot be obtained. Those failures are logged, not raised.
    """
    # NOTE(review): the return annotation uses the builtin ``any`` rather than
    # ``typing.Any``. It is left unchanged here; it is only harmless because
    # the module defers annotation evaluation.
    global _FIREBASE_INITIALIZED

    # Fast path: an earlier call in this process already obtained a bucket.
    if _FIREBASE_INITIALIZED:
        try:
            from firebase_admin import storage as fb_storage

            return fb_storage.bucket()
        except Exception as e:
            logger.warning("Firebase storage unavailable: %s", e)
            # Drop the cached flag so the next call re-runs full detection.
            _FIREBASE_INITIALIZED = False
            return None

    try:
        import firebase_admin
        from firebase_admin import credentials, storage
    except ImportError:
        logger.warning("firebase_admin not installed")
        return None

    # Use the public accessor instead of the private ``_apps`` registry:
    # ``storage.bucket()`` needs the *default* app, and ``_apps`` is also
    # non-empty when only named apps exist. ``get_app()`` raises ValueError
    # when the default app has not been created yet.
    try:
        firebase_admin.get_app()
    except ValueError:
        default_app_exists = False
    else:
        default_app_exists = True

    if default_app_exists:
        try:
            bucket = storage.bucket()
        except Exception as e:
            logger.warning("Firebase storage bucket unavailable: %s", e)
            return None
        # Only remember success once a bucket was actually obtained.
        _FIREBASE_INITIALIZED = True
        return bucket

    sa_json = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")
    sa_file = os.getenv("FIREBASE_SERVICE_ACCOUNT_FILE")
    bucket_name = os.getenv("FIREBASE_STORAGE_BUCKET", "mathpulse-ai-2026.firebasestorage.app")

    # Broad catch is deliberate: this runs at container startup as a
    # best-effort step, so any credential/JSON/SDK error degrades to "no bucket".
    try:
        if sa_json:
            creds = credentials.Certificate(json.loads(sa_json))
        elif sa_file and Path(sa_file).exists():
            creds = credentials.Certificate(sa_file)
        else:
            if sa_file:
                # A configured-but-missing key file is almost always a
                # deployment mistake; say so before falling back.
                logger.warning(
                    "FIREBASE_SERVICE_ACCOUNT_FILE not found at %s; "
                    "falling back to application default credentials",
                    sa_file,
                )
            creds = credentials.ApplicationDefault()

        firebase_admin.initialize_app(creds, {"storageBucket": bucket_name})
        bucket = storage.bucket()
    except Exception as e:
        logger.error("Firebase init failed: %s", e)
        return None

    _FIREBASE_INITIALIZED = True
    return bucket
|
| 65 |
|
| 66 |
|
| 67 |
def _resolve_dest_dir() -> Path:
|
|
|
|
| 72 |
|
| 73 |
|
| 74 |
def download_vectorstore(dest_dir: Path, prefix: str = REMOTE_PREFIX):
|
| 75 |
+
bucket = _init_firebase()
|
|
|
|
| 76 |
if bucket is None:
|
| 77 |
logger.warning("Firebase Storage not available, vectorstore download skipped")
|
| 78 |
return False
|
|
|
|
| 85 |
return False
|
| 86 |
|
| 87 |
downloaded = 0
|
| 88 |
+
skipped = 0
|
| 89 |
errors = 0
|
| 90 |
|
| 91 |
for blob in blobs:
|
|
|
|
| 99 |
try:
|
| 100 |
if local_path.exists() and blob.size is not None and local_path.stat().st_size == blob.size:
|
| 101 |
logger.info("Skipped (already up-to-date): %s", blob.name)
|
| 102 |
+
skipped += 1
|
| 103 |
continue
|
| 104 |
blob.download_to_filename(str(local_path))
|
| 105 |
logger.info("Downloaded: %s (%d bytes)", blob.name, blob.size or 0)
|
|
|
|
| 108 |
logger.error("Failed to download %s: %s", blob.name, e)
|
| 109 |
errors += 1
|
| 110 |
|
| 111 |
+
logger.info("Download complete: %d downloaded, %d skipped, %d errors", downloaded, skipped, errors)
|
| 112 |
return errors == 0
|
| 113 |
|
| 114 |
|
| 115 |
if __name__ == "__main__":
|
|
|
|
|
|
|
|
|
|
|
|
|
| 116 |
logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
|
| 117 |
dest_dir = _resolve_dest_dir()
|
| 118 |
print(f"INFO: Using vectorstore destination: {dest_dir}")
|
startup.sh
CHANGED
|
@@ -18,13 +18,11 @@ echo "MathPulse AI Startup"
|
|
| 18 |
echo "=========================================="
|
| 19 |
echo "VECTORSTORE_DIR=${VECTORSTORE_DIR}"
|
| 20 |
echo "CURRICULUM_VECTORSTORE_DIR=${CURRICULUM_VECTORSTORE_DIR}"
|
|
|
|
| 21 |
echo "FIREBASE_SERVICE_ACCOUNT_JSON set: $(if [ -n "${FIREBASE_SERVICE_ACCOUNT_JSON:-}" ]; then echo YES; else echo NO; fi)"
|
| 22 |
echo "FIREBASE_STORAGE_BUCKET=${FIREBASE_STORAGE_BUCKET:-not set}"
|
| 23 |
echo "=========================================="
|
| 24 |
|
| 25 |
-
echo "Resolved VECTORSTORE_DIR=${VECTORSTORE_DIR}"
|
| 26 |
-
echo "Resolved CURRICULUM_VECTORSTORE_DIR=${CURRICULUM_VECTORSTORE_DIR}"
|
| 27 |
-
|
| 28 |
mkdir -p "${CURRICULUM_DIR}" "${VECTORSTORE_DIR}"
|
| 29 |
|
| 30 |
_vectorstore_cache_dir="${VECTORSTORE_DIR}/.chroma"
|
|
@@ -35,7 +33,11 @@ fi
|
|
| 35 |
|
| 36 |
_ingest_script="/app/scripts/ingest_curriculum.py"
|
| 37 |
if [ -f "${_ingest_script}" ]; then
|
| 38 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 39 |
echo "INFO: Running curriculum ingestion (optional)..."
|
| 40 |
python "${_ingest_script}" && echo "INFO: Curriculum ingestion completed" || echo "WARNING: Curriculum ingestion failed, continuing anyway"
|
| 41 |
else
|
|
@@ -47,12 +49,15 @@ fi
|
|
| 47 |
|
| 48 |
_vectorstore_download_script="/app/scripts/download_vectorstore_from_firebase.py"
|
| 49 |
if [ -f "${_vectorstore_download_script}" ]; then
|
| 50 |
-
echo "INFO: Vectorstore files present before download:"
|
| 51 |
ls -la "${VECTORSTORE_DIR}/"
|
| 52 |
-
echo "INFO: Vectorstore download script path: ${_vectorstore_download_script}"
|
| 53 |
-
echo "INFO: CURRICULUM_VECTORSTORE_DIR at download time: ${CURRICULUM_VECTORSTORE_DIR}"
|
| 54 |
echo "INFO: Downloading vectorstore from Firebase Storage..."
|
| 55 |
-
python "${_vectorstore_download_script}" ||
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
echo "INFO: Vectorstore files present after download:"
|
| 57 |
ls -la "${VECTORSTORE_DIR}/"
|
| 58 |
_vectorstore_summary_file="${VECTORSTORE_DIR}/ingest_summary.json"
|
|
@@ -65,4 +70,4 @@ else
|
|
| 65 |
echo "INFO: Vectorstore download script not found at ${_vectorstore_download_script}; skipping"
|
| 66 |
fi
|
| 67 |
|
| 68 |
-
exec uvicorn main:app --host 0.0.0.0 --port 7860 --workers 1
|
|
|
|
| 18 |
echo "=========================================="
|
| 19 |
echo "VECTORSTORE_DIR=${VECTORSTORE_DIR}"
|
| 20 |
echo "CURRICULUM_VECTORSTORE_DIR=${CURRICULUM_VECTORSTORE_DIR}"
|
| 21 |
+
echo "CURRICULUM_SOURCE_REPO_ID set: $(if [ -n "${CURRICULUM_SOURCE_REPO_ID:-}" ]; then echo YES; else echo NO; fi)"
|
| 22 |
echo "FIREBASE_SERVICE_ACCOUNT_JSON set: $(if [ -n "${FIREBASE_SERVICE_ACCOUNT_JSON:-}" ]; then echo YES; else echo NO; fi)"
|
| 23 |
echo "FIREBASE_STORAGE_BUCKET=${FIREBASE_STORAGE_BUCKET:-not set}"
|
| 24 |
echo "=========================================="
|
| 25 |
|
|
|
|
|
|
|
|
|
|
| 26 |
mkdir -p "${CURRICULUM_DIR}" "${VECTORSTORE_DIR}"
|
| 27 |
|
| 28 |
_vectorstore_cache_dir="${VECTORSTORE_DIR}/.chroma"
|
|
|
|
| 33 |
|
| 34 |
_ingest_script="/app/scripts/ingest_curriculum.py"
|
| 35 |
if [ -f "${_ingest_script}" ]; then
|
| 36 |
+
# Detect whether CURRICULUM_DIR contains at least one PDF.
# `find` exits 0 even when nothing matches, so its exit status cannot be used
# as the test; check whether it printed a path instead. `-print -quit` stops
# at the first match so large trees are not fully walked.
_has_pdfs=false
if [ -d "${CURRICULUM_DIR}" ] && [ -n "$(find "${CURRICULUM_DIR}" -type f -name '*.pdf' -print -quit 2>/dev/null)" ]; then
  _has_pdfs=true
fi
|
| 40 |
+
if [ "${_has_pdfs}" = true ] || [ -n "${CURRICULUM_SOURCE_REPO_ID:-}" ]; then
|
| 41 |
echo "INFO: Running curriculum ingestion (optional)..."
|
| 42 |
python "${_ingest_script}" && echo "INFO: Curriculum ingestion completed" || echo "WARNING: Curriculum ingestion failed, continuing anyway"
|
| 43 |
else
|
|
|
|
| 49 |
|
| 50 |
_vectorstore_download_script="/app/scripts/download_vectorstore_from_firebase.py"
|
| 51 |
if [ -f "${_vectorstore_download_script}" ]; then
|
| 52 |
+
echo "INFO: Vectorstore files present before download:"
|
| 53 |
ls -la "${VECTORSTORE_DIR}/"
|
|
|
|
|
|
|
| 54 |
echo "INFO: Downloading vectorstore from Firebase Storage..."
|
| 55 |
+
python "${_vectorstore_download_script}" && _result=0 || _result=1
|
| 56 |
+
if [ $_result -eq 0 ]; then
|
| 57 |
+
echo "INFO: Vectorstore download succeeded"
|
| 58 |
+
else
|
| 59 |
+
echo "WARNING: Vectorstore download failed, continuing anyway"
|
| 60 |
+
fi
|
| 61 |
echo "INFO: Vectorstore files present after download:"
|
| 62 |
ls -la "${VECTORSTORE_DIR}/"
|
| 63 |
_vectorstore_summary_file="${VECTORSTORE_DIR}/ingest_summary.json"
|
|
|
|
| 70 |
echo "INFO: Vectorstore download script not found at ${_vectorstore_download_script}; skipping"
|
| 71 |
fi
|
| 72 |
|
| 73 |
+
exec uvicorn main:app --host 0.0.0.0 --port 7860 --workers 1
|