github-actions[bot] committed on
Commit
36e0175
·
1 Parent(s): 483b9cf

🚀 Auto-deploy backend from GitHub (c322c5f)

Browse files
rag/firebase_storage_loader.py CHANGED
@@ -17,6 +17,7 @@ _FIREBASE_INITIALIZED = False
17
 
18
  def _init_firebase_storage() -> Tuple[any, any]:
19
  global _FIREBASE_INITIALIZED
 
20
  if _FIREBASE_INITIALIZED:
21
  try:
22
  from firebase_admin import storage as fb_storage
@@ -24,6 +25,7 @@ def _init_firebase_storage() -> Tuple[any, any]:
24
  return fb_storage, bucket
25
  except Exception as e:
26
  logger.warning("Firebase storage unavailable: %s", e)
 
27
  return None, None
28
 
29
  try:
@@ -35,8 +37,12 @@ def _init_firebase_storage() -> Tuple[any, any]:
35
 
36
  if firebase_admin._apps:
37
  _FIREBASE_INITIALIZED = True
38
- bucket = storage.bucket()
39
- return storage, bucket
 
 
 
 
40
 
41
  sa_json = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")
42
  sa_file = os.getenv("FIREBASE_SERVICE_ACCOUNT_FILE")
 
17
 
18
  def _init_firebase_storage() -> Tuple[any, any]:
19
  global _FIREBASE_INITIALIZED
20
+
21
  if _FIREBASE_INITIALIZED:
22
  try:
23
  from firebase_admin import storage as fb_storage
 
25
  return fb_storage, bucket
26
  except Exception as e:
27
  logger.warning("Firebase storage unavailable: %s", e)
28
+ _FIREBASE_INITIALIZED = False
29
  return None, None
30
 
31
  try:
 
37
 
38
  if firebase_admin._apps:
39
  _FIREBASE_INITIALIZED = True
40
+ try:
41
+ bucket = storage.bucket()
42
+ return storage, bucket
43
+ except Exception as e:
44
+ logger.warning("Firebase storage bucket unavailable: %s", e)
45
+ return None, None
46
 
47
  sa_json = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")
48
  sa_file = os.getenv("FIREBASE_SERVICE_ACCOUNT_FILE")
scripts/download_vectorstore_from_firebase.py CHANGED
@@ -1,10 +1,11 @@
1
  """
2
  Download vectorstore directory from Firebase Storage at container startup.
3
- Run: python -m backend.scripts.download_vectorstore_from_firebase
4
  """
5
 
6
  from __future__ import annotations
7
 
 
8
  import logging
9
  import os
10
  import sys
@@ -12,11 +13,55 @@ from pathlib import Path
12
 
13
  logger = logging.getLogger("mathpulse.download_vectorstore")
14
 
15
- sys.path.insert(0, str(Path(__file__).resolve().parents[2]))
 
16
 
17
- from backend.rag.firebase_storage_loader import _init_firebase_storage
18
 
19
- REMOTE_PREFIX = "vectorstore/"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20
 
21
 
22
  def _resolve_dest_dir() -> Path:
@@ -27,8 +72,7 @@ def _resolve_dest_dir() -> Path:
27
 
28
 
29
  def download_vectorstore(dest_dir: Path, prefix: str = REMOTE_PREFIX):
30
- """Download all files under a prefix from Firebase Storage, preserving structure."""
31
- _, bucket = _init_firebase_storage()
32
  if bucket is None:
33
  logger.warning("Firebase Storage not available, vectorstore download skipped")
34
  return False
@@ -41,6 +85,7 @@ def download_vectorstore(dest_dir: Path, prefix: str = REMOTE_PREFIX):
41
  return False
42
 
43
  downloaded = 0
 
44
  errors = 0
45
 
46
  for blob in blobs:
@@ -54,6 +99,7 @@ def download_vectorstore(dest_dir: Path, prefix: str = REMOTE_PREFIX):
54
  try:
55
  if local_path.exists() and blob.size is not None and local_path.stat().st_size == blob.size:
56
  logger.info("Skipped (already up-to-date): %s", blob.name)
 
57
  continue
58
  blob.download_to_filename(str(local_path))
59
  logger.info("Downloaded: %s (%d bytes)", blob.name, blob.size or 0)
@@ -62,15 +108,11 @@ def download_vectorstore(dest_dir: Path, prefix: str = REMOTE_PREFIX):
62
  logger.error("Failed to download %s: %s", blob.name, e)
63
  errors += 1
64
 
65
- logger.info("Download complete: %d files downloaded, %d errors", downloaded, errors)
66
  return errors == 0
67
 
68
 
69
  if __name__ == "__main__":
70
- import firebase_admin
71
- print("DEBUG: firebase_admin location:", firebase_admin.__file__)
72
- print("DEBUG: firebase_admin apps:", firebase_admin._apps)
73
- print("DEBUG: FIREBASE_SERVICE_ACCOUNT_JSON set:", bool(firebase_admin._GLOBAL_APP is None))
74
  logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
75
  dest_dir = _resolve_dest_dir()
76
  print(f"INFO: Using vectorstore destination: {dest_dir}")
 
1
  """
2
  Download vectorstore directory from Firebase Storage at container startup.
3
+ Run: python /app/scripts/download_vectorstore_from_firebase.py
4
  """
5
 
6
  from __future__ import annotations
7
 
8
+ import json
9
  import logging
10
  import os
11
  import sys
 
13
 
14
  logger = logging.getLogger("mathpulse.download_vectorstore")
15
 
16
+ REMOTE_PREFIX = "vectorstore/"
17
+ _FIREBASE_INITIALIZED = False
18
 
 
19
 
20
+ def _init_firebase() -> any | None:
21
+ global _FIREBASE_INITIALIZED
22
+
23
+ if _FIREBASE_INITIALIZED:
24
+ try:
25
+ from firebase_admin import storage as fb_storage
26
+ return fb_storage.bucket()
27
+ except Exception as e:
28
+ logger.warning("Firebase storage unavailable: %s", e)
29
+ _FIREBASE_INITIALIZED = False
30
+ return None
31
+
32
+ try:
33
+ import firebase_admin
34
+ from firebase_admin import credentials, storage
35
+ except ImportError:
36
+ logger.warning("firebase_admin not installed")
37
+ return None
38
+
39
+ if firebase_admin._apps:
40
+ _FIREBASE_INITIALIZED = True
41
+ try:
42
+ return storage.bucket()
43
+ except Exception as e:
44
+ logger.warning("Firebase storage bucket unavailable: %s", e)
45
+ return None
46
+
47
+ sa_json = os.getenv("FIREBASE_SERVICE_ACCOUNT_JSON")
48
+ sa_file = os.getenv("FIREBASE_SERVICE_ACCOUNT_FILE")
49
+ bucket_name = os.getenv("FIREBASE_STORAGE_BUCKET", "mathpulse-ai-2026.firebasestorage.app")
50
+
51
+ try:
52
+ if sa_json:
53
+ creds = credentials.Certificate(json.loads(sa_json))
54
+ elif sa_file and Path(sa_file).exists():
55
+ creds = credentials.Certificate(sa_file)
56
+ else:
57
+ creds = credentials.ApplicationDefault()
58
+
59
+ firebase_admin.initialize_app(creds, {"storageBucket": bucket_name})
60
+ _FIREBASE_INITIALIZED = True
61
+ return storage.bucket()
62
+ except Exception as e:
63
+ logger.error("Firebase init failed: %s", e)
64
+ return None
65
 
66
 
67
  def _resolve_dest_dir() -> Path:
 
72
 
73
 
74
  def download_vectorstore(dest_dir: Path, prefix: str = REMOTE_PREFIX):
75
+ bucket = _init_firebase()
 
76
  if bucket is None:
77
  logger.warning("Firebase Storage not available, vectorstore download skipped")
78
  return False
 
85
  return False
86
 
87
  downloaded = 0
88
+ skipped = 0
89
  errors = 0
90
 
91
  for blob in blobs:
 
99
  try:
100
  if local_path.exists() and blob.size is not None and local_path.stat().st_size == blob.size:
101
  logger.info("Skipped (already up-to-date): %s", blob.name)
102
+ skipped += 1
103
  continue
104
  blob.download_to_filename(str(local_path))
105
  logger.info("Downloaded: %s (%d bytes)", blob.name, blob.size or 0)
 
108
  logger.error("Failed to download %s: %s", blob.name, e)
109
  errors += 1
110
 
111
+ logger.info("Download complete: %d downloaded, %d skipped, %d errors", downloaded, skipped, errors)
112
  return errors == 0
113
 
114
 
115
  if __name__ == "__main__":
 
 
 
 
116
  logging.basicConfig(level=logging.INFO, format="%(levelname)s: %(message)s")
117
  dest_dir = _resolve_dest_dir()
118
  print(f"INFO: Using vectorstore destination: {dest_dir}")
startup.sh CHANGED
@@ -18,13 +18,11 @@ echo "MathPulse AI Startup"
18
  echo "=========================================="
19
  echo "VECTORSTORE_DIR=${VECTORSTORE_DIR}"
20
  echo "CURRICULUM_VECTORSTORE_DIR=${CURRICULUM_VECTORSTORE_DIR}"
 
21
  echo "FIREBASE_SERVICE_ACCOUNT_JSON set: $(if [ -n "${FIREBASE_SERVICE_ACCOUNT_JSON:-}" ]; then echo YES; else echo NO; fi)"
22
  echo "FIREBASE_STORAGE_BUCKET=${FIREBASE_STORAGE_BUCKET:-not set}"
23
  echo "=========================================="
24
 
25
- echo "Resolved VECTORSTORE_DIR=${VECTORSTORE_DIR}"
26
- echo "Resolved CURRICULUM_VECTORSTORE_DIR=${CURRICULUM_VECTORSTORE_DIR}"
27
-
28
  mkdir -p "${CURRICULUM_DIR}" "${VECTORSTORE_DIR}"
29
 
30
  _vectorstore_cache_dir="${VECTORSTORE_DIR}/.chroma"
@@ -35,7 +33,11 @@ fi
35
 
36
  _ingest_script="/app/scripts/ingest_curriculum.py"
37
  if [ -f "${_ingest_script}" ]; then
38
- if [ -n "${CURRICULUM_SOURCE_REPO_ID:-}" ] || find "${CURRICULUM_DIR}" -type f -name '*.pdf' -print -quit >/dev/null 2>&1; then
 
 
 
 
39
  echo "INFO: Running curriculum ingestion (optional)..."
40
  python "${_ingest_script}" && echo "INFO: Curriculum ingestion completed" || echo "WARNING: Curriculum ingestion failed, continuing anyway"
41
  else
@@ -47,12 +49,15 @@ fi
47
 
48
  _vectorstore_download_script="/app/scripts/download_vectorstore_from_firebase.py"
49
  if [ -f "${_vectorstore_download_script}" ]; then
50
- echo "INFO: Vectorstore files present before download:"
51
  ls -la "${VECTORSTORE_DIR}/"
52
- echo "INFO: Vectorstore download script path: ${_vectorstore_download_script}"
53
- echo "INFO: CURRICULUM_VECTORSTORE_DIR at download time: ${CURRICULUM_VECTORSTORE_DIR}"
54
  echo "INFO: Downloading vectorstore from Firebase Storage..."
55
- python "${_vectorstore_download_script}" || echo "WARNING: Vectorstore download failed, continuing anyway"
 
 
 
 
 
56
  echo "INFO: Vectorstore files present after download:"
57
  ls -la "${VECTORSTORE_DIR}/"
58
  _vectorstore_summary_file="${VECTORSTORE_DIR}/ingest_summary.json"
@@ -65,4 +70,4 @@ else
65
  echo "INFO: Vectorstore download script not found at ${_vectorstore_download_script}; skipping"
66
  fi
67
 
68
- exec uvicorn main:app --host 0.0.0.0 --port 7860 --workers 1
 
18
  echo "=========================================="
19
  echo "VECTORSTORE_DIR=${VECTORSTORE_DIR}"
20
  echo "CURRICULUM_VECTORSTORE_DIR=${CURRICULUM_VECTORSTORE_DIR}"
21
+ echo "CURRICULUM_SOURCE_REPO_ID set: $(if [ -n "${CURRICULUM_SOURCE_REPO_ID:-}" ]; then echo YES; else echo NO; fi)"
22
  echo "FIREBASE_SERVICE_ACCOUNT_JSON set: $(if [ -n "${FIREBASE_SERVICE_ACCOUNT_JSON:-}" ]; then echo YES; else echo NO; fi)"
23
  echo "FIREBASE_STORAGE_BUCKET=${FIREBASE_STORAGE_BUCKET:-not set}"
24
  echo "=========================================="
25
 
 
 
 
26
  mkdir -p "${CURRICULUM_DIR}" "${VECTORSTORE_DIR}"
27
 
28
  _vectorstore_cache_dir="${VECTORSTORE_DIR}/.chroma"
 
33
 
34
  _ingest_script="/app/scripts/ingest_curriculum.py"
35
  if [ -f "${_ingest_script}" ]; then
36
+ _has_pdfs=false
37
+ if [ -d "${CURRICULUM_DIR}" ] && find "${CURRICULUM_DIR}" -type f -name '*.pdf' -print -quit >/dev/null 2>&1; then
38
+ _has_pdfs=true
39
+ fi
40
+ if [ "${_has_pdfs}" = true ] || [ -n "${CURRICULUM_SOURCE_REPO_ID:-}" ]; then
41
  echo "INFO: Running curriculum ingestion (optional)..."
42
  python "${_ingest_script}" && echo "INFO: Curriculum ingestion completed" || echo "WARNING: Curriculum ingestion failed, continuing anyway"
43
  else
 
49
 
50
  _vectorstore_download_script="/app/scripts/download_vectorstore_from_firebase.py"
51
  if [ -f "${_vectorstore_download_script}" ]; then
52
+ echo "INFO: Vectorstore files present before download:"
53
  ls -la "${VECTORSTORE_DIR}/"
 
 
54
  echo "INFO: Downloading vectorstore from Firebase Storage..."
55
+ python "${_vectorstore_download_script}" && _result=0 || _result=1
56
+ if [ $_result -eq 0 ]; then
57
+ echo "INFO: Vectorstore download succeeded"
58
+ else
59
+ echo "WARNING: Vectorstore download failed, continuing anyway"
60
+ fi
61
  echo "INFO: Vectorstore files present after download:"
62
  ls -la "${VECTORSTORE_DIR}/"
63
  _vectorstore_summary_file="${VECTORSTORE_DIR}/ingest_summary.json"
 
70
  echo "INFO: Vectorstore download script not found at ${_vectorstore_download_script}; skipping"
71
  fi
72
 
73
+ exec uvicorn main:app --host 0.0.0.0 --port 7860 --workers 1