Davidtran99
committed on
Commit
·
07a574b
1
Parent(s):
18d99df
fix: disable all model preload to prevent CPU crash
Browse files - entrypoint.sh +2 -31
entrypoint.sh
CHANGED
|
@@ -31,37 +31,8 @@ log "Ensuring cache table exists..."
|
|
| 31 |
python hue_portal/manage.py createcachetable
|
| 32 |
log "Cache table ready."
|
| 33 |
|
| 34 |
-
|
| 35 |
-
|
| 36 |
-
python -c "
|
| 37 |
-
import os
|
| 38 |
-
import sys
|
| 39 |
-
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'hue_portal.hue_portal.settings')
|
| 40 |
-
import django
|
| 41 |
-
django.setup()
|
| 42 |
-
|
| 43 |
-
print('[ENTRYPOINT] 🔄 Starting model preload...', flush=True)
|
| 44 |
-
|
| 45 |
-
# 1. Preload Embedding Model (BGE-M3)
|
| 46 |
-
try:
|
| 47 |
-
print('[ENTRYPOINT] 📦 Preloading embedding model (BGE-M3)...', flush=True)
|
| 48 |
-
from hue_portal.core.embeddings import get_embedding_model
|
| 49 |
-
embedding_model = get_embedding_model()
|
| 50 |
-
if embedding_model:
|
| 51 |
-
print('[ENTRYPOINT] ✅ Embedding model preloaded successfully', flush=True)
|
| 52 |
-
else:
|
| 53 |
-
print('[ENTRYPOINT] ⚠️ Embedding model not loaded', flush=True)
|
| 54 |
-
except Exception as e:
|
| 55 |
-
print(f'[ENTRYPOINT] ⚠️ Embedding model preload failed: {e}', flush=True)
|
| 56 |
-
|
| 57 |
-
# 2. SKIP LLM Preload (CPU optimization - too heavy, will load on first request)
|
| 58 |
-
print('[ENTRYPOINT] ⏭️ Skipping LLM preload (CPU optimization - will load lazily on first request)', flush=True)
|
| 59 |
-
|
| 60 |
-
# 3. SKIP Reranker Preload (CPU optimization - too heavy, will load on first request)
|
| 61 |
-
print('[ENTRYPOINT] ⏭️ Skipping reranker preload (CPU optimization - will load lazily on first request)', flush=True)
|
| 62 |
-
|
| 63 |
-
print('[ENTRYPOINT] ✅ Model preload completed', flush=True) # v2.0-preload-all
|
| 64 |
-
" || log "⚠️ Model preload had errors (models will load on first request)"
|
| 65 |
|
| 66 |
log "Starting Gunicorn on port ${PORT:-7860}..."
|
| 67 |
|
|
|
|
| 31 |
python hue_portal/manage.py createcachetable
|
| 32 |
log "Cache table ready."
|
| 33 |
|
| 34 |
+
# Skip model preload to prevent CPU overload (models will load lazily on first request)
|
| 35 |
+
log "⏭️ Skipping model preload (CPU optimization - models will load on first request)"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
|
| 37 |
log "Starting Gunicorn on port ${PORT:-7860}..."
|
| 38 |
|