Davidtran99 committed on
Commit
07a574b
·
1 Parent(s): 18d99df

fix: disable all model preload to prevent CPU crash

Browse files
Files changed (1) hide show
  1. entrypoint.sh +2 -31
entrypoint.sh CHANGED
@@ -31,37 +31,8 @@ log "Ensuring cache table exists..."
31
  python hue_portal/manage.py createcachetable
32
  log "Cache table ready."
33
 
34
- log "Preloading all models to avoid first-request timeout..."
35
-
36
- python -c "
37
- import os
38
- import sys
39
- os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'hue_portal.hue_portal.settings')
40
- import django
41
- django.setup()
42
-
43
- print('[ENTRYPOINT] 🔄 Starting model preload...', flush=True)
44
-
45
- # 1. Preload Embedding Model (BGE-M3)
46
- try:
47
- print('[ENTRYPOINT] 📦 Preloading embedding model (BGE-M3)...', flush=True)
48
- from hue_portal.core.embeddings import get_embedding_model
49
- embedding_model = get_embedding_model()
50
- if embedding_model:
51
- print('[ENTRYPOINT] ✅ Embedding model preloaded successfully', flush=True)
52
- else:
53
- print('[ENTRYPOINT] ⚠️ Embedding model not loaded', flush=True)
54
- except Exception as e:
55
- print(f'[ENTRYPOINT] ⚠️ Embedding model preload failed: {e}', flush=True)
56
-
57
- # 2. SKIP LLM Preload (CPU optimization - too heavy, will load on first request)
58
- print('[ENTRYPOINT] ⏭️ Skipping LLM preload (CPU optimization - will load lazily on first request)', flush=True)
59
-
60
- # 3. SKIP Reranker Preload (CPU optimization - too heavy, will load on first request)
61
- print('[ENTRYPOINT] ⏭️ Skipping reranker preload (CPU optimization - will load lazily on first request)', flush=True)
62
-
63
- print('[ENTRYPOINT] ✅ Model preload completed', flush=True) # v2.0-preload-all
64
- " || log "⚠️ Model preload had errors (models will load on first request)"
65
 
66
  log "Starting Gunicorn on port ${PORT:-7860}..."
67
 
 
31
  python hue_portal/manage.py createcachetable
32
  log "Cache table ready."
33
 
34
+ # Skip model preload to prevent CPU overload (models will load lazily on first request)
35
+ log "⏭️ Skipping model preload (CPU optimization - models will load on first request)"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
 
37
  log "Starting Gunicorn on port ${PORT:-7860}..."
38