Update app.py
app.py CHANGED
@@ -873,8 +873,8 @@ class ZeroEngine:
             logger.warning(f"[BOOT] Cleanup warning: {e}")

         # Calculate optimal parameters with token purchases
-
-        available_ram_gb =
+        # Force use 16GB RAM instead of incorrect system detection
+        available_ram_gb = 16.0 * 0.7  # 70% of 16GB = ~11.2GB available

         # CPU-OPTIMIZED BATCH CALCULATION - Very aggressive for 16GB RAM
         # Base calculation: use more RAM for batching on CPU
@@ -888,7 +888,7 @@ class ZeroEngine:
             logger.info(f"[TOKEN] User batch size: {user_batch_size}")

         # CPU can handle larger batches with quantized models
-        optimal_batch = max(256, min(
+        optimal_batch = max(256, min(1024, optimal_batch))  # 256-1024 range for CPU (balanced performance)

         # Context size
         optimal_ctx = quant_config["ctx_size"]
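In short, the commit hardcodes a 16GB RAM budget (70% usable) in place of runtime detection and clamps the CPU batch size to a 256-1024 range. A minimal sketch of how such a calculation could fit together is below; apart from available_ram_gb, the max(256, min(1024, ...)) clamp, and optimal_ctx = quant_config["ctx_size"], every name and constant is an assumption, since the full function body is not visible in the diff.

# Hypothetical sketch of the CPU parameter calculation around this diff.
# Only available_ram_gb, the 256-1024 clamp, and quant_config["ctx_size"]
# come from the commit; the scaling factor and function shape are assumptions.

def calc_cpu_params(quant_config, user_batch_size=None):
    # Force 16GB RAM instead of (possibly incorrect) system detection,
    # keeping 70% as the usable budget, as the commit does.
    available_ram_gb = 16.0 * 0.7  # ~11.2GB available

    # Assumed heuristic: scale batch size linearly with usable RAM.
    optimal_batch = int(available_ram_gb * 64)  # hypothetical scaling factor

    # Honor an explicit user batch size if one was supplied (hypothetical).
    if user_batch_size:
        optimal_batch = user_batch_size

    # 256-1024 range for CPU (balanced performance), as on line 891.
    optimal_batch = max(256, min(1024, optimal_batch))

    # Context size comes straight from the quantization config (line 894).
    optimal_ctx = quant_config["ctx_size"]
    return optimal_batch, optimal_ctx

# Example: an 8k-context quantization config.
print(calc_cpu_params({"ctx_size": 8192}))  # -> (716, 8192)

Hardcoding the budget trades portability for predictability: RAM detection inside a container often reports the host's memory rather than the container's limit, which is presumably the "incorrect system detection" the new comment refers to.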