gary-boon (Claude Opus 4.6, 1M context) committed
Commit · 6c5265e
Parent(s): 82349c1
Fix model selector not showing loaded GPU model on CPU-detected hardware
The /models endpoint filtered models by hardware capability, hiding
GPU-only models (Devstral) when CUDA init failed (e.g. driver too old).
But the model was already loaded and running successfully on CPU.
The endpoint now always includes the currently loaded model in the
available list, regardless of hardware detection: if it's running,
it's available.
Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
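
For context, here is the post-fix selection logic as a minimal, self-contained sketch. The registry entries, the function signature, and the 24 GB VRAM figure are illustrative assumptions; only the inclusion rule and the loaded-model override mirror the actual diff below.

```python
# Minimal sketch of the post-fix /models filtering logic.
# The registry contents are illustrative stand-ins; only the shape
# (min_device, min_vram_gb) and the override rule mirror the diff.
SUPPORTED_MODELS = {
    "devstral": {"min_device": "gpu", "min_vram_gb": 24},
    "qwen-small": {"min_device": "cpu", "min_vram_gb": 0},
}

def list_models(device_type, has_gpu, available_vram, loaded_model_id):
    """Return the model entries the selector should show.

    device_type: "gpu" or "cpu", as detected at startup.
    loaded_model_id: whatever is actually running right now, or None.
    """
    models = []
    for model_id, config in SUPPORTED_MODELS.items():
        min_device = config.get("min_device", "cpu")
        # Include if the backend is GPU, the model can run on CPU,
        # or it is the model that is already loaded and serving.
        if device_type == "gpu" or min_device == "cpu" or model_id == loaded_model_id:
            is_available = True
            if has_gpu and 0 < available_vram < config["min_vram_gb"]:
                is_available = False
            # A loaded model is available by definition: it is running.
            if model_id == loaded_model_id:
                is_available = True
            models.append({"id": model_id, "available": is_available})
    return models
```

With CUDA init failed (device_type == "cpu", has_gpu == False) and Devstral already loaded, the old condition dropped it from the list; the override keeps it both listed and marked available.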
backend/model_service.py (+9, -3)
```diff
@@ -1311,17 +1311,23 @@ async def list_models():
     if has_gpu and torch.cuda.is_available():
         available_vram = torch.cuda.get_device_properties(0).total_memory / (1024**3)  # GB

+    # Always include the currently loaded model (it's running, regardless of
+    # what the hardware check thinks — e.g. CUDA driver too old but CPU works)
+    current_model_id = manager.model_id if manager else None
+
     models = []
     for model_id, config in SUPPORTED_MODELS.items():
         model_min_device = config.get("min_device", "cpu")

-        # GPU
-
-        if device_type == "gpu" or model_min_device == "cpu":
+        # Include if: GPU backend, or CPU-compatible model, or currently loaded
+        if device_type == "gpu" or model_min_device == "cpu" or model_id == current_model_id:
             # Check VRAM requirements for GPU models
             is_available = True
             if has_gpu and available_vram > 0 and available_vram < config["min_vram_gb"]:
                 is_available = False
+            # Currently loaded model is always available
+            if model_id == current_model_id:
+                is_available = True

             models.append({
                 "id": model_id,
```
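A quick sanity check of the regression scenario, written against the hypothetical sketch above rather than against the service itself:

```python
# CUDA driver too old: the hardware probe reports CPU-only, but the
# GPU-only model was already loaded and is serving on CPU. It must
# still appear in the list, and be marked available.
result = list_models(device_type="cpu", has_gpu=False,
                     available_vram=0.0, loaded_model_id="devstral")
assert any(m["id"] == "devstral" and m["available"] for m in result)
```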