Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
|
@@ -22,6 +22,7 @@ class LLMManager:
|
|
| 22 |
self.models = {}
|
| 23 |
self.models_config = models_config
|
| 24 |
self.executor = ThreadPoolExecutor(max_workers=2)
|
|
|
|
| 25 |
self.load_all_models()
|
| 26 |
|
| 27 |
def load_all_models(self):
|
|
@@ -94,30 +95,37 @@ class LLMManager:
|
|
| 94 |
|
| 95 |
def chat_completion(self, model_name, messages, **kwargs):
|
| 96 |
"""Generar respuesta con modelo espec铆fico"""
|
| 97 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 98 |
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
response = model_data["instance"].create_chat_completion(
|
| 106 |
-
messages=messages,
|
| 107 |
-
**kwargs
|
| 108 |
-
)
|
| 109 |
|
| 110 |
-
|
| 111 |
-
|
| 112 |
-
|
|
|
|
| 113 |
|
| 114 |
-
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 121 |
|
| 122 |
def get_all_models_status(self):
|
| 123 |
"""Obtener estado de todos los modelos"""
|
|
|
|
def __init__(self, models_config):
    """Initialize the manager.

    Args:
        models_config: mapping of model name -> configuration, stored for
            use by ``load_all_models``.

    Side effects: creates a 2-worker thread pool and eagerly loads all
    configured models.
    """
    import threading  # local import: top-of-file import block is outside this view

    self.models = {}                 # name -> model data dict (populated by load_all_models)
    self.models_config = models_config
    self.executor = ThreadPoolExecutor(max_workers=2)
    # Serialize generations: only one chat completion may run at a time.
    # BUG FIX: original had `theading.Lock()` (misspelled module name),
    # which raises NameError the first time __init__ runs.
    self.generation_lock = threading.Lock()
    self.load_all_models()
|
| 27 |
|
| 28 |
def load_all_models(self):
|
|
|
|
| 95 |
|
def chat_completion(self, model_name, messages, **kwargs):
    """Generate a chat completion with a specific model.

    Acquires ``self.generation_lock`` without blocking so that only one
    generation runs at a time; concurrent callers get a busy-error dict
    instead of queueing.

    Args:
        model_name: key of the model to use (looked up via ``get_model``).
        messages: chat messages passed through to ``create_chat_completion``.
        **kwargs: extra generation options forwarded to the backend.

    Returns:
        The backend response dict, tagged with ``provider`` and ``model``
        keys, or a ``{"error": ...}`` dict when busy or the model is not
        loaded.
    """
    # Reject immediately if another generation is already in progress.
    if not self.generation_lock.acquire(blocking=False):
        # FIX: repaired mojibake in the user-facing message
        # ("Generaci贸n" -> "Generación").
        return {"error": "Servidor ocupado - Generación en progreso"}

    try:
        model_data = self.get_model(model_name)

        if not model_data or not model_data["loaded"]:
            error_msg = f"Modelo {model_name} no cargado"
            if model_data and "error" in model_data:
                error_msg += f": {model_data['error']}"
            return {"error": error_msg}

        response = model_data["instance"].create_chat_completion(
            messages=messages,
            **kwargs
        )
        response["provider"] = "telechars-ai"
        response["model"] = model_name
        return response
    finally:
        # Always release the lock — on success, on the error-dict paths,
        # and when create_chat_completion raises.
        self.generation_lock.release()
|
def get_loaded_models(self):
    """Return the names of all models whose ``loaded`` flag is set."""
    return [name for name, data in self.models.items() if data["loaded"]]
|
| 129 |
|
| 130 |
def get_all_models_status(self):
|
| 131 |
"""Obtener estado de todos los modelos"""
|