MarcosFRGames committed
Commit f5bb8ea (verified)
Parent: 2d16631

Update app.py

Files changed (1):
  app.py (+25, -15)
app.py CHANGED
@@ -7,6 +7,7 @@ import requests
 import tempfile
 import json
 from concurrent.futures import ThreadPoolExecutor
+import signal
 
 app = Flask(__name__)
 logging.basicConfig(level=logging.INFO)
@@ -22,7 +23,7 @@ class LLMManager:
         self.models = {}
         self.models_config = models_config
         self.executor = ThreadPoolExecutor(max_workers=2)
-        self.generation_lock = theading.Lock()
+        self.generation_lock = threading.Lock()
         self.load_all_models()
 
     def load_all_models(self):
@@ -42,7 +43,7 @@ class LLMManager:
             llm_instance = Llama(
                 model_path=temp_path,
                 n_ctx=MAX_CONTEXT_TOKENS,
-                n_batch=128,
+                n_batch=96,
                 n_threads=2,
                 n_threads_batch=2,
                 use_mlock=True,
@@ -106,26 +107,35 @@ class LLMManager:
             if model_data and "error" in model_data:
                 error_msg += f": {model_data['error']}"
                 return {"error": error_msg}
-
-            response = model_data["instance"].create_chat_completion(
-                messages=messages,
-                **kwargs
-            )
+
+            def timeout_handler(signum, frame):
+                raise TimeoutError("Generation timed out (120 seconds)")
+
+            signal.signal(signal.SIGALRM, timeout_handler)
+            signal.alarm(120)
+
+            try:
+                response = model_data["instance"].create_chat_completion(
+                    messages=messages,
+                    **kwargs
+                )
+            finally:
+                signal.alarm(0)
 
             response["provider"] = "telechars-ai"
             response["model"] = model_name
             return response
 
         finally:
-            # Always release the lock
             self.generation_lock.release()
-    def get_loaded_models(self):
-        """Get the list of loaded models"""
-        loaded = []
-        for name, data in self.models.items():
-            if data["loaded"]:
-                loaded.append(name)
-        return loaded
+
+    def get_loaded_models(self):
+        """Get the list of loaded models"""
+        loaded = []
+        for name, data in self.models.items():
+            if data["loaded"]:
+                loaded.append(name)
+        return loaded
 
     def get_all_models_status(self):
         """Get the status of all models"""