fuvty committed on
Commit
821387d
·
1 Parent(s): 3cb5609

[debug] zeroGPU

Browse files
Files changed (1) hide show
  1. app.py +20 -6
app.py CHANGED
@@ -106,6 +106,7 @@ class ModelManager:
106
  self.single_model_name, self.device
107
  )
108
  set_default_chat_template(self.single_tokenizer, self.single_model_name)
 
109
  # Move to CUDA if available (following HuggingFace ZeroGPU pattern)
110
  if torch.cuda.is_available():
111
  self.single_model = self.single_model.to('cuda')
@@ -186,6 +187,7 @@ class ModelManager:
186
  self.c2c_model, self.c2c_tokenizer = load_rosetta_model(
187
  model_config, eval_config, self.device
188
  )
 
189
  # Move to CUDA if available (following HuggingFace ZeroGPU pattern)
190
  if torch.cuda.is_available():
191
  self.c2c_model = self.c2c_model.to('cuda')
@@ -259,9 +261,15 @@ class ModelManager:
259
 
260
  # Stream tokens
261
  generated_text = ""
262
- for token in streamer:
263
- generated_text += token
264
- yield generated_text
 
 
 
 
 
 
265
 
266
  @spaces.GPU(duration=90)
267
  def generate_t2t(self, user_input: str) -> Generator[tuple[str, str], None, None]:
@@ -391,9 +399,15 @@ class ModelManager:
391
 
392
  # Stream tokens
393
  generated_text = ""
394
- for token in streamer:
395
- generated_text += token
396
- yield generated_text
 
 
 
 
 
 
397
 
398
 
399
  def create_demo(model_manager: ModelManager):
 
106
  self.single_model_name, self.device
107
  )
108
  set_default_chat_template(self.single_tokenizer, self.single_model_name)
109
+
110
  # Move to CUDA if available (following HuggingFace ZeroGPU pattern)
111
  if torch.cuda.is_available():
112
  self.single_model = self.single_model.to('cuda')
 
187
  self.c2c_model, self.c2c_tokenizer = load_rosetta_model(
188
  model_config, eval_config, self.device
189
  )
190
+
191
  # Move to CUDA if available (following HuggingFace ZeroGPU pattern)
192
  if torch.cuda.is_available():
193
  self.c2c_model = self.c2c_model.to('cuda')
 
261
 
262
  # Stream tokens
263
  generated_text = ""
264
+ try:
265
+ for token in streamer:
266
+ generated_text += token
267
+ yield generated_text
268
+ except Exception as e:
269
+ print(f"[Single] Streaming error: {e}")
270
+ yield f"Error generating response: {e}"
271
+ finally:
272
+ thread.join()
273
 
274
  @spaces.GPU(duration=90)
275
  def generate_t2t(self, user_input: str) -> Generator[tuple[str, str], None, None]:
 
399
 
400
  # Stream tokens
401
  generated_text = ""
402
+ try:
403
+ for token in streamer:
404
+ generated_text += token
405
+ yield generated_text
406
+ except Exception as e:
407
+ print(f"[C2C] Streaming error: {e}")
408
+ yield f"Error generating response: {e}"
409
+ finally:
410
+ thread.join()
411
 
412
 
413
  def create_demo(model_manager: ModelManager):