Spaces:
Sleeping
Sleeping
[debug] zeroGPU
Browse files
app.py
CHANGED
|
@@ -106,6 +106,7 @@ class ModelManager:
|
|
| 106 |
self.single_model_name, self.device
|
| 107 |
)
|
| 108 |
set_default_chat_template(self.single_tokenizer, self.single_model_name)
|
|
|
|
| 109 |
# Move to CUDA if available (following HuggingFace ZeroGPU pattern)
|
| 110 |
if torch.cuda.is_available():
|
| 111 |
self.single_model = self.single_model.to('cuda')
|
|
@@ -186,6 +187,7 @@ class ModelManager:
|
|
| 186 |
self.c2c_model, self.c2c_tokenizer = load_rosetta_model(
|
| 187 |
model_config, eval_config, self.device
|
| 188 |
)
|
|
|
|
| 189 |
# Move to CUDA if available (following HuggingFace ZeroGPU pattern)
|
| 190 |
if torch.cuda.is_available():
|
| 191 |
self.c2c_model = self.c2c_model.to('cuda')
|
|
@@ -259,9 +261,15 @@ class ModelManager:
|
|
| 259 |
|
| 260 |
# Stream tokens
|
| 261 |
generated_text = ""
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 265 |
|
| 266 |
@spaces.GPU(duration=90)
|
| 267 |
def generate_t2t(self, user_input: str) -> Generator[tuple[str, str], None, None]:
|
|
@@ -391,9 +399,15 @@ class ModelManager:
|
|
| 391 |
|
| 392 |
# Stream tokens
|
| 393 |
generated_text = ""
|
| 394 |
-
|
| 395 |
-
|
| 396 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 397 |
|
| 398 |
|
| 399 |
def create_demo(model_manager: ModelManager):
|
|
|
|
| 106 |
self.single_model_name, self.device
|
| 107 |
)
|
| 108 |
set_default_chat_template(self.single_tokenizer, self.single_model_name)
|
| 109 |
+
|
| 110 |
# Move to CUDA if available (following HuggingFace ZeroGPU pattern)
|
| 111 |
if torch.cuda.is_available():
|
| 112 |
self.single_model = self.single_model.to('cuda')
|
|
|
|
| 187 |
self.c2c_model, self.c2c_tokenizer = load_rosetta_model(
|
| 188 |
model_config, eval_config, self.device
|
| 189 |
)
|
| 190 |
+
|
| 191 |
# Move to CUDA if available (following HuggingFace ZeroGPU pattern)
|
| 192 |
if torch.cuda.is_available():
|
| 193 |
self.c2c_model = self.c2c_model.to('cuda')
|
|
|
|
| 261 |
|
| 262 |
# Stream tokens
|
| 263 |
generated_text = ""
|
| 264 |
+
try:
|
| 265 |
+
for token in streamer:
|
| 266 |
+
generated_text += token
|
| 267 |
+
yield generated_text
|
| 268 |
+
except Exception as e:
|
| 269 |
+
print(f"[Single] Streaming error: {e}")
|
| 270 |
+
yield f"Error generating response: {e}"
|
| 271 |
+
finally:
|
| 272 |
+
thread.join()
|
| 273 |
|
| 274 |
@spaces.GPU(duration=90)
|
| 275 |
def generate_t2t(self, user_input: str) -> Generator[tuple[str, str], None, None]:
|
|
|
|
| 399 |
|
| 400 |
# Stream tokens
|
| 401 |
generated_text = ""
|
| 402 |
+
try:
|
| 403 |
+
for token in streamer:
|
| 404 |
+
generated_text += token
|
| 405 |
+
yield generated_text
|
| 406 |
+
except Exception as e:
|
| 407 |
+
print(f"[C2C] Streaming error: {e}")
|
| 408 |
+
yield f"Error generating response: {e}"
|
| 409 |
+
finally:
|
| 410 |
+
thread.join()
|
| 411 |
|
| 412 |
|
| 413 |
def create_demo(model_manager: ModelManager):
|