Akis Giannoukos committed on
Commit
d044724
·
1 Parent(s): 628be0b

Disable global Torch compile/dynamo settings to prevent cudagraph assertion errors and remove the deprecated safe generation function for improved code clarity.

Browse files
Files changed (1) hide show
  1. app.py +7 -37
app.py CHANGED
@@ -1,4 +1,9 @@
1
  import os
 
 
 
 
 
2
  import json
3
  import re
4
  import time
@@ -91,39 +96,6 @@ def get_textgen_pipeline():
91
  )
92
  return _gen_pipe
93
 
94
- def _safe_hf_generate(pipe, prompt: str, **gen_kwargs):
95
- """Call HF generate pipeline with best-effort fallbacks to avoid TorchDynamo/Inductor issues."""
96
- try:
97
- return pipe(prompt, **gen_kwargs)
98
- except Exception:
99
- # Best-effort: disable dynamo via env and retry once
100
- try:
101
- os.environ["TORCHDYNAMO_DISABLE"] = "1"
102
- os.environ["TORCH_COMPILE_DISABLE"] = "1"
103
- os.environ["TORCHINDUCTOR_FREEZE"] = "1"
104
- except Exception:
105
- pass
106
- try:
107
- # Also disable cudagraphs if available
108
- try:
109
- import torch._inductor.config as _inductor_cfg # type: ignore
110
- _inductor_cfg.triton.cudagraphs = False
111
- except Exception:
112
- pass
113
- return pipe(prompt, **gen_kwargs)
114
- except Exception:
115
- # Final fallback: CPU pipeline generation
116
- try:
117
- from transformers import pipeline as hf_pipeline
118
- cpu_pipe = hf_pipeline(
119
- task="text-generation",
120
- model=pipe.model,
121
- tokenizer=pipe.tokenizer,
122
- device=-1,
123
- )
124
- return cpu_pipe(prompt, **gen_kwargs)
125
- except Exception:
126
- raise
127
 
128
 
129
  def set_current_model_id(new_model_id: str) -> str:
@@ -381,8 +353,7 @@ def generate_recording_agent_reply(chat_history: List[Tuple[str, str]]) -> str:
381
  import torch._dynamo as _dynamo # type: ignore
382
  except Exception:
383
  _dynamo = None
384
- gen = _safe_hf_generate(
385
- pipe,
386
  prompt,
387
  max_new_tokens=96,
388
  temperature=0.7,
@@ -428,8 +399,7 @@ def scoring_agent_infer(chat_history: List[Tuple[str, str]], features: Dict[str,
428
  import torch._dynamo as _dynamo # type: ignore
429
  except Exception:
430
  _dynamo = None
431
- gen = _safe_hf_generate(
432
- pipe,
433
  prompt,
434
  max_new_tokens=256,
435
  temperature=0.0,
 
1
  import os
2
+
3
+ # Disable torch compile/dynamo globally to avoid cudagraph assertion errors
4
+ os.environ["TORCHDYNAMO_DISABLE"] = "1"
5
+ os.environ["TORCH_COMPILE_DISABLE"] = "1"
6
+
7
  import json
8
  import re
9
  import time
 
96
  )
97
  return _gen_pipe
98
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
99
 
100
 
101
  def set_current_model_id(new_model_id: str) -> str:
 
353
  import torch._dynamo as _dynamo # type: ignore
354
  except Exception:
355
  _dynamo = None
356
+ gen = pipe(
 
357
  prompt,
358
  max_new_tokens=96,
359
  temperature=0.7,
 
399
  import torch._dynamo as _dynamo # type: ignore
400
  except Exception:
401
  _dynamo = None
402
+ gen = pipe(
 
403
  prompt,
404
  max_new_tokens=256,
405
  temperature=0.0,