Spaces:

ruslanmv
/

ai-fast-image-server

Running on Zero

App Files Community

ruslanmv committed on Sep 28

Commit

c2fbaa7

1 Parent(s): 20ea5a4

Update app.py

Browse files

Files changed (1) hide show

app.py +17 -16

app.py CHANGED Viewed

@@ -11,7 +11,7 @@ from typing import Optional
 # ---------- Fast, safe defaults ----------
 os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")   # faster model downloads
-os.environ.setdefault("DEEPSPEED_DISABLE_NVML", "1")       # silence NVML in headless envs
 os.environ.setdefault("BITSANDBYTES_NOWELCOME", "1")
 # ---------- Logging ----------
@@ -34,7 +34,21 @@ QUEUE_SIZE = int(os.getenv("QUEUE_SIZE", "32"))
 ENABLE_SSR = os.getenv("ENABLE_SSR", "false").lower() == "true"  # SSR off by default for stability
 WARMUP = os.getenv("WARMUP", "false").lower() == "true"          # default False for ZeroGPU
-# ---------- Third-party imports ----------
 import warnings
 warnings.filterwarnings("ignore", message="Can't initialize NVML")
@@ -49,18 +63,6 @@ from diffusers import (
     AutoPipelineForText2Image,
 )
-# ---------- ZeroGPU decorator (works even off-Spaces) ----------
-try:
-    import spaces  # real decorator on HF Spaces
-except ImportError:
-    # Local/dev fallback: no-op decorator so app still runs without ZeroGPU
-    class _DummySpaces:
-        def GPU(self, *args, **kwargs):
-            def _wrap(f):
-                return f
-            return _wrap
-    spaces = _DummySpaces()
 # ---------- Version guard: Torch 2.1 + NumPy 2.x is incompatible ----------
 try:
     _np_major = int(np.__version__.split(".")[0])
@@ -97,7 +99,7 @@ pipe: Optional[DiffusionPipeline] = None
 def _gpu_mem_efficiency(p: DiffusionPipeline) -> None:
     """Enable memory-efficient attention and VAE tiling where possible."""
-    enabled = false_flag = False
     try:
         p.enable_xformers_memory_efficient_attention()
         enabled = True
@@ -203,7 +205,6 @@ def generate(
     secret_token: str = "",
 ) -> Image.Image:
     if secret_token != SECRET_TOKEN:
-        # Using gr.Error keeps the nice Gradio toast in UI
         raise gr.Error("Invalid secret token. Set SECRET_TOKEN or pass the correct token.")
     _p = ensure_pipe()

 # ---------- Fast, safe defaults ----------
 os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1")   # faster model downloads
+os.environ.setdefault("DEEPSPEED_DISABLE_NVML", "1")      # silence NVML in headless envs
 os.environ.setdefault("BITSANDBYTES_NOWELCOME", "1")
 # ---------- Logging ----------
 ENABLE_SSR = os.getenv("ENABLE_SSR", "false").lower() == "true"  # SSR off by default for stability
 WARMUP = os.getenv("WARMUP", "false").lower() == "true"          # default False for ZeroGPU
+# ============================================================
+# Import `spaces` BEFORE any CUDA-related libs (torch/diffusers)
+# ============================================================
+try:
+    import spaces  # real decorator on HF Spaces
+except ImportError:
+    # Local/dev fallback: no-op decorator so app still runs without ZeroGPU
+    class _DummySpaces:
+        def GPU(self, *args, **kwargs):
+            def _wrap(f):
+                return f
+            return _wrap
+    spaces = _DummySpaces()
+# ---------- Third-party imports (safe to import after `spaces`) ----------
 import warnings
 warnings.filterwarnings("ignore", message="Can't initialize NVML")
     AutoPipelineForText2Image,
 )
 # ---------- Version guard: Torch 2.1 + NumPy 2.x is incompatible ----------
 try:
     _np_major = int(np.__version__.split(".")[0])
 def _gpu_mem_efficiency(p: DiffusionPipeline) -> None:
     """Enable memory-efficient attention and VAE tiling where possible."""
+    enabled = False
     try:
         p.enable_xformers_memory_efficient_attention()
         enabled = True
     secret_token: str = "",
 ) -> Image.Image:
     if secret_token != SECRET_TOKEN:
         raise gr.Error("Invalid secret token. Set SECRET_TOKEN or pass the correct token.")
     _p = ensure_pipe()