Spaces:
Running
on
Zero
Running
on
Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -11,7 +11,7 @@ from typing import Optional
|
|
| 11 |
|
| 12 |
# ---------- Fast, safe defaults ----------
|
| 13 |
os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1") # faster model downloads
|
| 14 |
-
os.environ.setdefault("DEEPSPEED_DISABLE_NVML", "1")
|
| 15 |
os.environ.setdefault("BITSANDBYTES_NOWELCOME", "1")
|
| 16 |
|
| 17 |
# ---------- Logging ----------
|
|
@@ -34,7 +34,21 @@ QUEUE_SIZE = int(os.getenv("QUEUE_SIZE", "32"))
|
|
| 34 |
ENABLE_SSR = os.getenv("ENABLE_SSR", "false").lower() == "true" # SSR off by default for stability
|
| 35 |
WARMUP = os.getenv("WARMUP", "false").lower() == "true" # default False for ZeroGPU
|
| 36 |
|
| 37 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
import warnings
|
| 39 |
warnings.filterwarnings("ignore", message="Can't initialize NVML")
|
| 40 |
|
|
@@ -49,18 +63,6 @@ from diffusers import (
|
|
| 49 |
AutoPipelineForText2Image,
|
| 50 |
)
|
| 51 |
|
| 52 |
-
# ---------- ZeroGPU decorator (works even off-Spaces) ----------
|
| 53 |
-
try:
|
| 54 |
-
import spaces # real decorator on HF Spaces
|
| 55 |
-
except ImportError:
|
| 56 |
-
# Local/dev fallback: no-op decorator so app still runs without ZeroGPU
|
| 57 |
-
class _DummySpaces:
|
| 58 |
-
def GPU(self, *args, **kwargs):
|
| 59 |
-
def _wrap(f):
|
| 60 |
-
return f
|
| 61 |
-
return _wrap
|
| 62 |
-
spaces = _DummySpaces()
|
| 63 |
-
|
| 64 |
# ---------- Version guard: Torch 2.1 + NumPy 2.x is incompatible ----------
|
| 65 |
try:
|
| 66 |
_np_major = int(np.__version__.split(".")[0])
|
|
@@ -97,7 +99,7 @@ pipe: Optional[DiffusionPipeline] = None
|
|
| 97 |
|
| 98 |
def _gpu_mem_efficiency(p: DiffusionPipeline) -> None:
|
| 99 |
"""Enable memory-efficient attention and VAE tiling where possible."""
|
| 100 |
-
enabled =
|
| 101 |
try:
|
| 102 |
p.enable_xformers_memory_efficient_attention()
|
| 103 |
enabled = True
|
|
@@ -203,7 +205,6 @@ def generate(
|
|
| 203 |
secret_token: str = "",
|
| 204 |
) -> Image.Image:
|
| 205 |
if secret_token != SECRET_TOKEN:
|
| 206 |
-
# Using gr.Error keeps the nice Gradio toast in UI
|
| 207 |
raise gr.Error("Invalid secret token. Set SECRET_TOKEN or pass the correct token.")
|
| 208 |
|
| 209 |
_p = ensure_pipe()
|
|
|
|
| 11 |
|
| 12 |
# ---------- Fast, safe defaults ----------
|
| 13 |
os.environ.setdefault("HF_HUB_ENABLE_HF_TRANSFER", "1") # faster model downloads
|
| 14 |
+
os.environ.setdefault("DEEPSPEED_DISABLE_NVML", "1") # silence NVML in headless envs
|
| 15 |
os.environ.setdefault("BITSANDBYTES_NOWELCOME", "1")
|
| 16 |
|
| 17 |
# ---------- Logging ----------
|
|
|
|
| 34 |
ENABLE_SSR = os.getenv("ENABLE_SSR", "false").lower() == "true" # SSR off by default for stability
|
| 35 |
WARMUP = os.getenv("WARMUP", "false").lower() == "true" # default False for ZeroGPU
|
| 36 |
|
| 37 |
+
# ============================================================
|
| 38 |
+
# Import `spaces` BEFORE any CUDA-related libs (torch/diffusers)
|
| 39 |
+
# ============================================================
|
| 40 |
+
try:
|
| 41 |
+
import spaces # real decorator on HF Spaces
|
| 42 |
+
except ImportError:
|
| 43 |
+
# Local/dev fallback: no-op decorator so app still runs without ZeroGPU
|
| 44 |
+
class _DummySpaces:
|
| 45 |
+
def GPU(self, *args, **kwargs):
|
| 46 |
+
def _wrap(f):
|
| 47 |
+
return f
|
| 48 |
+
return _wrap
|
| 49 |
+
spaces = _DummySpaces()
|
| 50 |
+
|
| 51 |
+
# ---------- Third-party imports (safe to import after `spaces`) ----------
|
| 52 |
import warnings
|
| 53 |
warnings.filterwarnings("ignore", message="Can't initialize NVML")
|
| 54 |
|
|
|
|
| 63 |
AutoPipelineForText2Image,
|
| 64 |
)
|
| 65 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 66 |
# ---------- Version guard: Torch 2.1 + NumPy 2.x is incompatible ----------
|
| 67 |
try:
|
| 68 |
_np_major = int(np.__version__.split(".")[0])
|
|
|
|
| 99 |
|
| 100 |
def _gpu_mem_efficiency(p: DiffusionPipeline) -> None:
|
| 101 |
"""Enable memory-efficient attention and VAE tiling where possible."""
|
| 102 |
+
enabled = False
|
| 103 |
try:
|
| 104 |
p.enable_xformers_memory_efficient_attention()
|
| 105 |
enabled = True
|
|
|
|
| 205 |
secret_token: str = "",
|
| 206 |
) -> Image.Image:
|
| 207 |
if secret_token != SECRET_TOKEN:
|
|
|
|
| 208 |
raise gr.Error("Invalid secret token. Set SECRET_TOKEN or pass the correct token.")
|
| 209 |
|
| 210 |
_p = ensure_pipe()
|