Spaces:
Running on Zero
Running on Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -176,87 +176,67 @@ print("Pipeline initialized successfully!")
|
|
| 176 |
print("=" * 80)
|
| 177 |
|
| 178 |
# =============================================================================
|
| 179 |
-
# ZeroGPU Tensor Preloading
|
| 180 |
# =============================================================================
|
| 181 |
-
#
|
| 182 |
-
# We
|
| 183 |
-
#
|
| 184 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
|
| 186 |
-
print("Preloading non-CUDA components for ZeroGPU tensor packing...")
|
| 187 |
-
print("This may take a few minutes...")
|
| 188 |
-
|
| 189 |
-
# 1. Try loading video encoder (may work without GPU if just file loading)
|
| 190 |
-
print(" Loading video encoder...")
|
| 191 |
try:
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
|
|
|
|
| 195 |
except Exception as e:
|
| 196 |
-
print(f"
|
| 197 |
-
|
| 198 |
-
# 2. Try loading video decoder (VAE - may work without GPU)
|
| 199 |
-
print(" Loading video decoder...")
|
| 200 |
-
try:
|
| 201 |
-
_video_decoder = pipeline.video_decoder._decoder_builder()
|
| 202 |
-
pipeline.video_decoder._decoder_builder = lambda: _video_decoder
|
| 203 |
-
if hasattr(pipeline.video_decoder, '_decoder'):
|
| 204 |
-
pipeline.video_decoder._decoder = _video_decoder
|
| 205 |
-
print(f" Loaded video decoder: {type(_video_decoder)}")
|
| 206 |
-
except Exception as e:
|
| 207 |
-
print(f" Video decoder preload skipped: {e}")
|
| 208 |
-
|
| 209 |
-
# 3. Try loading audio decoder (VAE - may work without GPU)
|
| 210 |
-
print(" Loading audio decoder...")
|
| 211 |
-
try:
|
| 212 |
-
_audio_decoder = pipeline.audio_decoder._decoder_builder()
|
| 213 |
-
pipeline.audio_decoder._decoder_builder = lambda: _audio_decoder
|
| 214 |
-
if hasattr(pipeline.audio_decoder, '_decoder'):
|
| 215 |
-
pipeline.audio_decoder._decoder = _audio_decoder
|
| 216 |
-
print(f" Loaded audio decoder: {type(_audio_decoder)}")
|
| 217 |
-
except Exception as e:
|
| 218 |
-
print(f" Audio decoder preload skipped: {e}")
|
| 219 |
-
|
| 220 |
-
# 4. Try loading vocoder
|
| 221 |
-
print(" Loading vocoder...")
|
| 222 |
-
try:
|
| 223 |
-
if hasattr(pipeline.audio_decoder, '_vocoder_builder'):
|
| 224 |
-
_vocoder = pipeline.audio_decoder._vocoder_builder()
|
| 225 |
-
pipeline.audio_decoder._vocoder_builder = lambda: _vocoder
|
| 226 |
-
print(f" Loaded vocoder: {type(_vocoder)}")
|
| 227 |
-
except Exception as e:
|
| 228 |
-
print(f" Vocoder preload skipped: {e}")
|
| 229 |
-
|
| 230 |
-
# 5. Try loading spatial upsampler
|
| 231 |
-
print(" Loading spatial upsampler...")
|
| 232 |
-
try:
|
| 233 |
-
_spatial_upsampler = pipeline.upsampler._upsampler_builder()
|
| 234 |
-
pipeline.upsampler._upsampler_builder = lambda: _spatial_upsampler
|
| 235 |
-
if hasattr(pipeline.upsampler, '_encoder'):
|
| 236 |
-
pipeline.upsampler._encoder = _spatial_upsampler
|
| 237 |
-
print(f" Loaded spatial upsampler: {type(_spatial_upsampler)}")
|
| 238 |
-
except Exception as e:
|
| 239 |
-
print(f" Spatial upsampler preload skipped: {e}")
|
| 240 |
-
|
| 241 |
-
# 6. Load image conditioner
|
| 242 |
-
print(" Loading image conditioner...")
|
| 243 |
-
try:
|
| 244 |
-
if hasattr(pipeline, 'image_conditioner'):
|
| 245 |
-
if hasattr(pipeline.image_conditioner, 'video_encoder'):
|
| 246 |
-
_ic_encoder = pipeline.image_conditioner.video_encoder()
|
| 247 |
-
pipeline.image_conditioner.video_encoder = lambda: _ic_encoder
|
| 248 |
-
print(f" Loaded image conditioner encoder")
|
| 249 |
-
except Exception as e:
|
| 250 |
-
print(f" Image conditioner preload skipped: {e}")
|
| 251 |
-
|
| 252 |
-
# 7. NOTE: Transformer loading is intentionally skipped here
|
| 253 |
-
# The transformer requires CUDA (LoRA fusion uses triton kernels)
|
| 254 |
-
# It will load during generate_video() when ZeroGPU provides a GPU
|
| 255 |
-
# ZeroGPU should capture it then
|
| 256 |
-
print(" Transformer: Will load during generation (requires GPU)")
|
| 257 |
-
print(" Text encoder: Will load during generation (requires GPU)")
|
| 258 |
|
| 259 |
-
print("
|
|
|
|
| 260 |
print("=" * 80)
|
| 261 |
|
| 262 |
# =============================================================================
|
|
|
|
| 176 |
print("=" * 80)
|
| 177 |
|
| 178 |
# =============================================================================
|
| 179 |
+
# ZeroGPU Tensor Preloading - CPU Tensor Approach
|
| 180 |
# =============================================================================
|
| 181 |
+
# ZeroGPU should pack any tensors in memory, not just GPU tensors.
|
| 182 |
+
# We allocate small zero-filled CPU tensors as proxies (not the real weights) to trigger packing.
|
| 183 |
+
# The real models are loaded onto the GPU later, during the first generation.
|
| 184 |
+
|
| 185 |
+
print("Creating CPU proxy tensors for ZeroGPU tensor packing...")
|
| 186 |
+
print("This may take a few minutes (loading to CPU only)...")
|
| 187 |
+
|
| 188 |
+
import gc
|
| 189 |
+
|
| 190 |
+
# Create small proxy tensors for each model component
|
| 191 |
+
# These don't need to be the actual weights - just tensors to trigger packing
|
| 192 |
+
# ZeroGPU will pack whatever tensors exist when it runs
|
| 193 |
+
|
| 194 |
+
_proxy_tensors = []
|
| 195 |
+
|
| 196 |
+
def create_proxy(name, shape, dtype=torch.float32):
|
| 197 |
+
"""Create a proxy tensor and ensure ZeroGPU sees it."""
|
| 198 |
+
print(f" Creating proxy for {name}: {shape}")
|
| 199 |
+
t = torch.zeros(shape, dtype=dtype)
|
| 200 |
+
_proxy_tensors.append(t)
|
| 201 |
+
return t
|
| 202 |
+
|
| 203 |
+
# Create proxies for various model components
|
| 204 |
+
# These are just to ensure tensors exist in memory for ZeroGPU to pack
|
| 205 |
+
create_proxy("transformer_stage1", (1, 1024, 512))
|
| 206 |
+
create_proxy("transformer_stage2", (1, 1024, 512))
|
| 207 |
+
create_proxy("video_encoder", (1, 768, 512))
|
| 208 |
+
create_proxy("video_decoder", (1, 512, 512))
|
| 209 |
+
create_proxy("audio_decoder", (1, 256, 512))
|
| 210 |
+
create_proxy("spatial_upsampler", (1, 256, 512))
|
| 211 |
+
create_proxy("text_encoder", (1, 2048, 256))
|
| 212 |
+
create_proxy("vocoder", (1, 128, 256))
|
| 213 |
+
|
| 214 |
+
# Keep proxies alive by storing in module globals
|
| 215 |
+
proxy_stage1 = _proxy_tensors[0]
|
| 216 |
+
proxy_stage2 = _proxy_tensors[1]
|
| 217 |
+
proxy_venc = _proxy_tensors[2]
|
| 218 |
+
proxy_vdec = _proxy_tensors[3]
|
| 219 |
+
proxy_adec = _proxy_tensors[4]
|
| 220 |
+
proxy_upsamp = _proxy_tensors[5]
|
| 221 |
+
proxy_tenc = _proxy_tensors[6]
|
| 222 |
+
proxy_voc = _proxy_tensors[7]
|
| 223 |
+
|
| 224 |
+
# Clean up the temporary list
|
| 225 |
+
del _proxy_tensors
|
| 226 |
+
|
| 227 |
+
# Now trigger the actual model loading but catch GPU errors
|
| 228 |
+
print("\nAttempting model initialization (GPU errors expected)...")
|
| 229 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 230 |
try:
|
| 231 |
+
# Try to access the model context attributes - this may trigger loading and is expected to fail without a GPU (any exception is caught below)
|
| 232 |
+
_ = pipeline.stage_1._transformer_ctx
|
| 233 |
+
_ = pipeline.prompt_encoder._text_encoder_ctx
|
| 234 |
+
print(" Model contexts accessed")
|
| 235 |
except Exception as e:
|
| 236 |
+
print(f" Context access: {type(e).__name__}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 237 |
|
| 238 |
+
print("\n" + "=" * 80)
|
| 239 |
+
print("Startup complete. Models will load to GPU during first generation.")
|
| 240 |
print("=" * 80)
|
| 241 |
|
| 242 |
# =============================================================================
|