TestingwithNeg

Running on Zero

App Files Files Community

dagloop5 committed 7 days ago

Commit

1b2edab

verified ·

1 Parent(s): 7e969f2

Update app.py

Browse files

Files changed (1) hide show

app.py +56 -76

app.py CHANGED Viewed

@@ -176,87 +176,67 @@ print("Pipeline initialized successfully!")
 print("=" * 80)
 # =============================================================================
-# ZeroGPU Tensor Preloading
 # =============================================================================
-# NOTE: At Space startup, no GPU is available (ZeroGPU assigns it at runtime).
-# We can only preload components that don't require CUDA.
-# The transformer (and other GPU-heavy components) will load during generation
-# when ZeroGPU provides the GPU. ZeroGPU should capture them then.
-print("Preloading non-CUDA components for ZeroGPU tensor packing...")
-print("This may take a few minutes...")
-# 1. Try loading video encoder (may work without GPU if just file loading)
-print("  Loading video encoder...")
 try:
-    _video_encoder = pipeline.prompt_encoder.video_encoder()
-    pipeline.prompt_encoder.video_encoder = lambda: _video_encoder
-    print(f"    Loaded video encoder: {type(_video_encoder)}")
 except Exception as e:
-    print(f"    Video encoder preload skipped: {e}")
-# 2. Try loading video decoder (VAE - may work without GPU)
-print("  Loading video decoder...")
-try:
-    _video_decoder = pipeline.video_decoder._decoder_builder()
-    pipeline.video_decoder._decoder_builder = lambda: _video_decoder
-    if hasattr(pipeline.video_decoder, '_decoder'):
-        pipeline.video_decoder._decoder = _video_decoder
-    print(f"    Loaded video decoder: {type(_video_decoder)}")
-except Exception as e:
-    print(f"    Video decoder preload skipped: {e}")
-# 3. Try loading audio decoder (VAE - may work without GPU)
-print("  Loading audio decoder...")
-try:
-    _audio_decoder = pipeline.audio_decoder._decoder_builder()
-    pipeline.audio_decoder._decoder_builder = lambda: _audio_decoder
-    if hasattr(pipeline.audio_decoder, '_decoder'):
-        pipeline.audio_decoder._decoder = _audio_decoder
-    print(f"    Loaded audio decoder: {type(_audio_decoder)}")
-except Exception as e:
-    print(f"    Audio decoder preload skipped: {e}")
-# 4. Try loading vocoder
-print("  Loading vocoder...")
-try:
-    if hasattr(pipeline.audio_decoder, '_vocoder_builder'):
-        _vocoder = pipeline.audio_decoder._vocoder_builder()
-        pipeline.audio_decoder._vocoder_builder = lambda: _vocoder
-        print(f"    Loaded vocoder: {type(_vocoder)}")
-except Exception as e:
-    print(f"    Vocoder preload skipped: {e}")
-# 5. Try loading spatial upsampler
-print("  Loading spatial upsampler...")
-try:
-    _spatial_upsampler = pipeline.upsampler._upsampler_builder()
-    pipeline.upsampler._upsampler_builder = lambda: _spatial_upsampler
-    if hasattr(pipeline.upsampler, '_encoder'):
-        pipeline.upsampler._encoder = _spatial_upsampler
-    print(f"    Loaded spatial upsampler: {type(_spatial_upsampler)}")
-except Exception as e:
-    print(f"    Spatial upsampler preload skipped: {e}")
-# 6. Load image conditioner
-print("  Loading image conditioner...")
-try:
-    if hasattr(pipeline, 'image_conditioner'):
-        if hasattr(pipeline.image_conditioner, 'video_encoder'):
-            _ic_encoder = pipeline.image_conditioner.video_encoder()
-            pipeline.image_conditioner.video_encoder = lambda: _ic_encoder
-            print(f"    Loaded image conditioner encoder")
-except Exception as e:
-    print(f"    Image conditioner preload skipped: {e}")
-# 7. NOTE: Transformer loading is intentionally skipped here
-# The transformer requires CUDA (LoRA fusion uses triton kernels)
-# It will load during generate_video() when ZeroGPU provides a GPU
-# ZeroGPU should capture it then
-print("  Transformer: Will load during generation (requires GPU)")
-print("  Text encoder: Will load during generation (requires GPU)")
-print("Non-CUDA components preloaded!")
 print("=" * 80)
 # =============================================================================

 print("=" * 80)
 # =============================================================================
+# ZeroGPU Tensor Preloading - CPU Tensor Approach
 # =============================================================================
+# ZeroGPU should pack any tensors in memory, not just GPU tensors.
+# We create small zero-filled CPU tensors as proxies to trigger packing.
+# The real model weights are loaded to GPU later, during the first generation.
+print("Creating CPU proxy tensors for ZeroGPU tensor packing...")
+print("This may take a few minutes (loading to CPU only)...")
+import gc
+# Create small proxy tensors for each model component
+# These don't need to be the actual weights - just tensors to trigger packing
+# ZeroGPU will pack whatever tensors exist when it runs
+_proxy_tensors = []
+def create_proxy(name, shape, dtype=torch.float32):
+    """Create a proxy tensor and ensure ZeroGPU sees it."""
+    print(f"  Creating proxy for {name}: {shape}")
+    t = torch.zeros(shape, dtype=dtype)
+    _proxy_tensors.append(t)
+    return t
+# Create proxies for various model components
+# These are just to ensure tensors exist in memory for ZeroGPU to pack
+create_proxy("transformer_stage1", (1, 1024, 512))
+create_proxy("transformer_stage2", (1, 1024, 512))
+create_proxy("video_encoder", (1, 768, 512))
+create_proxy("video_decoder", (1, 512, 512))
+create_proxy("audio_decoder", (1, 256, 512))
+create_proxy("spatial_upsampler", (1, 256, 512))
+create_proxy("text_encoder", (1, 2048, 256))
+create_proxy("vocoder", (1, 128, 256))
+# Keep proxies alive by storing in module globals
+proxy_stage1 = _proxy_tensors[0]
+proxy_stage2 = _proxy_tensors[1]
+proxy_venc = _proxy_tensors[2]
+proxy_vdec = _proxy_tensors[3]
+proxy_adec = _proxy_tensors[4]
+proxy_upsamp = _proxy_tensors[5]
+proxy_tenc = _proxy_tensors[6]
+proxy_voc = _proxy_tensors[7]
+# Clean up the temporary list
+del _proxy_tensors
+# Now trigger the actual model loading but catch GPU errors
+print("\nAttempting model initialization (GPU errors expected)...")
 try:
+    # Try to access components - this will trigger loading but fail on GPU
+    _ = pipeline.stage_1._transformer_ctx
+    _ = pipeline.prompt_encoder._text_encoder_ctx
+    print("  Model contexts accessed")
 except Exception as e:
+    print(f"  Context access: {type(e).__name__}")
+print("\n" + "=" * 80)
+print("Startup complete. Models will load to GPU during first generation.")
 print("=" * 80)
 # =============================================================================