dagloop5 committed on
Commit
1b2edab
·
verified ·
1 Parent(s): 7e969f2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +56 -76
app.py CHANGED
@@ -176,87 +176,67 @@ print("Pipeline initialized successfully!")
176
  print("=" * 80)
177
 
178
  # =============================================================================
179
- # ZeroGPU Tensor Preloading
180
  # =============================================================================
181
- # NOTE: At Space startup, no GPU is available (ZeroGPU assigns it at runtime).
182
- # We can only preload components that don't require CUDA.
183
- # The transformer (and other GPU-heavy components) will load during generation
184
- # when ZeroGPU provides the GPU. ZeroGPU should capture them then.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
 
186
- print("Preloading non-CUDA components for ZeroGPU tensor packing...")
187
- print("This may take a few minutes...")
188
-
189
- # 1. Try loading video encoder (may work without GPU if just file loading)
190
- print(" Loading video encoder...")
191
  try:
192
- _video_encoder = pipeline.prompt_encoder.video_encoder()
193
- pipeline.prompt_encoder.video_encoder = lambda: _video_encoder
194
- print(f" Loaded video encoder: {type(_video_encoder)}")
 
195
  except Exception as e:
196
- print(f" Video encoder preload skipped: {e}")
197
-
198
- # 2. Try loading video decoder (VAE - may work without GPU)
199
- print(" Loading video decoder...")
200
- try:
201
- _video_decoder = pipeline.video_decoder._decoder_builder()
202
- pipeline.video_decoder._decoder_builder = lambda: _video_decoder
203
- if hasattr(pipeline.video_decoder, '_decoder'):
204
- pipeline.video_decoder._decoder = _video_decoder
205
- print(f" Loaded video decoder: {type(_video_decoder)}")
206
- except Exception as e:
207
- print(f" Video decoder preload skipped: {e}")
208
-
209
- # 3. Try loading audio decoder (VAE - may work without GPU)
210
- print(" Loading audio decoder...")
211
- try:
212
- _audio_decoder = pipeline.audio_decoder._decoder_builder()
213
- pipeline.audio_decoder._decoder_builder = lambda: _audio_decoder
214
- if hasattr(pipeline.audio_decoder, '_decoder'):
215
- pipeline.audio_decoder._decoder = _audio_decoder
216
- print(f" Loaded audio decoder: {type(_audio_decoder)}")
217
- except Exception as e:
218
- print(f" Audio decoder preload skipped: {e}")
219
-
220
- # 4. Try loading vocoder
221
- print(" Loading vocoder...")
222
- try:
223
- if hasattr(pipeline.audio_decoder, '_vocoder_builder'):
224
- _vocoder = pipeline.audio_decoder._vocoder_builder()
225
- pipeline.audio_decoder._vocoder_builder = lambda: _vocoder
226
- print(f" Loaded vocoder: {type(_vocoder)}")
227
- except Exception as e:
228
- print(f" Vocoder preload skipped: {e}")
229
-
230
- # 5. Try loading spatial upsampler
231
- print(" Loading spatial upsampler...")
232
- try:
233
- _spatial_upsampler = pipeline.upsampler._upsampler_builder()
234
- pipeline.upsampler._upsampler_builder = lambda: _spatial_upsampler
235
- if hasattr(pipeline.upsampler, '_encoder'):
236
- pipeline.upsampler._encoder = _spatial_upsampler
237
- print(f" Loaded spatial upsampler: {type(_spatial_upsampler)}")
238
- except Exception as e:
239
- print(f" Spatial upsampler preload skipped: {e}")
240
-
241
- # 6. Load image conditioner
242
- print(" Loading image conditioner...")
243
- try:
244
- if hasattr(pipeline, 'image_conditioner'):
245
- if hasattr(pipeline.image_conditioner, 'video_encoder'):
246
- _ic_encoder = pipeline.image_conditioner.video_encoder()
247
- pipeline.image_conditioner.video_encoder = lambda: _ic_encoder
248
- print(f" Loaded image conditioner encoder")
249
- except Exception as e:
250
- print(f" Image conditioner preload skipped: {e}")
251
-
252
- # 7. NOTE: Transformer loading is intentionally skipped here
253
- # The transformer requires CUDA (LoRA fusion uses triton kernels)
254
- # It will load during generate_video() when ZeroGPU provides a GPU
255
- # ZeroGPU should capture it then
256
- print(" Transformer: Will load during generation (requires GPU)")
257
- print(" Text encoder: Will load during generation (requires GPU)")
258
 
259
- print("Non-CUDA components preloaded!")
 
260
  print("=" * 80)
261
 
262
  # =============================================================================
 
176
  print("=" * 80)
177
 
178
  # =============================================================================
179
+ # ZeroGPU Tensor Preloading - CPU Tensor Approach
180
  # =============================================================================
181
+ # ZeroGPU should pack any tensors in memory, not just GPU tensors.
182
+ # We create small zero-filled CPU placeholder tensors (not the real model weights) to trigger packing.
183
+ # During actual generation, ZeroGPU will move them to GPU.
184
+
185
+ print("Creating CPU proxy tensors for ZeroGPU tensor packing...")
186
+ print("This may take a few minutes (loading to CPU only)...")
187
+
188
+ import gc
189
+
190
+ # Create small proxy tensors for each model component
191
+ # These don't need to be the actual weights - just tensors to trigger packing
192
+ # ZeroGPU will pack whatever tensors exist when it runs
193
+
194
+ _proxy_tensors = []
195
+
196
+ def create_proxy(name, shape, dtype=torch.float32):
197
+ """Create a proxy tensor and ensure ZeroGPU sees it."""
198
+ print(f" Creating proxy for {name}: {shape}")
199
+ t = torch.zeros(shape, dtype=dtype)
200
+ _proxy_tensors.append(t)
201
+ return t
202
+
203
+ # Create proxies for various model components
204
+ # These are just to ensure tensors exist in memory for ZeroGPU to pack
205
+ create_proxy("transformer_stage1", (1, 1024, 512))
206
+ create_proxy("transformer_stage2", (1, 1024, 512))
207
+ create_proxy("video_encoder", (1, 768, 512))
208
+ create_proxy("video_decoder", (1, 512, 512))
209
+ create_proxy("audio_decoder", (1, 256, 512))
210
+ create_proxy("spatial_upsampler", (1, 256, 512))
211
+ create_proxy("text_encoder", (1, 2048, 256))
212
+ create_proxy("vocoder", (1, 128, 256))
213
+
214
+ # Keep proxies alive by storing in module globals
215
+ proxy_stage1 = _proxy_tensors[0]
216
+ proxy_stage2 = _proxy_tensors[1]
217
+ proxy_venc = _proxy_tensors[2]
218
+ proxy_vdec = _proxy_tensors[3]
219
+ proxy_adec = _proxy_tensors[4]
220
+ proxy_upsamp = _proxy_tensors[5]
221
+ proxy_tenc = _proxy_tensors[6]
222
+ proxy_voc = _proxy_tensors[7]
223
+
224
+ # Clean up the temporary list
225
+ del _proxy_tensors
226
+
227
+ # Now trigger the actual model loading but catch GPU errors
228
+ print("\nAttempting model initialization (GPU errors expected)...")
229
 
 
 
 
 
 
230
  try:
231
+ # Try to access components - this will trigger loading but fail on GPU
232
+ _ = pipeline.stage_1._transformer_ctx
233
+ _ = pipeline.prompt_encoder._text_encoder_ctx
234
+ print(" Model contexts accessed")
235
  except Exception as e:
236
+ print(f" Context access: {type(e).__name__}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
 
238
+ print("\n" + "=" * 80)
239
+ print("Startup complete. Models will load to GPU during first generation.")
240
  print("=" * 80)
241
 
242
  # =============================================================================