fffiloni committed
Commit
8e4cf69
1 Parent(s): f7c49f8

Update animatediff/pipelines/pipeline_animation.py

animatediff/pipelines/pipeline_animation.py CHANGED
@@ -317,25 +317,28 @@ class AnimationPipeline(DiffusionPipeline):
             rand_device = "cpu" if device.type == "mps" else device
 
             if isinstance(generator, list):
-                # Initialize latents as a random tensor
-                latents = torch.randn(shape, device=rand_device, dtype=dtype)
+                shape = shape
+                # shape = (1,) + shape[1:]
+                # ignore init latents for batch model
+                latents = [
+                    torch.randn(shape, generator=generator[i], device=rand_device, dtype=dtype)
+                    for i in range(batch_size)
+                ]
+                latents = torch.cat(latents, dim=0).to(device)
 
-                # If init_latents is not None, copy the values for each video frame
-                if init_latents is not None:
-                    for i in range(video_length):
-                        init_alpha = (video_length - float(i)) / video_length / 30
-                        latents[:, :, i, :, :] = init_latents * init_alpha + latents[:, :, i, :, :] * (1 - init_alpha)
 
-                latents = latents.to(device)
             else:
                 latents = torch.randn(shape, generator=generator, device=rand_device, dtype=dtype).to(device)
-
-                # If init_latents is not None, repeat it for the entire batch
                 if init_latents is not None:
-                    init_latents = init_latents.unsqueeze(0).repeat(batch_size, 1, 1, 1, 1)
+
                     for i in range(video_length):
-                        init_alpha = (video_length - float(i)) / video_length / 30
-                        latents[:, :, i, :, :] = init_latents[:, :, i, :, :] * init_alpha + latents[:, :, i, :, :] * (1 - init_alpha)
+                        # I just feel dividing by 30 yield stable result but I don't know why
+                        # gradully reduce init alpha along video frames (loosen restriction)
+                        init_alpha = (video_length - float(i)) / video_length / 30
+                        latents[:, :, i, :, :] = init_latents * init_alpha + latents[:, :, i, :, :] * (1 - init_alpha)
+
+
+
 
         else:
             if latents.shape != shape:
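
For reference, below is a minimal, self-contained sketch of the latent initialisation the commit moves to. The function name `prepare_video_latents` and its signature are illustrative, not the pipeline's actual API; only the sampling and blending logic mirrors the diff above. Note that the commit keeps `shape = shape` and leaves the per-sample `(1,) + shape[1:]` variant commented out; the sketch uses the per-sample variant so that concatenating along dim 0 yields a batch of `batch_size` samples.

```python
import torch

def prepare_video_latents(shape, generator=None, init_latents=None,
                          device=torch.device("cpu"), dtype=torch.float32):
    """Illustrative sketch of the latent initialisation after this commit.

    shape        -- (batch_size, channels, video_length, height, width)
    generator    -- a torch.Generator or a list with one generator per sample
    init_latents -- optional tensor broadcastable to latents[:, :, i, :, :]
    """
    batch_size, video_length = shape[0], shape[2]
    # MPS cannot sample with every generator, so draw on CPU there (as in the diff).
    rand_device = torch.device("cpu") if device.type == "mps" else device

    if isinstance(generator, list):
        # One independent generator per sample; init_latents are ignored in this
        # branch, matching the "ignore init latents for batch model" comment.
        per_sample_shape = (1,) + tuple(shape[1:])  # the commit's commented-out variant
        latents = torch.cat(
            [torch.randn(per_sample_shape, generator=generator[i],
                         device=rand_device, dtype=dtype)
             for i in range(batch_size)],
            dim=0,
        ).to(device)
    else:
        latents = torch.randn(shape, generator=generator,
                              device=rand_device, dtype=dtype).to(device)
        if init_latents is not None:
            for i in range(video_length):
                # Blend weight starts at 1/30 for frame 0 and decays linearly toward 0,
                # so later frames are constrained less and less by init_latents.
                init_alpha = (video_length - float(i)) / video_length / 30
                latents[:, :, i, :, :] = (init_latents * init_alpha
                                          + latents[:, :, i, :, :] * (1 - init_alpha))
    return latents
```

With `video_length = 16`, for example, `init_alpha` runs from 1/30 ≈ 0.033 at the first frame down to 1/480 ≈ 0.002 at the last, so only a small fraction of `init_latents` is ever mixed in, and less with each successive frame.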