Hmrishav committed on
Commit
b1350bf
1 Parent(s): a7b9df8

resolve deps

Files changed (50)
  1. README.md +11 -9
  2. app.py +271 -0
  3. app_full.py +243 -0
  4. environment.yml +402 -0
  5. gifs_filter.py +68 -0
  6. invert_utils.py +89 -0
  7. read_vids.py +27 -0
  8. requirements.txt +44 -0
  9. static/app_tmp/gif_logs/vid_sketch10-rand0_4e766a8e-9d22-4818-8991-e884ce17e5e5.gif +0 -0
  10. static/app_tmp/gif_logs/vid_sketch10-rand0_508fa599-d685-462e-ad06-11ca4fd15d6f.gif +0 -0
  11. static/app_tmp/gif_logs/vid_sketch10-rand0_9e7e07af-2adc-47b0-8aa4-716a934690e8.gif +0 -0
  12. static/app_tmp/gif_logs/vid_sketch10-rand0_dfcba486-0d8c-4d68-9689-97f1fb889213.gif +0 -0
  13. static/app_tmp/gif_logs/vid_sketch10-rand1_4e766a8e-9d22-4818-8991-e884ce17e5e5.gif +0 -0
  14. static/app_tmp/gif_logs/vid_sketch10-rand1_508fa599-d685-462e-ad06-11ca4fd15d6f.gif +0 -0
  15. static/app_tmp/gif_logs/vid_sketch10-rand1_9e7e07af-2adc-47b0-8aa4-716a934690e8.gif +0 -0
  16. static/app_tmp/gif_logs/vid_sketch10-rand1_dfcba486-0d8c-4d68-9689-97f1fb889213.gif +0 -0
  17. static/app_tmp/gif_logs/vid_sketch10-rand2_4e766a8e-9d22-4818-8991-e884ce17e5e5.gif +0 -0
  18. static/app_tmp/gif_logs/vid_sketch10-rand2_508fa599-d685-462e-ad06-11ca4fd15d6f.gif +0 -0
  19. static/app_tmp/gif_logs/vid_sketch10-rand2_9e7e07af-2adc-47b0-8aa4-716a934690e8.gif +0 -0
  20. static/app_tmp/gif_logs/vid_sketch10-rand2_dfcba486-0d8c-4d68-9689-97f1fb889213.gif +0 -0
  21. static/app_tmp/gif_logs/vid_sketch10-rand3_4e766a8e-9d22-4818-8991-e884ce17e5e5.gif +0 -0
  22. static/app_tmp/gif_logs/vid_sketch10-rand3_508fa599-d685-462e-ad06-11ca4fd15d6f.gif +0 -0
  23. static/app_tmp/gif_logs/vid_sketch10-rand3_9e7e07af-2adc-47b0-8aa4-716a934690e8.gif +0 -0
  24. static/app_tmp/gif_logs/vid_sketch10-rand3_dfcba486-0d8c-4d68-9689-97f1fb889213.gif +0 -0
  25. static/app_tmp/gif_logs/vid_sketch10-rand4_4e766a8e-9d22-4818-8991-e884ce17e5e5.gif +0 -0
  26. static/app_tmp/gif_logs/vid_sketch10-rand4_508fa599-d685-462e-ad06-11ca4fd15d6f.gif +0 -0
  27. static/app_tmp/gif_logs/vid_sketch10-rand4_9e7e07af-2adc-47b0-8aa4-716a934690e8.gif +0 -0
  28. static/app_tmp/gif_logs/vid_sketch10-rand4_dfcba486-0d8c-4d68-9689-97f1fb889213.gif +0 -0
  29. static/app_tmp/gif_logs/vid_sketch10-rand5_508fa599-d685-462e-ad06-11ca4fd15d6f.gif +0 -0
  30. static/app_tmp/gif_logs/vid_sketch10-rand6_508fa599-d685-462e-ad06-11ca4fd15d6f.gif +0 -0
  31. static/app_tmp/gif_logs/vid_sketch10-rand7_508fa599-d685-462e-ad06-11ca4fd15d6f.gif +0 -0
  32. static/app_tmp/gif_logs/vid_sketch10-rand8_508fa599-d685-462e-ad06-11ca4fd15d6f.gif +0 -0
  33. static/app_tmp/gif_logs/vid_sketch10-rand9_508fa599-d685-462e-ad06-11ca4fd15d6f.gif +0 -0
  34. static/app_tmp/gif_logs/vid_sketch3-rand0_875203a1-f830-46e7-a287-4a0bc2c3a648.gif +0 -0
  35. static/app_tmp/gif_logs/vid_sketch3-rand1_875203a1-f830-46e7-a287-4a0bc2c3a648.gif +0 -0
  36. static/app_tmp/gif_logs/vid_sketch3-rand2_875203a1-f830-46e7-a287-4a0bc2c3a648.gif +0 -0
  37. static/app_tmp/gif_logs/vid_sketch3-rand3_875203a1-f830-46e7-a287-4a0bc2c3a648.gif +0 -0
  38. static/app_tmp/gif_logs/vid_sketch3-rand4_875203a1-f830-46e7-a287-4a0bc2c3a648.gif +0 -0
  39. static/app_tmp/gif_logs/vid_sketch8-rand0_47fc0372-4688-4a2a-abb3-817ccfee8816.gif +0 -0
  40. static/app_tmp/gif_logs/vid_sketch8-rand0_77158110-9239-4771-bb44-a83c3aa47567.gif +0 -0
  41. static/app_tmp/gif_logs/vid_sketch8-rand0_fd1dace5-80a2-4a0f-afb1-c6aa0943c91a.gif +0 -0
  42. static/app_tmp/gif_logs/vid_sketch8-rand1_47fc0372-4688-4a2a-abb3-817ccfee8816.gif +0 -0
  43. static/app_tmp/gif_logs/vid_sketch8-rand1_77158110-9239-4771-bb44-a83c3aa47567.gif +0 -0
  44. static/app_tmp/gif_logs/vid_sketch8-rand1_fd1dace5-80a2-4a0f-afb1-c6aa0943c91a.gif +0 -0
  45. static/app_tmp/gif_logs/vid_sketch8-rand2_47fc0372-4688-4a2a-abb3-817ccfee8816.gif +0 -0
  46. static/app_tmp/gif_logs/vid_sketch8-rand2_77158110-9239-4771-bb44-a83c3aa47567.gif +0 -0
  47. static/app_tmp/gif_logs/vid_sketch8-rand2_fd1dace5-80a2-4a0f-afb1-c6aa0943c91a.gif +0 -0
  48. static/app_tmp/gif_logs/vid_sketch8-rand3_47fc0372-4688-4a2a-abb3-817ccfee8816.gif +0 -0
  49. static/app_tmp/gif_logs/vid_sketch8-rand3_77158110-9239-4771-bb44-a83c3aa47567.gif +0 -0
  50. static/app_tmp/gif_logs/vid_sketch8-rand3_fd1dace5-80a2-4a0f-afb1-c6aa0943c91a.gif +0 -0
README.md CHANGED
@@ -1,12 +1,14 @@
 ---
-title: FlipSketch
-emoji: 🚀
-colorFrom: purple
-colorTo: green
-sdk: docker
-pinned: false
-license: mit
-short_description: Sketch Animations
+title: FlipSketch
+emoji: 🚀
+colorFrom: blue
+colorTo: green
+sdk: gradio
+app_file: app.py
+pinned: false
 ---
 
-Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+# FlipSketch
+
+FlipSketch: Flipping Static Drawings to Text-Guided Sketch Animations
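
For reference (not part of the commit): the metadata now selects `sdk: gradio` with `app_file: app.py`. On Spaces, the Gradio SDK runs app_file directly and expects it to define and launch a Gradio demo; a minimal, hypothetical entrypoint of that shape is sketched below — note that the app.py added in this commit is actually a Flask app, so this is only the shape the config implies, not the committed code.

import gradio as gr

def animate(sketch_path, prompt):
    # placeholder: the real app would run the sketch through the T2V pipeline
    return sketch_path

demo = gr.Interface(
    fn=animate,
    inputs=[gr.Image(type="filepath"), gr.Textbox(label="Prompt")],
    outputs=gr.Image(),
)

if __name__ == "__main__":
    demo.launch()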
app.py ADDED
@@ -0,0 +1,271 @@
+from flask import Flask, render_template, request, jsonify
+import os
+import cv2
+import subprocess
+import torch
+import torchvision
+import warnings
+import numpy as np
+from PIL import Image, ImageSequence
+from moviepy.editor import VideoFileClip
+import imageio
+import uuid
+
+from diffusers import (
+    TextToVideoSDPipeline,
+    AutoencoderKL,
+    DDPMScheduler,
+    DDIMScheduler,
+    UNet3DConditionModel,
+)
+import time
+from transformers import CLIPTokenizer, CLIPTextModel
+
+from diffusers.utils import export_to_video
+from gifs_filter import filter
+from invert_utils import ddim_inversion as dd_inversion
+from text2vid_modded import TextToVideoSDPipelineModded
+
+
+def run_setup():
+    try:
+        # Step 1: Install Git LFS
+        subprocess.run(["git", "lfs", "install"], check=True)
+
+        # Step 2: Clone the repository
+        repo_url = "https://huggingface.co/Hmrishav/t2v_sketch-lora"
+        subprocess.run(["git", "clone", repo_url], check=True)
+
+        # Step 3: Move the checkpoint file
+        source = "t2v_sketch-lora/checkpoint-2500"
+        destination = "./checkpoint-2500/"
+        os.rename(source, destination)
+
+        print("Setup completed successfully!")
+    except subprocess.CalledProcessError as e:
+        print(f"Error during setup: {e}")
+    except FileNotFoundError as e:
+        print(f"File operation error: {e}")
+    except Exception as e:
+        print(f"Unexpected error: {e}")
+
+# Automatically run setup during app initialization
+run_setup()
+
+
+# Flask app setup
+app = Flask(__name__)
+app.config['UPLOAD_FOLDER'] = 'static/uploads'
+app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024  # 16MB max file size
+os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
+
+# Environment setup
+os.environ["TORCH_CUDNN_V8_API_ENABLED"] = "1"
+LORA_CHECKPOINT = "checkpoint-2500"
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+dtype = torch.bfloat16
+
+# Helper functions
+
+def cleanup_old_files(directory, age_in_seconds=600):
+    """
+    Deletes files older than a certain age in the specified directory.
+
+    Args:
+        directory (str): The directory to clean up.
+        age_in_seconds (int): The age in seconds; files older than this will be deleted.
+    """
+    now = time.time()
+    for filename in os.listdir(directory):
+        file_path = os.path.join(directory, filename)
+        # Only delete files (not directories)
+        if os.path.isfile(file_path):
+            file_age = now - os.path.getmtime(file_path)
+            if file_age > age_in_seconds:
+                try:
+                    os.remove(file_path)
+                    print(f"Deleted old file: {file_path}")
+                except Exception as e:
+                    print(f"Error deleting file {file_path}: {e}")
+
+def load_frames(image: Image, mode='RGBA'):
+    return np.array([np.array(frame.convert(mode)) for frame in ImageSequence.Iterator(image)])
+
+def save_gif(frames, path):
+    imageio.mimsave(path, [frame.astype(np.uint8) for frame in frames], format='GIF', duration=1/10)
+
+def load_image(imgname, target_size=None):
+    pil_img = Image.open(imgname).convert('RGB')
+    if target_size:
+        if isinstance(target_size, int):
+            target_size = (target_size, target_size)
+        pil_img = pil_img.resize(target_size, Image.Resampling.LANCZOS)
+    return torchvision.transforms.ToTensor()(pil_img).unsqueeze(0)  # Add batch dimension
+
+def prepare_latents(pipe, x_aug):
+    with torch.cuda.amp.autocast():
+        batch_size, num_frames, channels, height, width = x_aug.shape
+        x_aug = x_aug.reshape(batch_size * num_frames, channels, height, width)
+        latents = pipe.vae.encode(x_aug).latent_dist.sample()
+        latents = latents.view(batch_size, num_frames, -1, latents.shape[2], latents.shape[3])
+        latents = latents.permute(0, 2, 1, 3, 4)
+        return pipe.vae.config.scaling_factor * latents
+
+@torch.no_grad()
+def invert(pipe, inv, load_name, device="cuda", dtype=torch.bfloat16):
+    input_img = [load_image(load_name, 256).to(device, dtype=dtype).unsqueeze(1)] * 5
+    input_img = torch.cat(input_img, dim=1)
+    latents = prepare_latents(pipe, input_img).to(torch.bfloat16)
+    inv.set_timesteps(25)
+    id_latents = dd_inversion(pipe, inv, video_latent=latents, num_inv_steps=25, prompt="")[-1].to(dtype)
+    return torch.mean(id_latents, dim=2, keepdim=True)
+
+def load_primary_models(pretrained_model_path):
+    return (
+        DDPMScheduler.from_config(pretrained_model_path, subfolder="scheduler"),
+        CLIPTokenizer.from_pretrained(pretrained_model_path, subfolder="tokenizer"),
+        CLIPTextModel.from_pretrained(pretrained_model_path, subfolder="text_encoder"),
+        AutoencoderKL.from_pretrained(pretrained_model_path, subfolder="vae"),
+        UNet3DConditionModel.from_pretrained(pretrained_model_path, subfolder="unet"),
+    )
+
+
+def initialize_pipeline(model: str, device: str = "cuda"):
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        scheduler, tokenizer, text_encoder, vae, unet = load_primary_models(model)
+        pipe = TextToVideoSDPipeline.from_pretrained(
+            pretrained_model_name_or_path="damo-vilab/text-to-video-ms-1.7b",
+            scheduler=scheduler,
+            tokenizer=tokenizer,
+            text_encoder=text_encoder.to(device=device, dtype=torch.bfloat16),
+            vae=vae.to(device=device, dtype=torch.bfloat16),
+            unet=unet.to(device=device, dtype=torch.bfloat16),
+        )
+    pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
+    return pipe, pipe.scheduler
+
+pipe_inversion, inv = initialize_pipeline(LORA_CHECKPOINT, device)
+pipe = TextToVideoSDPipelineModded.from_pretrained(
+    pretrained_model_name_or_path="damo-vilab/text-to-video-ms-1.7b",
+    scheduler=pipe_inversion.scheduler,
+    tokenizer=pipe_inversion.tokenizer,
+    text_encoder=pipe_inversion.text_encoder,
+    vae=pipe_inversion.vae,
+    unet=pipe_inversion.unet,
+).to(device)
+
+@torch.no_grad()
+def process(num_frames, num_seeds, generator, exp_dir, load_name, caption, lambda_):
+    pipe_inversion.to(device)
+    id_latents = invert(pipe_inversion, inv, load_name).to(device, dtype=dtype)
+    latents = id_latents.repeat(num_seeds, 1, 1, 1, 1)
+    generator = [torch.Generator(device="cuda").manual_seed(i) for i in range(num_seeds)]
+    video_frames = pipe(
+        prompt=caption,
+        negative_prompt="",
+        num_frames=num_frames,
+        num_inference_steps=25,
+        inv_latents=latents,
+        guidance_scale=9,
+        generator=generator,
+        lambda_=lambda_,
+    ).frames
+    try:
+        load_name = load_name.split("/")[-1]
+    except:
+        pass
+    gifs = []
+    for seed in range(num_seeds):
+        vid_name = f"{exp_dir}/mp4_logs/vid_{load_name[:-4]}-rand{seed}.mp4"
+        gif_name = f"{exp_dir}/gif_logs/vid_{load_name[:-4]}-rand{seed}.gif"
+        video_path = export_to_video(video_frames[seed], output_video_path=vid_name)
+        VideoFileClip(vid_name).write_gif(gif_name)
+        with Image.open(gif_name) as im:
+            frames = load_frames(im)
+
+        frames_collect = np.empty((0, 1024, 1024), int)
+        for frame in frames:
+            frame = cv2.resize(frame, (1024, 1024))[:, :, :3]
+            frame = cv2.cvtColor(255 - frame, cv2.COLOR_RGB2GRAY)
+
+            _, frame = cv2.threshold(255 - frame, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+
+            frames_collect = np.append(frames_collect, [frame], axis=0)
+
+        save_gif(frames_collect, gif_name)
+        gifs.append(gif_name)
+
+    return gifs
+
+
+def generate_gifs(filepath, prompt, num_seeds=5, lambda_=0):
+    exp_dir = "static/app_tmp"
+    os.makedirs(exp_dir, exist_ok=True)
+    gifs = process(
+        num_frames=10,
+        num_seeds=num_seeds,
+        generator=None,
+        exp_dir=exp_dir,
+        load_name=filepath,
+        caption=prompt,
+        lambda_=lambda_
+    )
+    return gifs
+
+@app.route('/')
+def index():
+    return render_template('index.html')
+
+@app.route('/generate', methods=['POST'])
+def generate():
+
+    directories_to_clean = [
+        app.config['UPLOAD_FOLDER'],
+        'static/app_tmp/mp4_logs',
+        'static/app_tmp/gif_logs',
+        'static/app_tmp/png_logs'
+    ]
+
+    # Perform cleanup
+    os.makedirs('static/app_tmp', exist_ok=True)
+    for directory in directories_to_clean:
+        os.makedirs(directory, exist_ok=True)  # Ensure the directory exists
+        cleanup_old_files(directory)
+
+    prompt = request.form.get('prompt', '')
+    num_gifs = int(request.form.get('seeds', 3))
+    lambda_value = 1 - float(request.form.get('lambda', 0.5))
+    selected_example = request.form.get('selected_example', None)
+    file = request.files.get('image')
+
+    if not file and not selected_example:
+        return jsonify({'error': 'No image file provided or example selected'}), 400
+
+    if selected_example:
+        # Use the selected example image
+        filepath = os.path.join('static', 'examples', selected_example)
+        unique_id = None  # No need for unique ID
+    else:
+        # Save the uploaded image
+        unique_id = str(uuid.uuid4())
+        filepath = os.path.join(app.config['UPLOAD_FOLDER'], f"{unique_id}_uploaded_image.png")
+        file.save(filepath)
+
+    generated_gifs = generate_gifs(filepath, prompt, num_seeds=num_gifs, lambda_=lambda_value)
+
+    unique_id = str(uuid.uuid4())
+    # Append unique id to each gif path
+    for i in range(len(generated_gifs)):
+        os.rename(generated_gifs[i], f"{generated_gifs[i].split('.')[0]}_{unique_id}.gif")
+        generated_gifs[i] = f"{generated_gifs[i].split('.')[0]}_{unique_id}.gif"
+    # Move the generated gifs to the static folder
+
+
+    filtered_gifs = filter(generated_gifs, filepath)
+    return jsonify({'gifs': filtered_gifs, 'prompt': prompt})
+
+if __name__ == '__main__':
+
+
+    app.run(debug=True)
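
For reference (not part of the commit): a minimal client sketch for the /generate route above. The form fields mirror the keys app.py reads (prompt, seeds, lambda, and either an image upload or selected_example); it assumes the Flask app is serving locally on its default port 5000, and the sketch path is only an illustration.

import requests

with open("static/examples/sketch10.png", "rb") as f:  # illustrative path
    resp = requests.post(
        "http://127.0.0.1:5000/generate",
        data={"prompt": "a cat stretching", "seeds": "3", "lambda": "0.5"},
        files={"image": ("sketch.png", f, "image/png")},
    )
print(resp.json())  # expected shape: {"gifs": [...], "prompt": "..."}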
app_full.py ADDED
@@ -0,0 +1,243 @@
+from flask import Flask, render_template, request, jsonify
+import os
+import cv2
+import torch
+import torchvision
+import warnings
+import numpy as np
+from PIL import Image, ImageSequence
+from moviepy.editor import VideoFileClip
+import imageio
+import uuid
+
+from diffusers import (
+    TextToVideoSDPipeline,
+    AutoencoderKL,
+    DDPMScheduler,
+    DDIMScheduler,
+    UNet3DConditionModel,
+)
+import time
+from transformers import CLIPTokenizer, CLIPTextModel
+
+from diffusers.utils import export_to_video
+from gifs_filter import filter
+from invert_utils import ddim_inversion as dd_inversion
+from text2vid_modded_full import TextToVideoSDPipelineModded
+
+# Flask app setup
+app = Flask(__name__)
+app.config['UPLOAD_FOLDER'] = 'static/uploads'
+app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024  # 16MB max file size
+os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
+
+# Environment setup
+os.environ["TORCH_CUDNN_V8_API_ENABLED"] = "1"
+LORA_CHECKPOINT = "checkpoint-2500"
+device = 'cuda' if torch.cuda.is_available() else 'cpu'
+dtype = torch.bfloat16
+
+# Helper functions
+
+def cleanup_old_files(directory, age_in_seconds=600):
+    """
+    Deletes files older than a certain age in the specified directory.
+
+    Args:
+        directory (str): The directory to clean up.
+        age_in_seconds (int): The age in seconds; files older than this will be deleted.
+    """
+    now = time.time()
+    for filename in os.listdir(directory):
+        file_path = os.path.join(directory, filename)
+        # Only delete files (not directories)
+        if os.path.isfile(file_path):
+            file_age = now - os.path.getmtime(file_path)
+            if file_age > age_in_seconds:
+                try:
+                    os.remove(file_path)
+                    print(f"Deleted old file: {file_path}")
+                except Exception as e:
+                    print(f"Error deleting file {file_path}: {e}")
+
+def load_frames(image: Image, mode='RGBA'):
+    return np.array([np.array(frame.convert(mode)) for frame in ImageSequence.Iterator(image)])
+
+def save_gif(frames, path):
+    imageio.mimsave(path, [frame.astype(np.uint8) for frame in frames], format='GIF', duration=1/10)
+
+def load_image(imgname, target_size=None):
+    pil_img = Image.open(imgname).convert('RGB')
+    if target_size:
+        if isinstance(target_size, int):
+            target_size = (target_size, target_size)
+        pil_img = pil_img.resize(target_size, Image.Resampling.LANCZOS)
+    return torchvision.transforms.ToTensor()(pil_img).unsqueeze(0)  # Add batch dimension
+
+def prepare_latents(pipe, x_aug):
+    with torch.cuda.amp.autocast():
+        batch_size, num_frames, channels, height, width = x_aug.shape
+        x_aug = x_aug.reshape(batch_size * num_frames, channels, height, width)
+        latents = pipe.vae.encode(x_aug).latent_dist.sample()
+        latents = latents.view(batch_size, num_frames, -1, latents.shape[2], latents.shape[3])
+        latents = latents.permute(0, 2, 1, 3, 4)
+        return pipe.vae.config.scaling_factor * latents
+
+@torch.no_grad()
+def invert(pipe, inv, load_name, device="cuda", dtype=torch.bfloat16):
+    input_img = [load_image(load_name, 256).to(device, dtype=dtype).unsqueeze(1)] * 5
+    input_img = torch.cat(input_img, dim=1)
+    latents = prepare_latents(pipe, input_img).to(torch.bfloat16)
+    inv.set_timesteps(25)
+    id_latents = dd_inversion(pipe, inv, video_latent=latents, num_inv_steps=25, prompt="")[-1].to(dtype)
+    return torch.mean(id_latents, dim=2, keepdim=True)
+
+def load_primary_models(pretrained_model_path):
+    return (
+        DDPMScheduler.from_config(pretrained_model_path, subfolder="scheduler"),
+        CLIPTokenizer.from_pretrained(pretrained_model_path, subfolder="tokenizer"),
+        CLIPTextModel.from_pretrained(pretrained_model_path, subfolder="text_encoder"),
+        AutoencoderKL.from_pretrained(pretrained_model_path, subfolder="vae"),
+        UNet3DConditionModel.from_pretrained(pretrained_model_path, subfolder="unet"),
+    )
+
+
+def initialize_pipeline(model: str, device: str = "cuda"):
+    with warnings.catch_warnings():
+        warnings.simplefilter("ignore")
+        scheduler, tokenizer, text_encoder, vae, unet = load_primary_models(model)
+        pipe = TextToVideoSDPipeline.from_pretrained(
+            pretrained_model_name_or_path="damo-vilab/text-to-video-ms-1.7b",
+            scheduler=scheduler,
+            tokenizer=tokenizer,
+            text_encoder=text_encoder.to(device=device, dtype=torch.bfloat16),
+            vae=vae.to(device=device, dtype=torch.bfloat16),
+            unet=unet.to(device=device, dtype=torch.bfloat16),
+        )
+    pipe.scheduler = DDIMScheduler.from_config(pipe.scheduler.config)
+    return pipe, pipe.scheduler
+
+pipe_inversion, inv = initialize_pipeline(LORA_CHECKPOINT, device)
+pipe = TextToVideoSDPipelineModded.from_pretrained(
+    pretrained_model_name_or_path="damo-vilab/text-to-video-ms-1.7b",
+    scheduler=pipe_inversion.scheduler,
+    tokenizer=pipe_inversion.tokenizer,
+    text_encoder=pipe_inversion.text_encoder,
+    vae=pipe_inversion.vae,
+    unet=pipe_inversion.unet,
+).to(device)
+
+@torch.no_grad()
+def process(num_frames, num_seeds, generator, exp_dir, load_name, caption, lambda_):
+    pipe_inversion.to(device)
+    id_latents = invert(pipe_inversion, inv, load_name).to(device, dtype=dtype)
+    latents = id_latents.repeat(num_seeds, 1, 1, 1, 1)
+    generator = [torch.Generator(device="cuda").manual_seed(i) for i in range(num_seeds)]
+    video_frames = pipe(
+        prompt=caption,
+        negative_prompt="",
+        num_frames=num_frames,
+        num_inference_steps=25,
+        inv_latents=latents,
+        guidance_scale=9,
+        generator=generator,
+        lambda_=lambda_,
+    ).frames
+    try:
+        load_name = load_name.split("/")[-1]
+    except:
+        pass
+    gifs = []
+    for seed in range(num_seeds):
+        vid_name = f"{exp_dir}/mp4_logs/vid_{load_name[:-4]}-rand{seed}.mp4"
+        gif_name = f"{exp_dir}/gif_logs/vid_{load_name[:-4]}-rand{seed}.gif"
+        video_path = export_to_video(video_frames[seed], output_video_path=vid_name)
+        VideoFileClip(vid_name).write_gif(gif_name)
+        with Image.open(gif_name) as im:
+            frames = load_frames(im)
+
+        frames_collect = np.empty((0, 1024, 1024), int)
+        for frame in frames:
+            frame = cv2.resize(frame, (1024, 1024))[:, :, :3]
+            frame = cv2.cvtColor(255 - frame, cv2.COLOR_RGB2GRAY)
+
+            _, frame = cv2.threshold(255 - frame, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
+
+            frames_collect = np.append(frames_collect, [frame], axis=0)
+
+        save_gif(frames_collect, gif_name)
+        gifs.append(gif_name)
+
+    return gifs
+
+
+def generate_gifs(filepath, prompt, num_seeds=5, lambda_=0):
+    exp_dir = "static/app_tmp"
+    os.makedirs(exp_dir, exist_ok=True)
+    gifs = process(
+        num_frames=10,
+        num_seeds=num_seeds,
+        generator=None,
+        exp_dir=exp_dir,
+        load_name=filepath,
+        caption=prompt,
+        lambda_=lambda_
+    )
+    return gifs
+
+@app.route('/')
+def index():
+    return render_template('index.html')
+
+@app.route('/generate', methods=['POST'])
+def generate():
+
+    directories_to_clean = [
+        app.config['UPLOAD_FOLDER'],
+        'static/app_tmp/mp4_logs',
+        'static/app_tmp/gif_logs',
+        'static/app_tmp/png_logs'
+    ]
+
+    # Perform cleanup
+    os.makedirs('static/app_tmp', exist_ok=True)
+    for directory in directories_to_clean:
+        os.makedirs(directory, exist_ok=True)  # Ensure the directory exists
+        cleanup_old_files(directory)
+
+    prompt = request.form.get('prompt', '')
+    num_gifs = int(request.form.get('seeds', 3))
+    lambda_value = 1 - float(request.form.get('lambda', 0.5))
+    selected_example = request.form.get('selected_example', None)
+    file = request.files.get('image')
+
+    if not file and not selected_example:
+        return jsonify({'error': 'No image file provided or example selected'}), 400
+
+    if selected_example:
+        # Use the selected example image
+        filepath = os.path.join('static', 'examples', selected_example)
+        unique_id = None  # No need for unique ID
+    else:
+        # Save the uploaded image
+        unique_id = str(uuid.uuid4())
+        filepath = os.path.join(app.config['UPLOAD_FOLDER'], f"{unique_id}_uploaded_image.png")
+        file.save(filepath)
+
+    generated_gifs = generate_gifs(filepath, prompt, num_seeds=num_gifs, lambda_=lambda_value)
+
+    unique_id = str(uuid.uuid4())
+    # Append unique id to each gif path
+    for i in range(len(generated_gifs)):
+        os.rename(generated_gifs[i], f"{generated_gifs[i].split('.')[0]}_{unique_id}.gif")
+        generated_gifs[i] = f"{generated_gifs[i].split('.')[0]}_{unique_id}.gif"
+    # Move the generated gifs to the static folder
+
+
+    filtered_gifs = filter(generated_gifs, filepath)
+    return jsonify({'gifs': filtered_gifs, 'prompt': prompt})
+
+if __name__ == '__main__':
+
+
+    app.run(debug=True)
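
Note: app_full.py mirrors app.py with two differences — it imports TextToVideoSDPipelineModded from text2vid_modded_full rather than text2vid_modded, and it omits the run_setup() step that clones Hmrishav/t2v_sketch-lora and moves checkpoint-2500 into place, so the LoRA checkpoint must already exist locally before it is launched.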
environment.yml ADDED
@@ -0,0 +1,402 @@
+name: flipsketch
+channels:
+  - pytorch
+  - nvidia
+  - conda-forge
+  - defaults
+dependencies:
+  - _libgcc_mutex=0.1=main
+  - _openmp_mutex=5.1=1_gnu
+  - asttokens=2.4.1=pyhd8ed1ab_0
+  - blas=1.0=mkl
+  - brotli-python=1.0.9=py310hd8f1fbe_7
+  - bzip2=1.0.8=h7f98852_4
+  - ca-certificates=2024.2.2=hbcca054_0
+  - certifi=2024.2.2=pyhd8ed1ab_0
+  - charset-normalizer=2.0.4=pyhd8ed1ab_0
+  - comm=0.2.2=pyhd8ed1ab_0
+  - cuda=11.6.1=0
+  - cuda-cccl=11.6.55=hf6102b2_0
+  - cuda-command-line-tools=11.6.2=0
+  - cuda-compiler=11.6.2=0
+  - cuda-cudart=11.6.55=he381448_0
+  - cuda-cudart-dev=11.6.55=h42ad0f4_0
+  - cuda-cuobjdump=11.6.124=h2eeebcb_0
+  - cuda-cupti=11.6.124=h86345e5_0
+  - cuda-cuxxfilt=11.6.124=hecbf4f6_0
+  - cuda-driver-dev=11.6.55=0
+  - cuda-gdb=12.4.127=0
+  - cuda-libraries=11.6.1=0
+  - cuda-libraries-dev=11.6.1=0
+  - cuda-memcheck=11.8.86=0
+  - cuda-nsight=12.4.127=0
+  - cuda-nsight-compute=12.4.1=0
+  - cuda-nvcc=11.6.124=hbba6d2d_0
+  - cuda-nvdisasm=12.4.127=0
+  - cuda-nvml-dev=11.6.55=haa9ef22_0
+  - cuda-nvprof=12.4.127=0
+  - cuda-nvprune=11.6.124=he22ec0a_0
+  - cuda-nvrtc=11.6.124=h020bade_0
+  - cuda-nvrtc-dev=11.6.124=h249d397_0
+  - cuda-nvtx=11.6.124=h0630a44_0
+  - cuda-nvvp=12.4.127=0
+  - cuda-runtime=11.6.1=0
+  - cuda-samples=11.6.101=h8efea70_0
+  - cuda-sanitizer-api=12.4.127=0
+  - cuda-toolkit=11.6.1=0
+  - cuda-tools=11.6.1=0
+  - cuda-visual-tools=11.6.1=0
+  - debugpy=1.6.7=py310h6a678d5_0
+  - entrypoints=0.4=pyhd8ed1ab_0
+  - exceptiongroup=1.2.0=pyhd8ed1ab_2
+  - executing=2.0.1=pyhd8ed1ab_0
+  - ffmpeg=4.3=hf484d3e_0
+  - freetype=2.12.1=h4a9f257_0
+  - gds-tools=1.9.1.3=0
+  - gmp=6.2.1=h58526e2_0
+  - gnutls=3.6.15=he1e5248_0
+  - idna=3.4=pyhd8ed1ab_0
+  - intel-openmp=2023.1.0=hdb19cb5_46306
+  - ipykernel=6.29.3=pyhd33586a_0
+  - jedi=0.19.1=pyhd8ed1ab_0
+  - jpeg=9e=h166bdaf_1
+  - jupyter_client=7.3.4=pyhd8ed1ab_0
+  - jupyter_core=5.7.2=pyh31011fe_1
+  - lame=3.100=h7f98852_1001
+  - lcms2=2.12=h3be6417_0
+  - ld_impl_linux-64=2.38=h1181459_1
+  - lerc=3.0=h9c3ff4c_0
+  - libcublas=11.9.2.110=h5e84587_0
+  - libcublas-dev=11.9.2.110=h5c901ab_0
+  - libcufft=10.7.1.112=hf425ae0_0
+  - libcufft-dev=10.7.1.112=ha5ce4c0_0
+  - libcufile=1.9.1.3=0
+  - libcufile-dev=1.9.1.3=0
+  - libcurand=10.3.5.147=0
+  - libcurand-dev=10.3.5.147=0
+  - libcusolver=11.3.4.124=h33c3c4e_0
+  - libcusparse=11.7.2.124=h7538f96_0
+  - libcusparse-dev=11.7.2.124=hbbe9722_0
+  - libdeflate=1.17=h5eee18b_1
+  - libffi=3.4.4=h6a678d5_1
+  - libgcc-ng=11.2.0=h1234567_1
+  - libgomp=11.2.0=h1234567_1
+  - libiconv=1.16=h516909a_0
+  - libidn2=2.3.4=h5eee18b_0
+  - libnpp=11.6.3.124=hd2722f0_0
+  - libnpp-dev=11.6.3.124=h3c42840_0
+  - libnvjpeg=11.6.2.124=hd473ad6_0
+  - libnvjpeg-dev=11.6.2.124=hb5906b9_0
+  - libpng=1.6.39=h5eee18b_0
+  - libsodium=1.0.18=h36c2ea0_1
+  - libstdcxx-ng=11.2.0=he4da1e4_16
+  - libtasn1=4.19.0=h5eee18b_0
+  - libtiff=4.5.1=h6a678d5_0
+  - libunistring=0.9.10=h7f98852_0
+  - libuuid=1.41.5=h5eee18b_0
+  - libwebp-base=1.3.2=h5eee18b_0
+  - lz4-c=1.9.4=h6a678d5_1
+  - mkl=2023.1.0=h213fc3f_46344
+  - mkl-service=2.4.0=py310h5eee18b_1
+  - mkl_fft=1.3.8=py310h5eee18b_0
+  - mkl_random=1.2.4=py310hdb19cb5_0
+  - ncurses=6.4=h6a678d5_0
+  - nest-asyncio=1.6.0=pyhd8ed1ab_0
+  - nettle=3.7.3=hbbd107a_1
+  - nsight-compute=2024.1.1.4=0
+  - numpy-base=1.26.4=py310hb5e798b_0
+  - openh264=2.1.1=h780b84a_0
+  - openjpeg=2.4.0=h9ca470c_2
+  - openssl=3.0.13=h7f8727e_2
+  - packaging=24.0=pyhd8ed1ab_0
+  - parso=0.8.4=pyhd8ed1ab_0
+  - pexpect=4.9.0=pyhd8ed1ab_0
+  - pickleshare=0.7.5=py_1003
+  - pip=23.3.1=pyhd8ed1ab_0
+  - ptyprocess=0.7.0=pyhd3deb0d_0
+  - pure_eval=0.2.2=pyhd8ed1ab_0
+  - pygments=2.17.2=pyhd8ed1ab_0
+  - pysocks=1.7.1=pyha2e5f31_6
+  - python=3.10.14=h955ad1f_0
+  - python_abi=3.10=2_cp310
+  - pytorch=1.13.1=py3.10_cuda11.6_cudnn8.3.2_0
+  - pytorch-cuda=11.6=h867d48c_1
+  - pytorch-mutex=1.0=cuda
+  - pyzmq=25.1.2=py310h6a678d5_0
+  - readline=8.2=h5eee18b_0
+  - requests=2.31.0=pyhd8ed1ab_0
+  - setuptools=68.2.2=pyhd8ed1ab_0
+  - six=1.16.0=pyh6c4a22f_0
+  - sqlite=3.41.2=h5eee18b_0
+  - tbb=2021.8.0=hdb19cb5_0
+  - tk=8.6.12=h1ccaba5_0
+  - torchaudio=0.13.1=py310_cu116
+  - tornado=6.1=py310h5764c6d_3
+  - typing_extensions=4.9.0=pyha770c72_0
+  - tzdata=2024a=h8827d51_1
+  - urllib3=2.1.0=pyhd8ed1ab_0
+  - wcwidth=0.2.13=pyhd8ed1ab_0
+  - wheel=0.41.2=pyhd8ed1ab_0
+  - xz=5.4.6=h5eee18b_1
+  - zeromq=4.3.5=h6a678d5_0
+  - zlib=1.2.13=h5eee18b_1
+  - zstd=1.5.5=hc292b87_2
+  - pip:
+    - absl-py==2.1.0
+    - accelerate==0.29.2
+    - addict==2.4.0
+    - aiofiles==23.2.1
+    - aiohttp==3.9.3
+    - aiosignal==1.3.1
+    - albumentations==1.3.0
+    - aliyun-python-sdk-core==2.15.1
+    - aliyun-python-sdk-kms==2.16.2
+    - annotated-types==0.7.0
+    - antlr4-python3-runtime==4.8
+    - anyio==4.6.2.post1
+    - appdirs==1.4.4
+    - async-timeout==4.0.3
+    - attrs==23.2.0
+    - basicsr==1.4.2
+    - beautifulsoup4==4.12.3
+    - bitsandbytes==0.35.4
+    - black==21.4b2
+    - blinker==1.8.2
+    - blis==0.7.11
+    - boto3==1.34.97
+    - botocore==1.34.97
+    - bresenham==0.2.1
+    - cachetools==5.3.3
+    - captum==0.7.0
+    - catalogue==2.0.10
+    - cffi==1.16.0
+    - chardet==5.2.0
+    - click==8.1.7
+    - clip==0.1.0
+    - cloudpickle==3.0.0
+    - cmake==3.25.2
+    - compel==2.0.3
+    - confection==0.1.4
+    - contourpy==1.2.1
+    - controlnet-aux==0.0.6
+    - crcmod==1.7
+    - cryptography==42.0.7
+    - cssselect2==0.7.0
+    - cycler==0.12.1
+    - cymem==2.0.8
+    - cython==3.0.10
+    - datasets==2.18.0
+    - decorator==4.4.2
+    - decord==0.6.0
+    - deepspeed==0.8.0
+    - diffdist==0.1
+    - diffusers==0.27.2
+    - dill==0.3.8
+    - docker-pycreds==0.4.0
+    - easydict==1.10
+    - einops==0.3.0
+    - fairscale==0.4.13
+    - faiss-cpu==1.8.0
+    - fastapi==0.115.4
+    - ffmpy==0.3.0
+    - filelock==3.13.4
+    - flask==3.0.3
+    - flatbuffers==24.3.25
+    - fonttools==4.51.0
+    - frozenlist==1.4.1
+    - fsspec==2024.2.0
+    - ftfy==6.1.1
+    - future==1.0.0
+    - fvcore==0.1.5.post20221221
+    - gast==0.5.4
+    - gdown==5.1.0
+    - gitdb==4.0.11
+    - gitpython==3.1.43
+    - google-auth==2.29.0
+    - google-auth-oauthlib==0.4.6
+    - gradio==5.5.0
+    - gradio-client==1.4.2
+    - grpcio==1.62.1
+    - h11==0.14.0
+    - hjson==3.1.0
+    - httpcore==1.0.6
+    - httpx==0.27.2
+    - huggingface-hub==0.25.2
+    - hydra-core==1.1.1
+    - imageio==2.25.1
+    - imageio-ffmpeg==0.4.8
+    - importlib-metadata==7.1.0
+    - inquirerpy==0.3.4
+    - iopath==0.1.9
+    - ipdb==0.13.13
+    - ipympl==0.9.4
+    - ipython==8.23.0
+    - ipython-genutils==0.2.0
+    - ipywidgets==8.1.2
+    - itsdangerous==2.2.0
+    - jax==0.4.26
+    - jaxlib==0.4.26
+    - jinja2==3.1.3
+    - jmespath==0.10.0
+    - joblib==1.4.2
+    - jupyterlab-widgets==3.0.10
+    - kiwisolver==1.4.5
+    - kornia==0.6.0
+    - lightning-utilities==0.11.2
+    - lmdb==1.4.1
+    - loguru==0.7.2
+    - loralib==0.1.2
+    - lvis==0.5.3
+    - lxml==5.2.1
+    - markdown==3.6
+    - markdown-it-py==3.0.0
+    - markupsafe==2.1.5
+    - matplotlib==3.8.4
+    - matplotlib-inline==0.1.6
+    - mdurl==0.1.2
+    - mediapipe==0.10.11
+    - ml-dtypes==0.4.0
+    - modelcards==0.1.6
+    - modelscope==1.14.0
+    - motion-vector-extractor==1.0.6
+    - moviepy==1.0.3
+    - mpmath==1.3.0
+    - multidict==6.0.5
+    - multiprocess==0.70.16
+    - murmurhash==1.0.10
+    - mypy-extensions==1.0.0
+    - networkx==3.3
+    - ninja==1.11.1.1
+    - nltk==3.8.1
+    - numpy==1.24.2
+    - nvidia-cublas-cu11==11.10.3.66
+    - nvidia-cuda-cupti-cu12==12.1.105
+    - nvidia-cuda-nvrtc-cu11==11.7.99
+    - nvidia-cuda-nvrtc-cu12==12.1.105
+    - nvidia-cuda-runtime-cu11==11.7.99
+    - nvidia-cuda-runtime-cu12==12.1.105
+    - nvidia-cudnn-cu11==8.5.0.96
+    - nvidia-cufft-cu12==11.0.2.54
+    - nvidia-curand-cu12==10.3.2.106
+    - nvidia-nccl-cu12==2.20.5
+    - nvidia-nvjitlink-cu12==12.6.77
+    - nvidia-nvtx-cu12==12.1.105
+    - oauthlib==3.2.2
+    - omegaconf==2.1.1
+    - open-clip-torch==2.0.2
+    - opencv-contrib-python==4.9.0.80
+    - opencv-python==4.6.0.66
+    - opencv-python-headless==4.9.0.80
+    - opt-einsum==3.3.0
+    - orjson==3.10.11
+    - oss2==2.18.5
+    - pandas==1.5.3
+    - pathspec==0.12.1
+    - pathtools==0.1.2
+    - peft==0.10.0
+    - pfzy==0.3.4
+    - pillow==9.5.0
+    - pkgconfig==1.5.5
+    - platformdirs==4.2.0
+    - portalocker==2.8.2
+    - preshed==3.0.9
+    - proglog==0.1.10
+    - prompt-toolkit==3.0.43
+    - protobuf==3.20.3
+    - psutil==5.9.8
+    - py-cpuinfo==9.0.0
+    - pyarrow==15.0.2
+    - pyarrow-hotfix==0.6
+    - pyasn1==0.6.0
+    - pyasn1-modules==0.4.0
+    - pyav==12.0.5
+    - pycocotools==2.0.7
+    - pycparser==2.22
+    - pycryptodome==3.20.0
+    - pydantic==2.9.2
+    - pydantic-core==2.23.4
+    - pydeprecate==0.3.1
+    - pydot==2.0.0
+    - pydub==0.25.1
+    - pynvml==11.5.3
+    - pyparsing==3.1.2
+    - pyre-extensions==0.0.23
+    - python-dateutil==2.9.0.post0
+    - python-multipart==0.0.12
+    - pytorch-lightning==1.4.2
+    - pytz==2024.1
+    - pywavelets==1.6.0
+    - pyyaml==6.0.1
+    - qudida==0.0.4
+    - regex==2024.4.16
+    - reportlab==4.1.0
+    - requests-oauthlib==2.0.0
+    - rich==13.9.4
+    - rsa==4.9
+    - ruff==0.7.2
+    - s3transfer==0.10.1
+    - safehttpx==0.1.1
+    - safetensors==0.4.2
+    - scikit-image==0.19.3
+    - scikit-learn==1.4.2
+    - scikit-video==1.1.11
+    - scipy==1.10.1
+    - semantic-version==2.10.0
+    - sentry-sdk==1.44.1
+    - setproctitle==1.3.3
+    - shapely==2.0.3
+    - shellingham==1.5.4
+    - simplejson==3.19.2
+    - smmap==5.0.1
+    - sniffio==1.3.1
+    - sortedcontainers==2.4.0
+    - sounddevice==0.4.6
+    - soupsieve==2.5
+    - srsly==2.4.8
+    - stable-diffusion-sdkit==2.1.3
+    - stack-data==0.6.3
+    - starlette==0.41.2
+    - svg-path==6.3
+    - svglib==1.5.1
+    - svgpathtools==1.6.1
+    - svgwrite==1.4.3
+    - sympy==1.13.3
+    - tabulate==0.9.0
+    - tb-nightly==2.17.0a20240408
+    - tensorboard==2.12.0
+    - tensorboard-data-server==0.7.0
+    - tensorboard-plugin-wit==1.8.1
+    - termcolor==2.2.0
+    - test-tube==0.7.5
+    - thinc==8.1.10
+    - threadpoolctl==3.5.0
+    - tifffile==2024.2.12
+    - timm==0.6.11
+    - tinycss2==1.2.1
+    - tokenizers==0.20.1
+    - toml==0.10.2
+    - tomli==2.0.1
+    - tomlkit==0.12.0
+    - torch==1.13.1
+    - torchmetrics==0.6.0
+    - torchsummary==1.5.1
+    - torchvision==0.14.1
+    - tqdm==4.64.1
+    - traitlets==5.14.2
+    - transformers==4.45.2
+    - triton==2.3.0
+    - typer==0.12.5
+    - typing-inspect==0.9.0
+    - uvicorn==0.32.0
+    - wandb==0.16.6
+    - wasabi==1.1.2
+    - webencodings==0.5.1
+    - websockets==12.0
+    - werkzeug==3.0.2
+    - widgetsnbextension==4.0.10
+    - xformers==0.0.16
+    - xxhash==3.4.1
+    - yacs==0.1.8
+    - yapf==0.40.2
+    - yarl==1.9.4
+    - zipp==3.18.1
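
For reference (not part of the commit): the environment pins CUDA 11.6 builds of pytorch 1.13.1, so it assumes an NVIDIA driver compatible with CUDA 11.6. A bootstrap sketch, assuming conda is on PATH:

import subprocess

# Create the pinned env defined above, then launch the Flask app inside it.
subprocess.run(["conda", "env", "create", "-f", "environment.yml"], check=True)
subprocess.run(["conda", "run", "-n", "flipsketch", "python", "app.py"], check=True)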
gifs_filter.py ADDED
@@ -0,0 +1,68 @@
+# filter images
+from PIL import Image, ImageSequence
+import requests
+from tqdm import tqdm
+import numpy as np
+import torch
+from transformers import CLIPProcessor, CLIPModel
+
+def sample_frame_indices(clip_len, frame_sample_rate, seg_len):
+    converted_len = int(clip_len * frame_sample_rate)
+    end_idx = np.random.randint(converted_len, seg_len)
+    start_idx = end_idx - converted_len
+    indices = np.linspace(start_idx, end_idx, num=clip_len)
+    indices = np.clip(indices, start_idx, end_idx - 1).astype(np.int64)
+    return indices
+
+def load_frames(image: Image, mode='RGBA'):
+    return np.array([
+        np.array(frame.convert(mode))
+        for frame in ImageSequence.Iterator(image)
+    ])
+
+img_model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
+img_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
+
+
+def filter(gifs, input_image):
+    max_cosine = 0.9  # similarity threshold for keeping a GIF
+    max_gif = []
+
+    for gif in tqdm(gifs, total=len(gifs)):
+        with Image.open(gif) as im:
+            frames = load_frames(im)
+
+        frames = np.array(frames)
+        frames = frames[:, :, :, :3]
+        frames = np.transpose(frames, (0, 3, 1, 2))[1:]
+
+        image = Image.open(input_image)
+
+        inputs = img_processor(images=frames, return_tensors="pt", padding=False)
+        inputs_base = img_processor(images=image, return_tensors="pt", padding=False)
+
+        with torch.no_grad():
+            feat_img_base = img_model.get_image_features(pixel_values=inputs_base["pixel_values"])
+            feat_img_vid = img_model.get_image_features(pixel_values=inputs["pixel_values"])
+        cos_avg = 0
+        for i in range(len(feat_img_vid)):
+            # compare each video frame's CLIP feature against the input sketch
+            cosine_similarity = torch.nn.functional.cosine_similarity(
+                feat_img_base,
+                feat_img_vid[i].unsqueeze(0),
+                dim=1)
+            cos_avg += cosine_similarity.item()
+
+        cos_avg /= len(feat_img_vid)
+        print("Current cosine similarity: ", cos_avg)
+        print("Max cosine similarity: ", max_cosine)
+        if cos_avg > max_cosine:
+            max_gif.append(gif)
+    return max_gif
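
For reference (not part of the commit): filter() keeps a GIF only when the mean CLIP cosine similarity between its frames and the input sketch exceeds the hard-coded 0.9 threshold. A usage sketch with illustrative paths:

from gifs_filter import filter

# Paths are hypothetical examples; any list of generated GIFs plus the source sketch works.
kept = filter(
    ["static/app_tmp/gif_logs/vid_sketch10-rand0.gif"],
    "static/examples/sketch10.png",
)
print(kept)  # the subset of GIFs that passed the 0.9 threshold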
invert_utils.py ADDED
@@ -0,0 +1,89 @@
+import os
+import imageio
+import numpy as np
+from typing import Union
+
+import torch
+import torchvision
+
+from tqdm import tqdm
+from einops import rearrange
+
+
+def save_videos_grid(videos: torch.Tensor, path: str, rescale=False, n_rows=4, fps=8):
+    videos = rearrange(videos, "b c t h w -> t b c h w")
+    outputs = []
+    for x in videos:
+        x = torchvision.utils.make_grid(x, nrow=n_rows)
+        x = x.transpose(0, 1).transpose(1, 2).squeeze(-1)
+        if rescale:
+            x = (x + 1.0) / 2.0  # -1,1 -> 0,1
+        x = (x * 255).numpy().astype(np.uint8)
+        outputs.append(x)
+
+    os.makedirs(os.path.dirname(path), exist_ok=True)
+    imageio.mimsave(path, outputs, fps=fps)
+
+
+# DDIM Inversion
+@torch.no_grad()
+def init_prompt(prompt, pipeline):
+    uncond_input = pipeline.tokenizer(
+        [""], padding="max_length", max_length=pipeline.tokenizer.model_max_length,
+        return_tensors="pt"
+    )
+    uncond_embeddings = pipeline.text_encoder(uncond_input.input_ids.to(pipeline.device))[0]
+    text_input = pipeline.tokenizer(
+        [prompt],
+        padding="max_length",
+        max_length=pipeline.tokenizer.model_max_length,
+        truncation=True,
+        return_tensors="pt",
+    )
+    text_embeddings = pipeline.text_encoder(text_input.input_ids.to(pipeline.device))[0]
+    context = torch.cat([uncond_embeddings, text_embeddings])
+
+    return context
+
+
+def next_step(model_output: Union[torch.FloatTensor, np.ndarray], timestep: int,
+              sample: Union[torch.FloatTensor, np.ndarray], ddim_scheduler):
+    timestep, next_timestep = min(
+        timestep - ddim_scheduler.config.num_train_timesteps // ddim_scheduler.num_inference_steps, 999), timestep
+    # try:
+    alpha_prod_t = ddim_scheduler.alphas_cumprod[timestep] if timestep >= 0 else ddim_scheduler.final_alpha_cumprod
+    # except:
+    #     alpha_prod_t = ddim_scheduler.alphas_cumprod[timestep]  # if timestep >= 0 else ddim_scheduler.final_alpha_cumprod
+    alpha_prod_t_next = ddim_scheduler.alphas_cumprod[next_timestep]
+    beta_prod_t = 1 - alpha_prod_t
+    next_original_sample = (sample - beta_prod_t ** 0.5 * model_output) / alpha_prod_t ** 0.5
+    next_sample_direction = (1 - alpha_prod_t_next) ** 0.5 * model_output
+    next_sample = alpha_prod_t_next ** 0.5 * next_original_sample + next_sample_direction
+    return next_sample
+
+
+def get_noise_pred_single(latents, t, context, unet):
+    noise_pred = unet(latents, t, encoder_hidden_states=context)["sample"]
+    return noise_pred
+
+
+@torch.no_grad()
+def ddim_loop(pipeline, ddim_scheduler, latent, num_inv_steps, prompt):
+    context = init_prompt(prompt, pipeline)
+    uncond_embeddings, cond_embeddings = context.chunk(2)
+    all_latent = [latent]
+    latent = latent.clone().detach()
+    for i in tqdm(range(num_inv_steps)):
+        t = ddim_scheduler.timesteps[len(ddim_scheduler.timesteps) - i - 1]
+        noise_pred = get_noise_pred_single(latent, t, cond_embeddings, pipeline.unet)
+        noise_pred_unc = get_noise_pred_single(latent, t, uncond_embeddings, pipeline.unet)
+        noise_pred = noise_pred_unc + 9.0 * (noise_pred_unc - noise_pred)
+        latent = next_step(noise_pred, t, latent, ddim_scheduler)
+        all_latent.append(latent)
+    return all_latent
+
+
+@torch.no_grad()
+def ddim_inversion(pipeline, ddim_scheduler, video_latent, num_inv_steps, prompt=""):
+    ddim_latents = ddim_loop(pipeline, ddim_scheduler, video_latent, num_inv_steps, prompt)
+    return ddim_latents
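
For reference (not part of the commit): next_step() above is the deterministic DDIM inversion update. From the latent x_t at a lower timestep it reconstructs the clean estimate and re-noises toward the next, higher timestep t+\Delta:

\hat{x}_0 = \frac{x_t - \sqrt{1-\bar\alpha_t}\,\epsilon_\theta(x_t, t)}{\sqrt{\bar\alpha_t}}, \qquad
x_{t+\Delta} = \sqrt{\bar\alpha_{t+\Delta}}\,\hat{x}_0 + \sqrt{1-\bar\alpha_{t+\Delta}}\,\epsilon_\theta(x_t, t)

Here \bar\alpha are the scheduler's cumulative alpha products (alphas_cumprod) and \epsilon_\theta is the UNet noise prediction; ddim_loop() combines the conditional and unconditional predictions with a fixed weight of 9.0 before each step.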
read_vids.py ADDED
@@ -0,0 +1,27 @@
+import imageio.v3 as iio
+import os
+from sys import argv
+video_name = argv[1]
+
+video = video_name
+video_id = video.split("/")[-1].replace(".mp4", "")
+
+
+png_base = "png_logs"
+try:
+    os.mkdir(png_base)
+except:
+    pass
+
+video_id = os.path.join(png_base, video_id)
+all_frames = list(iio.imiter(video))
+
+ctr = 0
+try:
+    os.makedirs(video_id)
+except:
+    pass
+for idx, frame in enumerate(all_frames):
+
+    iio.imwrite(f"{video_id}/{ctr:03d}.jpg", frame)
+    ctr += 1
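
For reference: read_vids.py is a small CLI helper, invoked as python read_vids.py path/to/video.mp4; it dumps every frame of the clip as zero-padded JPEGs under png_logs/<video name>/.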
requirements.txt ADDED
@@ -0,0 +1,44 @@
+accelerate==0.29.2
+blinker==1.9.0
+certifi==2024.8.30
+charset-normalizer==3.4.0
+click==8.1.7
+decorator==4.4.2
+diffusers==0.27.2
+einops==0.8.0
+filelock==3.16.1
+Flask==3.0.3
+fsspec==2024.10.0
+huggingface-hub==0.25.2
+idna==3.10
+imageio==2.36.0
+imageio-ffmpeg==0.5.1
+importlib_metadata==8.5.0
+itsdangerous==2.2.0
+Jinja2==3.1.4
+MarkupSafe==3.0.2
+moviepy==1.0.3
+numpy==1.24.2
+nvidia-cublas-cu11==11.10.3.66
+nvidia-cuda-nvrtc-cu11==11.7.99
+nvidia-cuda-runtime-cu11==11.7.99
+nvidia-cudnn-cu11==8.5.0.96
+opencv-python==4.10.0.84
+packaging==24.2
+pillow==10.4.0
+proglog==0.1.10
+psutil==6.1.0
+python-dotenv==1.0.1
+PyYAML==6.0.2
+regex==2024.11.6
+requests==2.32.3
+safetensors==0.4.5
+tokenizers==0.20.3
+torch==1.13.1
+torchvision==0.14.1
+tqdm==4.67.0
+transformers==4.45.2
+typing_extensions==4.12.2
+urllib3==2.2.3
+Werkzeug==3.1.3
+zipp==3.21.0
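
Note: the core pins here (torch==1.13.1, torchvision==0.14.1, diffusers==0.27.2, transformers==4.45.2, huggingface-hub==0.25.2) match the conda environment above, so requirements.txt offers a lighter pip-only path via pip install -r requirements.txt.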
static/app_tmp/gif_logs/vid_sketch10-rand0_4e766a8e-9d22-4818-8991-e884ce17e5e5.gif ADDED
static/app_tmp/gif_logs/vid_sketch10-rand0_508fa599-d685-462e-ad06-11ca4fd15d6f.gif ADDED
static/app_tmp/gif_logs/vid_sketch10-rand0_9e7e07af-2adc-47b0-8aa4-716a934690e8.gif ADDED
static/app_tmp/gif_logs/vid_sketch10-rand0_dfcba486-0d8c-4d68-9689-97f1fb889213.gif ADDED
static/app_tmp/gif_logs/vid_sketch10-rand1_4e766a8e-9d22-4818-8991-e884ce17e5e5.gif ADDED
static/app_tmp/gif_logs/vid_sketch10-rand1_508fa599-d685-462e-ad06-11ca4fd15d6f.gif ADDED
static/app_tmp/gif_logs/vid_sketch10-rand1_9e7e07af-2adc-47b0-8aa4-716a934690e8.gif ADDED
static/app_tmp/gif_logs/vid_sketch10-rand1_dfcba486-0d8c-4d68-9689-97f1fb889213.gif ADDED
static/app_tmp/gif_logs/vid_sketch10-rand2_4e766a8e-9d22-4818-8991-e884ce17e5e5.gif ADDED
static/app_tmp/gif_logs/vid_sketch10-rand2_508fa599-d685-462e-ad06-11ca4fd15d6f.gif ADDED
static/app_tmp/gif_logs/vid_sketch10-rand2_9e7e07af-2adc-47b0-8aa4-716a934690e8.gif ADDED
static/app_tmp/gif_logs/vid_sketch10-rand2_dfcba486-0d8c-4d68-9689-97f1fb889213.gif ADDED
static/app_tmp/gif_logs/vid_sketch10-rand3_4e766a8e-9d22-4818-8991-e884ce17e5e5.gif ADDED
static/app_tmp/gif_logs/vid_sketch10-rand3_508fa599-d685-462e-ad06-11ca4fd15d6f.gif ADDED
static/app_tmp/gif_logs/vid_sketch10-rand3_9e7e07af-2adc-47b0-8aa4-716a934690e8.gif ADDED
static/app_tmp/gif_logs/vid_sketch10-rand3_dfcba486-0d8c-4d68-9689-97f1fb889213.gif ADDED
static/app_tmp/gif_logs/vid_sketch10-rand4_4e766a8e-9d22-4818-8991-e884ce17e5e5.gif ADDED
static/app_tmp/gif_logs/vid_sketch10-rand4_508fa599-d685-462e-ad06-11ca4fd15d6f.gif ADDED
static/app_tmp/gif_logs/vid_sketch10-rand4_9e7e07af-2adc-47b0-8aa4-716a934690e8.gif ADDED
static/app_tmp/gif_logs/vid_sketch10-rand4_dfcba486-0d8c-4d68-9689-97f1fb889213.gif ADDED
static/app_tmp/gif_logs/vid_sketch10-rand5_508fa599-d685-462e-ad06-11ca4fd15d6f.gif ADDED
static/app_tmp/gif_logs/vid_sketch10-rand6_508fa599-d685-462e-ad06-11ca4fd15d6f.gif ADDED
static/app_tmp/gif_logs/vid_sketch10-rand7_508fa599-d685-462e-ad06-11ca4fd15d6f.gif ADDED
static/app_tmp/gif_logs/vid_sketch10-rand8_508fa599-d685-462e-ad06-11ca4fd15d6f.gif ADDED
static/app_tmp/gif_logs/vid_sketch10-rand9_508fa599-d685-462e-ad06-11ca4fd15d6f.gif ADDED
static/app_tmp/gif_logs/vid_sketch3-rand0_875203a1-f830-46e7-a287-4a0bc2c3a648.gif ADDED
static/app_tmp/gif_logs/vid_sketch3-rand1_875203a1-f830-46e7-a287-4a0bc2c3a648.gif ADDED
static/app_tmp/gif_logs/vid_sketch3-rand2_875203a1-f830-46e7-a287-4a0bc2c3a648.gif ADDED
static/app_tmp/gif_logs/vid_sketch3-rand3_875203a1-f830-46e7-a287-4a0bc2c3a648.gif ADDED
static/app_tmp/gif_logs/vid_sketch3-rand4_875203a1-f830-46e7-a287-4a0bc2c3a648.gif ADDED
static/app_tmp/gif_logs/vid_sketch8-rand0_47fc0372-4688-4a2a-abb3-817ccfee8816.gif ADDED
static/app_tmp/gif_logs/vid_sketch8-rand0_77158110-9239-4771-bb44-a83c3aa47567.gif ADDED
static/app_tmp/gif_logs/vid_sketch8-rand0_fd1dace5-80a2-4a0f-afb1-c6aa0943c91a.gif ADDED
static/app_tmp/gif_logs/vid_sketch8-rand1_47fc0372-4688-4a2a-abb3-817ccfee8816.gif ADDED
static/app_tmp/gif_logs/vid_sketch8-rand1_77158110-9239-4771-bb44-a83c3aa47567.gif ADDED
static/app_tmp/gif_logs/vid_sketch8-rand1_fd1dace5-80a2-4a0f-afb1-c6aa0943c91a.gif ADDED
static/app_tmp/gif_logs/vid_sketch8-rand2_47fc0372-4688-4a2a-abb3-817ccfee8816.gif ADDED
static/app_tmp/gif_logs/vid_sketch8-rand2_77158110-9239-4771-bb44-a83c3aa47567.gif ADDED
static/app_tmp/gif_logs/vid_sketch8-rand2_fd1dace5-80a2-4a0f-afb1-c6aa0943c91a.gif ADDED
static/app_tmp/gif_logs/vid_sketch8-rand3_47fc0372-4688-4a2a-abb3-817ccfee8816.gif ADDED
static/app_tmp/gif_logs/vid_sketch8-rand3_77158110-9239-4771-bb44-a83c3aa47567.gif ADDED
static/app_tmp/gif_logs/vid_sketch8-rand3_fd1dace5-80a2-4a0f-afb1-c6aa0943c91a.gif ADDED