HReynaud committed
Commit cfb9037
1 Parent(s): f74e63f

Upload folder using huggingface_hub
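
For context, this kind of commit is typically produced with huggingface_hub's upload_folder API; a minimal sketch (the repo_id below is a placeholder, not taken from this commit):

    from huggingface_hub import HfApi

    # Push a local folder to a Space; "username/echonet-synthetic" is a placeholder repo_id.
    api = HfApi()
    api.upload_folder(
        folder_path=".",
        repo_id="username/echonet-synthetic",
        repo_type="space",
        commit_message="Upload folder using huggingface_hub",
    )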

.vscode/launch.json ADDED
@@ -0,0 +1,16 @@
+ {
+     // Use IntelliSense to learn about possible attributes.
+     // Hover to view descriptions of existing attributes.
+     // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+     "version": "0.2.0",
+     "configurations": [
+         {
+             "name": "Python Debugger: Current File",
+             "type": "debugpy",
+             "request": "launch",
+             "program": "${file}",
+             "console": "integratedTerminal",
+             "justMyCode": false
+         }
+     ]
+ }
README.md CHANGED
@@ -1,10 +1,10 @@
  ---
  title: EchoNet Synthetic
- emoji: 🏃
- colorFrom: purple
- colorTo: pink
+ emoji: 🦀
+ colorFrom: blue
+ colorTo: gray
  sdk: gradio
- sdk_version: 4.31.4
+ sdk_version: 4.19.2
  app_file: app.py
  pinned: false
  ---
app.py ADDED
@@ -0,0 +1,311 @@
+ import gradio as gr
+ import os
+ import json
+ import torch
+ import torch.nn as nn
+ import diffusers
+ from einops import rearrange
+ from PIL import Image
+ from omegaconf import OmegaConf
+ from tqdm import tqdm
+ import cv2
+
+ NUM_STEPS = 64
+ FRAMES = 192
+ FPS = 32
+
+ mycss = """
+ .contain {
+     width: 1000px;
+     margin: 0 auto;
+ }
+
+ .svelte-1pijsyv {
+     width: 448px;
+ }
+
+ .arrow {
+     display: flex;
+     align-items: center;
+     margin: 7px 0;
+ }
+
+ .arrow-tail {
+     width: 270px;
+     height: 50px;
+     background-color: black;
+     transition: background-color 0.3s;
+ }
+
+ .arrow-head {
+     width: 0;
+     height: 0;
+     border-top: 70px solid transparent;
+     border-bottom: 70px solid transparent;
+     border-left: 120px solid black;
+     transition: border-left-color 0.3s;
+ }
+
+ @media (prefers-color-scheme: dark) {
+     .arrow-tail {
+         background-color: white;
+     }
+     .arrow-head {
+         border-left-color: white;
+     }
+ }
+
+ """
+
+ myhtml = """
+ <div class="arrow">
+     <div class="arrow-tail"></div>
+     <div class="arrow-head"></div>
+ </div>
+ """
+
+ myjs = """
+ function setLoopTrue() {
+     let videos = document.getElementsByTagName('video');
+     if (videos.length > 0) {
+         document.getElementsByTagName('video')[0].loop = true;
+     }
+     setTimeout(setLoopTrue, 3000);
+ }
+ """
+
+ def load_model(path):
+
+     # find config.json
+     json_path = os.path.join(path, "config.json")
+     assert os.path.exists(json_path), f"Could not find config.json at {json_path}"
+     with open(json_path, "r") as f:
+         config = json.load(f)
+
+     # instantiate class
+     klass_name = config["_class_name"]
+     klass = getattr(diffusers, klass_name, None)
+     if klass is None:
+         klass = globals().get(klass_name, None)
+     assert klass is not None, f"Could not find class {klass_name} in diffusers or global scope."
+     assert getattr(klass, "from_pretrained", None) is not None, f"Class {klass_name} does not support 'from_pretrained'."
+
+     # load checkpoint
+     model = klass.from_pretrained(path)
+
+     return model, config
+
+ def load_scheduler(config):
+     scheduler_kwargs = OmegaConf.to_container(config.noise_scheduler)
+     scheduler_klass_name = scheduler_kwargs.pop("_class_name")
+     scheduler_klass = getattr(diffusers, scheduler_klass_name, None)
+     scheduler = scheduler_klass(**scheduler_kwargs)
+     return scheduler
+
+ def padf(tensor, mult=3):
+     pad = 2**mult - (tensor.shape[-1] % 2**mult)
+     pad = pad//2
+     tensor = nn.functional.pad(tensor, (pad, pad, pad, pad, 0, 0), mode='replicate')
+     return tensor, pad
+
+ def unpadf(tensor, pad=1):
+     return tensor[..., pad:-pad, pad:-pad]
+
+ def pad_reshape(tensor, mult=3):
+     tensor, pad = padf(tensor, mult=mult)
+     tensor = rearrange(tensor, "b c t h w -> b t c h w")
+     return tensor, pad
+
+ def unpad_reshape(tensor, pad=1):
+     tensor = rearrange(tensor, "b t c h w -> b c t h w")
+     tensor = unpadf(tensor, pad=pad)
+     return tensor
+
+ class Context:
+     def __init__(self, lidm_path, lvdm_path, vae_path, config_path):
+         self.lidm, self.lidm_config = load_model(lidm_path)
+         self.lvdm, self.lvdm_config = load_model(lvdm_path)
+         self.vae, self.vae_config = load_model(vae_path)
+         self.config = OmegaConf.load(config_path)
+         self.models = [self.lidm, self.lvdm, self.vae]
+         self.scheduler = load_scheduler(self.config)
+
+         self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+         self.dtype = torch.float32
+
+         for model in self.models:
+             model.to(self.device, dtype=self.dtype)
+             model.eval()
+
+         print("Models loaded")
+
+     def get_img(self, steps):
+         print("generating image")
+         self.scheduler.set_timesteps(steps)
+         with torch.no_grad():
+             B, C, H, W = 1, self.lidm_config["in_channels"], self.lidm_config["sample_size"], self.lidm_config["sample_size"]
+
+             timesteps = self.scheduler.timesteps
+             forward_kwargs = {}
+
+             latents = torch.randn((B, C, H, W), device=self.device, dtype=self.dtype)
+             with torch.autocast("cuda"):
+                 for t in tqdm(timesteps):
+                     forward_kwargs["timestep"] = t
+                     latent_model_input = latents
+                     latent_model_input = self.scheduler.scale_model_input(latent_model_input, timestep=t)
+                     latent_model_input, padding = padf(latent_model_input, mult=3)
+                     noise_pred = self.lidm(latent_model_input, **forward_kwargs).sample
+                     noise_pred = unpadf(noise_pred, pad=padding)
+                     latents = self.scheduler.step(noise_pred, t, latents).prev_sample
+             # latent shape [B, C, H, W]
+             latents = latents / self.vae.config.scaling_factor
+             img = self.vae.decode(latents).sample
+             img = (img + 1) * 128 # [-1, 1] -> [0, 256]
+             img = img.mean(1).unsqueeze(1).repeat([1, 3, 1, 1])
+             img = img.clamp(0, 255).to(torch.uint8).cpu().numpy()
+             img = img[0].transpose(1, 2, 0)
+             img = Image.fromarray(img)
+
+         return img, latents
+
+     def get_vid(self, lvef: int, ref_latent: torch.Tensor, steps: int):
+         print("generating video")
+         self.scheduler.set_timesteps(steps)
+
+         with torch.no_grad():
+             B, C, T, H, W = 1, 4, self.lvdm_config["num_frames"], self.lvdm_config["sample_size"], self.lvdm_config["sample_size"]
+
+             if FRAMES > T:
+                 OT = T//2 # overlap 64//2
+                 TR = (FRAMES - T) / 32 # extra windows: (192 - 64) / 32 = 4
+                 TR = int(TR + 1) # total repetitions (windows)
+                 NT = (T-OT) * TR + OT
+             else:
+                 OT = 0
+                 TR = 1
+                 NT = T
+
+             timesteps = self.scheduler.timesteps
+
+             lvef = lvef / 100
+             lvef = torch.tensor([lvef]*TR, device=self.device, dtype=self.dtype)
+             lvef = lvef[:, None, None]
+             print(lvef.shape)
+
+             forward_kwargs = {}
+             forward_kwargs["added_time_ids"] = torch.zeros((B*TR, self.config.unet.addition_time_embed_dim), device=self.device, dtype=self.dtype)
+             forward_kwargs["encoder_hidden_states"] = lvef
+             print(forward_kwargs["added_time_ids"].shape)
+
+             latent_cond_images = ref_latent * self.vae.config.scaling_factor
+             latent_cond_images = latent_cond_images[:,:,None,:,:].repeat([1, 1, NT, 1, 1]).to(self.device, dtype=self.dtype)
+             print(latent_cond_images.shape)
+
+             latents = torch.randn((B, C, NT, H, W), device=self.device, dtype=self.dtype)
+             print(latents.shape)
+
+             with torch.autocast("cuda"):
+                 for t in tqdm(timesteps):
+                     forward_kwargs["timestep"] = t
+                     latent_model_input = latents
+                     latent_model_input = self.scheduler.scale_model_input(latent_model_input, timestep=t)
+                     latent_model_input = torch.cat((latent_model_input, latent_cond_images), dim=1) # B x 2C x T x H x W
+                     latent_model_input, padding = pad_reshape(latent_model_input, mult=3) # B x T x 2C x H+P x W+P
+
+                     inputs = torch.cat([latent_model_input[:,r*(T-OT):r*(T-OT)+T] for r in range(TR)], dim=0) # B*TR x T x 2C x H+P x W+P
+                     noise_pred = self.lvdm(inputs, **forward_kwargs).sample
+                     outputs = torch.chunk(noise_pred, TR, dim=0) # TR x B x T x C x H x W
+                     noise_predictions = []
+                     for r in range(TR):
+                         noise_predictions.append(outputs[r] if r == 0 else outputs[r][:,OT:])
+                     noise_pred = torch.cat(noise_predictions, dim=1) # B x NT x C x H x W
+                     noise_pred = unpad_reshape(noise_pred, pad=padding)
+                     latents = self.scheduler.step(noise_pred, t, latents).prev_sample
+
+             print("done generating noise")
+             # latent shape [B, C, T, H, W]
+             latents = latents / self.vae.config.scaling_factor
+             latents = rearrange(latents, "b c t h w -> (b t) c h w")
+
+             chunk_size = 16
+             chunked_latents = torch.split(latents, chunk_size, dim=0)
+             decoded_chunks = []
+             for chunk in chunked_latents:
+                 decoded_chunks.append(self.vae.decode(chunk.float().to(self.device)).sample.cpu())
+             video = torch.cat(decoded_chunks, dim=0) # (B*T) x C x H x W
+             video = rearrange(video, "(b t) c h w -> b t h w c", b=B)[0] # T H W C
+             video = (video + 1) * 128 # [-1, 1] -> [0, 256]
+             video = video.mean(-1).unsqueeze(-1).repeat([1, 1, 1, 3]) # T H W 3
+             video = video.clamp(0, 255).to(torch.uint8).cpu().numpy()
+             out = cv2.VideoWriter('output.mp4', cv2.VideoWriter_fourcc(*'mp4v'), FPS, (112, 112))
+             for img in video:
+                 out.write(img)
+             out.release()
+
+         return "output.mp4"
+
+
+ ctx = Context(
+     lidm_path="resources/lidm",
+     lvdm_path="resources/lvdm",
+     vae_path="resources/ivae",
+     config_path="resources/config.yaml"
+ )
+
+ with gr.Blocks(css=mycss, js=myjs) as demo:
+     with gr.Row():
+         # Greet user with an explanation of the demo
+         gr.Markdown("""
+         # EchoNet-Synthetic: Privacy-preserving Video Generation for Safe Medical Data Sharing
+         This demo is attached to a paper under review at MICCAI 2024, and is targeted at the reviewers of that paper.
+
+         1. Start by generating an image with the "Generate Image" button. This generates a random image similar to those in the EchoNet-Dynamic dataset.
+         2. Adjust the "Ejection Fraction Score" slider to set the ejection fraction used for video generation.
+         3. Generate a video with the "Generate Video" button. This generates a video from the generated image, with the ejection fraction score you chose.
+
+         We leave the ejection fraction input completely open, so you can see how the video generation changes with different ejection fraction scores, even unrealistic ones. The normal ejection fraction range is 50-75.<br>
+         We recommend 64 steps for the best image quality, but you can adjust this to see how it affects the video generation.
+
+         """)
+
+     with gr.Row():
+         # core activity
+         # 3 columns
+         with gr.Column():
+             # Image generation goes here
+             img = gr.Image(interactive=False, label="Generated Image") # display only
+             img_btn = gr.Button("Generate Image")
+
+         with gr.Column():
+             # LVEF slider goes here
+             # Add a big arrow graphic for show
+             gr.HTML(myhtml)
+             efslider = gr.Slider(minimum=0, maximum=100, value=65, step=1, label="Ejection Fraction Score (%)") #
+             dsslider = gr.Slider(minimum=1, maximum=999, value=64, step=1, label="Sampling Steps") #
+             pass
+
+         with gr.Column():
+             # Video generation goes here
+             vid = gr.Video(interactive=False, autoplay=True, label="Generated Video")
+             vid_btn = gr.Button("Generate Video")
+
+     with gr.Row():
+         # Additional information
+         gr.Examples(
+             examples=[[f"resources/examples/ef{i}.png", f"resources/examples/ef{i}.mp4", i, 64] for i in [20, 30, 40, 50, 60, 70, 80, 90]],
+             inputs=[img, vid, efslider, dsslider],
+             outputs=None,
+             fn=None,
+             cache_examples=False,
+         )
+
+
+     ltt_img = gr.State() # latent image state
+
+     img.change() # apply center-cropping
+     img_btn.click(fn=ctx.get_img, inputs=[dsslider], outputs=[img, ltt_img]) # generate image with lidm
+
+     vid_btn.click(fn=ctx.get_vid, inputs=[efslider, ltt_img, dsslider], outputs=[vid]) # generate video with lvdm
+
+ demo.launch(share=False)
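
A note on the windowing in get_vid above: with the committed constants (FRAMES = 192 and num_frames T = 64 from the LVDM config), the video latent is denoised as overlapping 64-frame windows. A minimal sketch of that arithmetic, using only values from the code above:

    # Window arithmetic from get_vid, for FRAMES = 192 and T = 64.
    FRAMES, T = 192, 64
    OT = T // 2                                  # overlap: 32 frames
    TR = int((FRAMES - T) / 32 + 1)              # number of windows: (192 - 64) / 32 + 1 = 5
    NT = (T - OT) * TR + OT                      # total latent frames: 32 * 5 + 32 = 192
    starts = [r * (T - OT) for r in range(TR)]   # window offsets: [0, 32, 64, 96, 128]
    assert NT == FRAMES == starts[-1] + T

The first window keeps all 64 predicted frames and each later window keeps only the 32 frames past its overlap, so the five windows reassemble into the full 192-frame prediction.
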
output.mp4 ADDED
Binary file (143 kB).
 
requirements.txt ADDED
@@ -0,0 +1,10 @@
+ torch==2.1.1
+ torchvision==0.16.1
+ diffusers==0.24.0
+ einops==0.7.0
+ accelerate==0.25.0
+ opencv-python==4.8.1.78
+ pillow==10.1.0
+ omegaconf==2.3.0
+ tqdm==4.66.1
+ gradio==4.19.2
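
To check that a local environment matches these pins before launching app.py, one option is a quick version comparison (a sketch; the package list simply mirrors the file above):

    from importlib.metadata import version

    pins = {
        "torch": "2.1.1", "torchvision": "0.16.1", "diffusers": "0.24.0",
        "einops": "0.7.0", "accelerate": "0.25.0", "opencv-python": "4.8.1.78",
        "pillow": "10.1.0", "omegaconf": "2.3.0", "tqdm": "4.66.1", "gradio": "4.19.2",
    }
    for name, pinned in pins.items():
        installed = version(name)  # raises PackageNotFoundError if the package is missing
        print(name, pinned, installed, "OK" if installed.startswith(pinned) else "MISMATCH")
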
resources/config.yaml ADDED
@@ -0,0 +1,51 @@
+ globals:
+   target_fps: 32
+   target_nframes: 64
+
+ unet:
+   _class_name: UNetSpatioTemporalConditionModel
+   addition_time_embed_dim: 1
+   block_out_channels:
+   - 128
+   - 256
+   - 256
+   - 512
+   cross_attention_dim: 1
+   down_block_types:
+   - CrossAttnDownBlockSpatioTemporal
+   - CrossAttnDownBlockSpatioTemporal
+   - CrossAttnDownBlockSpatioTemporal
+   - DownBlockSpatioTemporal
+   in_channels: 8
+   layers_per_block: 2
+   num_attention_heads:
+   - 8
+   - 16
+   - 16
+   - 32
+   num_frames: ${globals.target_nframes}
+   out_channels: 4
+   projection_class_embeddings_input_dim: 1
+   sample_size: 14
+   transformer_layers_per_block: 1
+   up_block_types:
+   - UpBlockSpatioTemporal
+   - CrossAttnUpBlockSpatioTemporal
+   - CrossAttnUpBlockSpatioTemporal
+   - CrossAttnUpBlockSpatioTemporal
+
+ noise_scheduler:
+   _class_name: DDPMScheduler
+   num_train_timesteps: 1000
+   beta_start: 0.0001
+   beta_end: 0.02
+   beta_schedule: linear # linear, scaled_linear, or squaredcos_cap_v2
+   variance_type: fixed_small # fixed_small, fixed_small_log, fixed_large, fixed_large_log, learned or learned_range
+   clip_sample: true
+   clip_sample_range: 4.0 # default 1
+   prediction_type: v_prediction # epsilon, sample, v_prediction
+   thresholding: false # do not touch
+   dynamic_thresholding_ratio: 0.995 # unused
+   sample_max_value: 1.0 # unused
+   timestep_spacing: "leading" #
+ steps_offset: 0 # unused
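
For reference, the noise_scheduler block above is exactly what load_scheduler in app.py turns into a diffusers scheduler; a minimal sketch of that round trip (assuming the file lives at resources/config.yaml, as in this commit):

    import diffusers
    from omegaconf import OmegaConf

    # Mirrors load_scheduler() from app.py: pop the class name, pass the rest as kwargs.
    config = OmegaConf.load("resources/config.yaml")
    kwargs = OmegaConf.to_container(config.noise_scheduler)
    scheduler_cls = getattr(diffusers, kwargs.pop("_class_name"))  # DDPMScheduler
    scheduler = scheduler_cls(**kwargs)
    scheduler.set_timesteps(64)  # NUM_STEPS used by the demo
    print(scheduler.timesteps[:5])
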
resources/examples/ef20.mp4 ADDED
Binary file (90.5 kB).
 
resources/examples/ef20.png ADDED
resources/examples/ef30.mp4 ADDED
Binary file (118 kB).
 
resources/examples/ef30.png ADDED
resources/examples/ef40.mp4 ADDED
Binary file (125 kB).
 
resources/examples/ef40.png ADDED
resources/examples/ef50.mp4 ADDED
Binary file (124 kB).
 
resources/examples/ef50.png ADDED
resources/examples/ef60.mp4 ADDED
Binary file (129 kB).
 
resources/examples/ef60.png ADDED
resources/examples/ef70.mp4 ADDED
Binary file (95.7 kB).
 
resources/examples/ef70.png ADDED
resources/examples/ef80.mp4 ADDED
Binary file (137 kB).
 
resources/examples/ef80.png ADDED
resources/examples/ef90.mp4 ADDED
Binary file (152 kB).
 
resources/examples/ef90.png ADDED
resources/ivae/config.json ADDED
@@ -0,0 +1,31 @@
+ {
+   "_class_name": "AutoencoderKL",
+   "_diffusers_version": "0.23.1",
+   "act_fn": "silu",
+   "block_out_channels": [
+     128,
+     256,
+     256,
+     512
+   ],
+   "down_block_types": [
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D",
+     "DownEncoderBlock2D"
+   ],
+   "force_upcast": true,
+   "in_channels": 3,
+   "latent_channels": 4,
+   "layers_per_block": 2,
+   "norm_num_groups": 32,
+   "out_channels": 3,
+   "sample_size": 512,
+   "scaling_factor": 0.18215,
+   "up_block_types": [
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D",
+     "UpDecoderBlock2D"
+   ]
+ }
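
This config is what load_model in app.py reads to pick the class (AutoencoderKL) before calling from_pretrained. A minimal sketch of how the VAE is used in get_img, assuming the checkpoint sits at resources/ivae as in this commit:

    import torch
    from diffusers import AutoencoderKL

    vae = AutoencoderKL.from_pretrained("resources/ivae").eval()
    latents = torch.randn(1, 4, 14, 14)  # LIDM latent: latent_channels=4, sample_size=14
    with torch.no_grad():
        # get_img divides by scaling_factor before decoding back to pixel space
        frame = vae.decode(latents / vae.config.scaling_factor).sample
    print(frame.shape)  # 1 x 3 x 112 x 112 (three 2x upsampling stages: 14 -> 112)
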
resources/ivae/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ba0be1555511d1e145bfda156062aab744c6f7fc12e930c78c3640baf8183d5b
+ size 249675844
resources/lidm/config.json ADDED
@@ -0,0 +1,52 @@
+ {
+   "_class_name": "UNet2DModel",
+   "_diffusers_version": "0.24.0",
+   "act_fn": "silu",
+   "add_attention": true,
+   "attention_head_dim": 8,
+   "attn_norm_num_groups": null,
+   "block_out_channels": [
+     128,
+     256,
+     256,
+     512
+   ],
+   "center_input_sample": false,
+   "class_embed_type": null,
+   "decay": 0.9999,
+   "down_block_types": [
+     "AttnDownBlock2D",
+     "AttnDownBlock2D",
+     "AttnDownBlock2D",
+     "DownBlock2D"
+   ],
+   "downsample_padding": 1,
+   "downsample_type": "resnet",
+   "dropout": 0.0,
+   "flip_sin_to_cos": true,
+   "freq_shift": 0,
+   "in_channels": 4,
+   "inv_gamma": 1.0,
+   "layers_per_block": 2,
+   "mid_block_scale_factor": 1,
+   "min_decay": 0.0,
+   "norm_eps": 1e-05,
+   "norm_num_groups": 32,
+   "num_class_embeds": null,
+   "num_train_timesteps": null,
+   "optimization_step": 250000,
+   "out_channels": 4,
+   "power": 0.6666666666666666,
+   "resnet_time_scale_shift": "default",
+   "sample_size": 14,
+   "time_embedding_type": "positional",
+   "up_block_types": [
+     "UpBlock2D",
+     "AttnUpBlock2D",
+     "AttnUpBlock2D",
+     "AttnUpBlock2D"
+   ],
+   "update_after_step": 0,
+   "upsample_type": "resnet",
+   "use_ema_warmup": false
+ }
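
One detail worth spelling out: sample_size is 14, which is not divisible by 8, so app.py pads each latent with padf(mult=3) before every UNet call and crops back with unpadf afterwards. The arithmetic, using the helper's own formula:

    # Padding arithmetic from padf()/unpadf() in app.py, for sample_size = 14.
    mult, size = 3, 14
    pad = 2**mult - (size % 2**mult)  # 8 - 6 = 2
    pad = pad // 2                    # 1 pixel on each side
    padded = size + 2 * pad           # 16, divisible by 2**mult = 8
    assert padded % 2**mult == 0
    print(pad, padded)                # 1 16
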
resources/lidm/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b3d93d2de24f4f081919dd946a03cf7a59befcc4f711e4a983f2c5b15be45920
+ size 294245640
resources/lvdm/config.json ADDED
@@ -0,0 +1,44 @@
+ {
+   "_class_name": "UNetSpatioTemporalConditionModel",
+   "_diffusers_version": "0.24.0",
+   "addition_time_embed_dim": 1,
+   "block_out_channels": [
+     128,
+     256,
+     256,
+     512
+   ],
+   "cross_attention_dim": 1,
+   "decay": 0.9999,
+   "down_block_types": [
+     "CrossAttnDownBlockSpatioTemporal",
+     "CrossAttnDownBlockSpatioTemporal",
+     "CrossAttnDownBlockSpatioTemporal",
+     "DownBlockSpatioTemporal"
+   ],
+   "in_channels": 8,
+   "inv_gamma": 1.0,
+   "layers_per_block": 2,
+   "min_decay": 0.0,
+   "num_attention_heads": [
+     8,
+     16,
+     16,
+     32
+   ],
+   "num_frames": 64,
+   "optimization_step": 80000,
+   "out_channels": 4,
+   "power": 0.6666666666666666,
+   "projection_class_embeddings_input_dim": 1,
+   "sample_size": 14,
+   "transformer_layers_per_block": 1,
+   "up_block_types": [
+     "UpBlockSpatioTemporal",
+     "CrossAttnUpBlockSpatioTemporal",
+     "CrossAttnUpBlockSpatioTemporal",
+     "CrossAttnUpBlockSpatioTemporal"
+   ],
+   "update_after_step": 0,
+   "use_ema_warmup": false
+ }
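
The in_channels of 8 reflects how get_vid in app.py conditions the video UNet: the 4-channel video latent is concatenated channel-wise with a 4-channel copy of the reference image latent, while the ejection fraction enters as a single scalar per window through cross-attention (cross_attention_dim = 1). A small sketch with the shapes used by the demo:

    import torch

    B, C, T, H, W = 1, 4, 64, 14, 14
    video_latent = torch.randn(B, C, T, H, W)
    image_latent = torch.randn(B, C, 1, H, W).repeat(1, 1, T, 1, 1)  # reference frame, repeated over time
    unet_input = torch.cat((video_latent, image_latent), dim=1)
    print(unet_input.shape)  # 1 x 8 x 64 x 14 x 14 -> matches in_channels: 8
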
resources/lvdm/diffusion_pytorch_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:452750040eb2183d04b5547ae9493e555246cc70c11de7d9d4897377811c520e
+ size 575506960