fbnnb committed
Commit 73b78df
Parent: 4bc9607

Update gradio_app.py

Files changed (1):
  gradio_app.py  +346 -348
gradio_app.py CHANGED
@@ -1,349 +1,347 @@
-import os, argparse
-import sys
-import gradio as gr
-# from scripts.gradio.i2v_test_application import Image2Video
-sys.path.insert(1, os.path.join(sys.path[0], 'lvdm'))
-import spaces
-
-
-import os
-import time
-from omegaconf import OmegaConf
-import torch
-from scripts.evaluation.funcs import load_model_checkpoint, save_videos, batch_ddim_sampling, get_latent_z
-from utils.utils import instantiate_from_config
-from huggingface_hub import hf_hub_download
-from einops import repeat
-import torchvision.transforms as transforms
-from pytorch_lightning import seed_everything
-from einops import rearrange
-from cldm.model import load_state_dict
-import cv2
-
-import torch
-print("cuda available:", torch.cuda.is_available())
-
-
-from huggingface_hub import snapshot_download
-import os
-
-
-
-def download_model():
-    REPO_ID = 'fbnnb/TC_sketch'
-    filename_list = ['tc_sketch.pt']
-    tar_dir = './checkpoints/tooncrafter_1024_interp_sketch/'
-    if not os.path.exists(tar_dir):
-        os.makedirs(tar_dir)
-    for filename in filename_list:
-        local_file = os.path.join(tar_dir, filename)
-        if not os.path.exists(local_file):
-            hf_hub_download(repo_id=REPO_ID, filename=filename, local_dir=tar_dir, local_dir_use_symlinks=False)
-    print("downloaded")
-
-
-def get_latent_z_with_hidden_states(model, videos):
-    b, c, t, h, w = videos.shape
-    x = rearrange(videos, 'b c t h w -> (b t) c h w')
-    encoder_posterior, hidden_states = model.first_stage_model.encode(x, return_hidden_states=True)
-
-    hidden_states_first_last = []
-    ### use only the first and last hidden states
-    for hid in hidden_states:
-        hid = rearrange(hid, '(b t) c h w -> b c t h w', t=t)
-        hid_new = torch.cat([hid[:, :, 0:1], hid[:, :, -1:]], dim=2)
-        hidden_states_first_last.append(hid_new)
-
-    z = model.get_first_stage_encoding(encoder_posterior).detach()
-    z = rearrange(z, '(b t) c h w -> b c t h w', b=b, t=t)
-    return z, hidden_states_first_last
-
-
-
-def extract_frames(video_path):
-    # Open the video file
-    cap = cv2.VideoCapture(video_path)
-
-    frame_list = []
-    frame_num = 0
-
-    while True:
-        # Read a frame
-        ret, frame = cap.read()
-        if not ret:
-            break
-
-        # Append the frame to the list
-        frame_list.append(frame)
-        frame_num += 1
-
-    print("load video length:", len(frame_list))
-    # Close the video file
-    cap.release()
-
-    return frame_list
-
-
-resolution = '576_1024'
-resolution = (576, 1024)
-download_model()
-print("after download model")
-result_dir = "./results/"
-if not os.path.exists(result_dir):
-    os.mkdir(result_dir)
-
-#ToonCrafterModel
-ckpt_path='checkpoints/tooncrafter_1024_interp_sketch/tc_sketch.pt'
-config_file='configs/inference_1024_v1.0.yaml'
-config = OmegaConf.load(config_file)
-model_config = config.pop("model", OmegaConf.create())
-model_config['params']['unet_config']['params']['use_checkpoint']=False
-
-model = instantiate_from_config(model_config)
-assert os.path.exists(ckpt_path), "Error: checkpoint Not Found!"
-# ckpt_path = "/group/40005/gzhiwang/tc_sketch.pt"
-ckpt_path = "/group/40034/gzhiwang/tc_sketch.pt"
-model = load_model_checkpoint(model, ckpt_path)
-model.eval()
-
-# cn_model.load_state_dict(load_state_dict(cn_ckpt_path, location='cpu'))
-# cn_model.eval()
-
-# model.control_model = cn_model
-# model_list.append(model)
-
-save_fps = 8
-print("resolution:", resolution)
-print("init done.")
-
-def transpose_if_needed(tensor):
-    h = tensor.shape[-2]
-    w = tensor.shape[-1]
-    if h > w:
-        tensor = tensor.permute(0, 2, 1)
-    return tensor
-
-def untranspose(tensor):
-    ndim = tensor.ndim
-    return tensor.transpose(ndim-1, ndim-2)
-
-@spaces.GPU(duration=200)
-def get_image(image, sketch, prompt, steps=50, cfg_scale=7.5, eta=1.0, fs=3, seed=123, control_scale=0.6):
-    print("enter fn")
-    # control_frames = extract_frames(frame_guides)
-    print("extract frames")
-    seed_everything(seed)
-    transform = transforms.Compose([
-        transforms.Resize(min(resolution)),
-        transforms.CenterCrop(resolution),
-        ])
-    print("before empty cache")
-    torch.cuda.empty_cache()
-    print('start:', prompt, time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
-    start = time.time()
-    gpu_id=0
-    if steps > 60:
-        steps = 60
-
-    global model
-    # model = model_list[gpu_id]
-    model = model.cuda()
-
-    batch_size=1
-    channels = model.model.diffusion_model.out_channels
-    frames = model.temporal_length
-    h, w = resolution[0] // 8, resolution[1] // 8
-    noise_shape = [batch_size, channels, frames, h, w]
-
-    # text cond
-    transposed = False
-    with torch.no_grad(), torch.cuda.amp.autocast():
-        text_emb = model.get_learned_conditioning([prompt])
-        print("before control")
-        #control cond
-        # if frame_guides is not None:
-        #     cn_videos = []
-        #     for frame in control_frames:
-        #         frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
-        #         frame = cv2.bitwise_not(frame)
-        #         cn_tensor = torch.from_numpy(frame).unsqueeze(2).permute(2, 0, 1).float().to(model.device)
-
-        #         #cn_tensor = (cn_tensor / 255. - 0.5) * 2
-        #         cn_tensor = ( cn_tensor/255.0 )
-        #         cn_tensor = transpose_if_needed(cn_tensor)
-        #         cn_tensor_resized = transform(cn_tensor) #3,h,w
-
-        #         cn_video = cn_tensor_resized.unsqueeze(0).unsqueeze(2) # bc1hw
-        #         cn_videos.append(cn_video)
-
-        #     cn_videos = torch.cat(cn_videos, dim=2)
-        #     if cn_videos.shape[2] > frames:
-        #         idxs = []
-        #         for i in range(frames):
-        #             index = int((i + 0.5) * cn_videos.shape[2] / frames)
-        #             idxs.append(min(index, cn_videos.shape[2] - 1))
-        #         cn_videos = cn_videos[:, :, idxs, :, :]
-        #         print("cn_videos.shape after slicing", cn_videos.shape)
-        #     model_list = []
-        #     for model in model_list:
-        #         model.control_scale = control_scale
-        #         model_list.append(model)
-
-        # else:
-        cn_videos = None
-
-        print("image cond")
-
-        # img cond
-        img_tensor = torch.from_numpy(image).permute(2, 0, 1).float().to(model.device)
-        input_h, input_w = img_tensor.shape[1:]
-        img_tensor = (img_tensor / 255. - 0.5) * 2
-        img_tensor = transpose_if_needed(img_tensor)
-
-        image_tensor_resized = transform(img_tensor) #3,h,w
-        videos = image_tensor_resized.unsqueeze(0).unsqueeze(2) # bc1hw
-        print("get latent z")
-        # z = get_latent_z(model, videos) #bc,1,hw
-        videos = repeat(videos, 'b c t h w -> b c (repeat t) h w', repeat=frames//2)
-
-        if sketch is not None:
-            img_tensor2 = torch.from_numpy(sketch).permute(2, 0, 1).float().to(model.device)
-            img_tensor2 = (img_tensor2 / 255. - 0.5) * 2
-            img_tensor2 = transpose_if_needed(img_tensor2)
-            image_tensor_resized2 = transform(img_tensor2) #3,h,w
-            videos2 = image_tensor_resized2.unsqueeze(0).unsqueeze(2) # bchw
-            videos2 = repeat(videos2, 'b c t h w -> b c (repeat t) h w', repeat=frames//2)

-            videos = torch.cat([videos, videos2], dim=2)
-        else:
-            videos = torch.cat([videos, videos], dim=2)
-
-        z, hs = get_latent_z_with_hidden_states(model, videos)
-
-        img_tensor_repeat = torch.zeros_like(z)
-
-        img_tensor_repeat[:,:,:1,:,:] = z[:,:,:1,:,:]
-        img_tensor_repeat[:,:,-1:,:,:] = z[:,:,-1:,:,:]
-
-        print("image embedder")
-        cond_images = model.embedder(img_tensor.unsqueeze(0)) ## blc
-        img_emb = model.image_proj_model(cond_images)
-
-        imtext_cond = torch.cat([text_emb, img_emb], dim=1)
-
-        fs = torch.tensor([fs], dtype=torch.long, device=model.device)
-        # print("cn videos:",cn_videos.shape, "img emb:", img_emb.shape)
-        cond = {"c_crossattn": [imtext_cond], "fs": fs, "c_concat": [img_tensor_repeat], "control_cond": cn_videos}
-
-        print("before sample loop")
-        ## inference
-        batch_samples = batch_ddim_sampling(model, cond, noise_shape, n_samples=1, ddim_steps=steps, ddim_eta=eta, cfg_scale=cfg_scale, hs=hs)
-
-        ## remove the last frame
-        if image2 is None:
-            batch_samples = batch_samples[:,:,:,:-1,...]
-        ## b,samples,c,t,h,w
-        prompt_str = prompt.replace("/", "_slash_") if "/" in prompt else prompt
-        prompt_str = prompt_str.replace(" ", "_") if " " in prompt else prompt_str
-        prompt_str=prompt_str[:40]
-        if len(prompt_str) == 0:
-            prompt_str = 'empty_prompt'
-
-    global result_dir
-    global save_fps
-    if input_h > input_w:
-        batch_samples = untranspose(batch_samples)
-
-    save_videos(batch_samples, result_dir, filenames=[prompt_str], fps=save_fps)
-    print(f"Saved in {prompt_str}. Time used: {(time.time() - start):.2f} seconds")
-    model = model.cpu()
-    saved_result_dir = os.path.join(result_dir, f"{prompt_str}.mp4")
-    print("result saved to:", saved_result_dir)
-    return saved_result_dir
-
-
-# @spaces.GPU
-
-
-
-i2v_examples_interp_1024 = [
-    ['prompts/1024_interp/frame_000000.jpg', 'prompts/1024_interp/frame_000041.jpg', 'a cat is eating', 50, 7.5, 1.0, 10, 123]
-]
-
-
-
-
-def dynamicrafter_demo(result_dir='./tmp/', res=1024):
-    if res == 1024:
-        resolution = '576_1024'
-        css = """#input_img {max-width: 1024px !important} #output_vid {max-width: 1024px; max-height:576px}"""
-    elif res == 512:
-        resolution = '320_512'
-        css = """#input_img {max-width: 512px !important} #output_vid {max-width: 512px; max-height: 320px} #input_img2 {max-width: 512px !important} #output_vid {max-width: 512px; max-height: 320px}"""
-    elif res == 256:
-        resolution = '256_256'
-        css = """#input_img {max-width: 256px !important} #output_vid {max-width: 256px; max-height: 256px}"""
-    else:
-        raise NotImplementedError(f"Unsupported resolution: {res}")
-    # image2video = Image2Video(result_dir, resolution=resolution)
-    with gr.Blocks(analytics_enabled=False, css=css) as dynamicrafter_iface:
-
-
-
-        with gr.Tab(label='ToonCrafter_320x512'):
-            with gr.Column():
-                with gr.Row():
-                    with gr.Column():
-                        with gr.Row():
-                            i2v_input_image = gr.Image(label="Input Image1",elem_id="input_img")
-                            # frame_guides = gr.Video(label="Input Guidance",elem_id="input_guidance", autoplay=True,show_share_button=True)
-                        with gr.Row():
-                            i2v_input_text = gr.Text(label='Prompts')
-                        with gr.Row():
-                            i2v_seed = gr.Slider(label='Random Seed', minimum=0, maximum=50000, step=1, value=123)
-                            i2v_eta = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, label='ETA', value=1.0, elem_id="i2v_eta")
-                            i2v_cfg_scale = gr.Slider(minimum=1.0, maximum=15.0, step=0.5, label='CFG Scale', value=7.5, elem_id="i2v_cfg_scale")
-                        with gr.Row():
-                            i2v_steps = gr.Slider(minimum=1, maximum=60, step=1, elem_id="i2v_steps", label="Sampling steps", value=50)
-                            i2v_motion = gr.Slider(minimum=5, maximum=30, step=1, elem_id="i2v_motion", label="FPS", value=10)
-                            control_scale = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, elem_id="i2v_ctrl_scale", label="control_scale", value=0.6)
-                        i2v_end_btn = gr.Button("Generate")
-                    with gr.Column():
-                        with gr.Row():
-                            i2v_input_sketch = gr.Image(label="Input End SKetch",elem_id="input_img2")
-                        with gr.Row():
-                            i2v_output_video = gr.Video(label="Generated Video",elem_id="output_vid",autoplay=True,show_share_button=True)
-
-                gr.Examples(examples=i2v_examples_interp_1024,
-                            inputs=[i2v_input_image, i2v_input_sketch, i2v_input_text, i2v_steps, i2v_cfg_scale, i2v_eta, i2v_motion, i2v_seed, control_scale],
-                            outputs=[i2v_output_video],
-                            fn = get_image,
-                            cache_examples=False,
-                )
-            i2v_end_btn.click(inputs=[i2v_input_image, i2v_input_sketch, i2v_input_text, i2v_steps, i2v_cfg_scale, i2v_eta, i2v_motion, i2v_seed, control_scale],
-                              outputs=[i2v_output_video],
-                              fn = get_image
-            )
-
-
-    return dynamicrafter_iface
-
-
-def get_parser():
-    parser = argparse.ArgumentParser()
-    return parser
-
-
-if __name__ == "__main__":
-    parser = get_parser()
-    args = parser.parse_args()
-
-    result_dir = os.path.join('./', 'results')
-    dynamicrafter_iface = dynamicrafter_demo(result_dir)
-    dynamicrafter_iface.queue(max_size=12)
-    print("launching...")
-    # dynamicrafter_iface.launch(max_threads=1, share=True)
-
-    dynamicrafter_iface.launch(server_name='0.0.0.0', server_port=12345)
-    # dynamicrafter_iface.launch()
+import os, argparse
+import sys
+import gradio as gr
+# from scripts.gradio.i2v_test_application import Image2Video
+sys.path.insert(1, os.path.join(sys.path[0], 'lvdm'))
+import spaces
+
+
+import os
+import time
+from omegaconf import OmegaConf
+import torch
+from scripts.evaluation.funcs import load_model_checkpoint, save_videos, batch_ddim_sampling, get_latent_z
+from utils.utils import instantiate_from_config
+from huggingface_hub import hf_hub_download
+from einops import repeat
+import torchvision.transforms as transforms
+from pytorch_lightning import seed_everything
+from einops import rearrange
+from cldm.model import load_state_dict
+import cv2
+
+import torch
+print("cuda available:", torch.cuda.is_available())
+
+
+from huggingface_hub import snapshot_download
+import os
+
+
+
+def download_model():
+    REPO_ID = 'fbnnb/TC_sketch'
+    filename_list = ['tc_sketch.pt']
+    tar_dir = './checkpoints/tooncrafter_1024_interp_sketch/'
+    if not os.path.exists(tar_dir):
+        os.makedirs(tar_dir)
+    for filename in filename_list:
+        local_file = os.path.join(tar_dir, filename)
+        if not os.path.exists(local_file):
+            hf_hub_download(repo_id=REPO_ID, filename=filename, local_dir=tar_dir, local_dir_use_symlinks=False)
+    print("downloaded")
+
+
+def get_latent_z_with_hidden_states(model, videos):
+    b, c, t, h, w = videos.shape
+    x = rearrange(videos, 'b c t h w -> (b t) c h w')
+    encoder_posterior, hidden_states = model.first_stage_model.encode(x, return_hidden_states=True)
+
+    hidden_states_first_last = []
+    ### use only the first and last hidden states
+    for hid in hidden_states:
+        hid = rearrange(hid, '(b t) c h w -> b c t h w', t=t)
+        hid_new = torch.cat([hid[:, :, 0:1], hid[:, :, -1:]], dim=2)
+        hidden_states_first_last.append(hid_new)
+
+    z = model.get_first_stage_encoding(encoder_posterior).detach()
+    z = rearrange(z, '(b t) c h w -> b c t h w', b=b, t=t)
+    return z, hidden_states_first_last
+
+
+
+def extract_frames(video_path):
+    # Open the video file
+    cap = cv2.VideoCapture(video_path)
+
+    frame_list = []
+    frame_num = 0
+
+    while True:
+        # Read a frame
+        ret, frame = cap.read()
+        if not ret:
+            break
+
+        # Append the frame to the list
+        frame_list.append(frame)
+        frame_num += 1
+
+    print("load video length:", len(frame_list))
+    # Close the video file
+    cap.release()
+
+    return frame_list
+
+
+resolution = '576_1024'
+resolution = (576, 1024)
+download_model()
+print("after download model")
+result_dir = "./results/"
+if not os.path.exists(result_dir):
+    os.mkdir(result_dir)
+
+#ToonCrafterModel
+ckpt_path='checkpoints/tooncrafter_1024_interp_sketch/tc_sketch.pt'
+config_file='configs/inference_1024_v1.0.yaml'
+config = OmegaConf.load(config_file)
+model_config = config.pop("model", OmegaConf.create())
+model_config['params']['unet_config']['params']['use_checkpoint']=False
+
+model = instantiate_from_config(model_config)
+assert os.path.exists(ckpt_path), "Error: checkpoint Not Found!"
+model = load_model_checkpoint(model, ckpt_path)
+model.eval()
+
+# cn_model.load_state_dict(load_state_dict(cn_ckpt_path, location='cpu'))
+# cn_model.eval()
+
+# model.control_model = cn_model
+# model_list.append(model)
+
+save_fps = 8
+print("resolution:", resolution)
+print("init done.")
+
+def transpose_if_needed(tensor):
+    h = tensor.shape[-2]
+    w = tensor.shape[-1]
+    if h > w:
+        tensor = tensor.permute(0, 2, 1)
+    return tensor
+
+def untranspose(tensor):
+    ndim = tensor.ndim
+    return tensor.transpose(ndim-1, ndim-2)
+
+@spaces.GPU(duration=200)
+def get_image(image, sketch, prompt, steps=50, cfg_scale=7.5, eta=1.0, fs=3, seed=123, control_scale=0.6):
+    print("enter fn")
+    # control_frames = extract_frames(frame_guides)
+    print("extract frames")
+    seed_everything(seed)
+    transform = transforms.Compose([
+        transforms.Resize(min(resolution)),
+        transforms.CenterCrop(resolution),
+        ])
+    print("before empty cache")
+    torch.cuda.empty_cache()
+    print('start:', prompt, time.strftime('%Y-%m-%d %H:%M:%S',time.localtime(time.time())))
+    start = time.time()
+    gpu_id=0
+    if steps > 60:
+        steps = 60
+
+    global model
+    # model = model_list[gpu_id]
+    model = model.cuda()
+
+    batch_size=1
+    channels = model.model.diffusion_model.out_channels
+    frames = model.temporal_length
+    h, w = resolution[0] // 8, resolution[1] // 8
+    noise_shape = [batch_size, channels, frames, h, w]
+
+    # text cond
+    transposed = False
+    with torch.no_grad(), torch.cuda.amp.autocast():
+        text_emb = model.get_learned_conditioning([prompt])
+        print("before control")
+        #control cond
+        # if frame_guides is not None:
+        #     cn_videos = []
+        #     for frame in control_frames:
+        #         frame = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
+        #         frame = cv2.bitwise_not(frame)
+        #         cn_tensor = torch.from_numpy(frame).unsqueeze(2).permute(2, 0, 1).float().to(model.device)
+
+        #         #cn_tensor = (cn_tensor / 255. - 0.5) * 2
+        #         cn_tensor = ( cn_tensor/255.0 )
+        #         cn_tensor = transpose_if_needed(cn_tensor)
+        #         cn_tensor_resized = transform(cn_tensor) #3,h,w
+
+        #         cn_video = cn_tensor_resized.unsqueeze(0).unsqueeze(2) # bc1hw
+        #         cn_videos.append(cn_video)
+
+        #     cn_videos = torch.cat(cn_videos, dim=2)
+        #     if cn_videos.shape[2] > frames:
+        #         idxs = []
+        #         for i in range(frames):
+        #             index = int((i + 0.5) * cn_videos.shape[2] / frames)
+        #             idxs.append(min(index, cn_videos.shape[2] - 1))
+        #         cn_videos = cn_videos[:, :, idxs, :, :]
+        #         print("cn_videos.shape after slicing", cn_videos.shape)
+        #     model_list = []
+        #     for model in model_list:
+        #         model.control_scale = control_scale
+        #         model_list.append(model)
+
+        # else:
+        cn_videos = None
+
+        print("image cond")
+
+        # img cond
+        img_tensor = torch.from_numpy(image).permute(2, 0, 1).float().to(model.device)
+        input_h, input_w = img_tensor.shape[1:]
+        img_tensor = (img_tensor / 255. - 0.5) * 2
+        img_tensor = transpose_if_needed(img_tensor)
+
+        image_tensor_resized = transform(img_tensor) #3,h,w
+        videos = image_tensor_resized.unsqueeze(0).unsqueeze(2) # bc1hw
+        print("get latent z")
+        # z = get_latent_z(model, videos) #bc,1,hw
+        videos = repeat(videos, 'b c t h w -> b c (repeat t) h w', repeat=frames//2)
+
+        if sketch is not None:
+            img_tensor2 = torch.from_numpy(sketch).permute(2, 0, 1).float().to(model.device)
+            img_tensor2 = (img_tensor2 / 255. - 0.5) * 2
+            img_tensor2 = transpose_if_needed(img_tensor2)
+            image_tensor_resized2 = transform(img_tensor2) #3,h,w
+            videos2 = image_tensor_resized2.unsqueeze(0).unsqueeze(2) # bchw
+            videos2 = repeat(videos2, 'b c t h w -> b c (repeat t) h w', repeat=frames//2)
+
+            videos = torch.cat([videos, videos2], dim=2)
+        else:
+            videos = torch.cat([videos, videos], dim=2)
+
+        z, hs = get_latent_z_with_hidden_states(model, videos)
+
+        img_tensor_repeat = torch.zeros_like(z)
+
+        img_tensor_repeat[:,:,:1,:,:] = z[:,:,:1,:,:]
+        img_tensor_repeat[:,:,-1:,:,:] = z[:,:,-1:,:,:]
+
+        print("image embedder")
+        cond_images = model.embedder(img_tensor.unsqueeze(0)) ## blc
+        img_emb = model.image_proj_model(cond_images)
+
+        imtext_cond = torch.cat([text_emb, img_emb], dim=1)
+
+        fs = torch.tensor([fs], dtype=torch.long, device=model.device)
+        # print("cn videos:",cn_videos.shape, "img emb:", img_emb.shape)
+        cond = {"c_crossattn": [imtext_cond], "fs": fs, "c_concat": [img_tensor_repeat], "control_cond": cn_videos}
+
+        print("before sample loop")
+        ## inference
+        batch_samples = batch_ddim_sampling(model, cond, noise_shape, n_samples=1, ddim_steps=steps, ddim_eta=eta, cfg_scale=cfg_scale, hs=hs)
+
+        ## remove the last frame
+        if image2 is None:
+            batch_samples = batch_samples[:,:,:,:-1,...]
+        ## b,samples,c,t,h,w
+        prompt_str = prompt.replace("/", "_slash_") if "/" in prompt else prompt
+        prompt_str = prompt_str.replace(" ", "_") if " " in prompt else prompt_str
+        prompt_str=prompt_str[:40]
+        if len(prompt_str) == 0:
+            prompt_str = 'empty_prompt'
+
+    global result_dir
+    global save_fps
+    if input_h > input_w:
+        batch_samples = untranspose(batch_samples)
+
+    save_videos(batch_samples, result_dir, filenames=[prompt_str], fps=save_fps)
+    print(f"Saved in {prompt_str}. Time used: {(time.time() - start):.2f} seconds")
+    model = model.cpu()
+    saved_result_dir = os.path.join(result_dir, f"{prompt_str}.mp4")
+    print("result saved to:", saved_result_dir)
+    return saved_result_dir
+
+
+# @spaces.GPU
+
+
+
+i2v_examples_interp_1024 = [
+    ['prompts/1024_interp/frame_000000.jpg', 'prompts/1024_interp/frame_000041.jpg', 'a cat is eating', 50, 7.5, 1.0, 10, 123]
+]
+
+
+
+
+def dynamicrafter_demo(result_dir='./tmp/', res=1024):
+    if res == 1024:
+        resolution = '576_1024'
+        css = """#input_img {max-width: 1024px !important} #output_vid {max-width: 1024px; max-height:576px}"""
+    elif res == 512:
+        resolution = '320_512'
+        css = """#input_img {max-width: 512px !important} #output_vid {max-width: 512px; max-height: 320px} #input_img2 {max-width: 512px !important} #output_vid {max-width: 512px; max-height: 320px}"""
+    elif res == 256:
+        resolution = '256_256'
+        css = """#input_img {max-width: 256px !important} #output_vid {max-width: 256px; max-height: 256px}"""
+    else:
+        raise NotImplementedError(f"Unsupported resolution: {res}")
+    # image2video = Image2Video(result_dir, resolution=resolution)
+    with gr.Blocks(analytics_enabled=False, css=css) as dynamicrafter_iface:
+
+
+
+        with gr.Tab(label='ToonCrafter_320x512'):
+            with gr.Column():
+                with gr.Row():
+                    with gr.Column():
+                        with gr.Row():
+                            i2v_input_image = gr.Image(label="Input Image1",elem_id="input_img")
+                            # frame_guides = gr.Video(label="Input Guidance",elem_id="input_guidance", autoplay=True,show_share_button=True)
+                        with gr.Row():
+                            i2v_input_text = gr.Text(label='Prompts')
+                        with gr.Row():
+                            i2v_seed = gr.Slider(label='Random Seed', minimum=0, maximum=50000, step=1, value=123)
+                            i2v_eta = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, label='ETA', value=1.0, elem_id="i2v_eta")
+                            i2v_cfg_scale = gr.Slider(minimum=1.0, maximum=15.0, step=0.5, label='CFG Scale', value=7.5, elem_id="i2v_cfg_scale")
+                        with gr.Row():
+                            i2v_steps = gr.Slider(minimum=1, maximum=60, step=1, elem_id="i2v_steps", label="Sampling steps", value=50)
+                            i2v_motion = gr.Slider(minimum=5, maximum=30, step=1, elem_id="i2v_motion", label="FPS", value=10)
+                            control_scale = gr.Slider(minimum=0.0, maximum=1.0, step=0.1, elem_id="i2v_ctrl_scale", label="control_scale", value=0.6)
+                        i2v_end_btn = gr.Button("Generate")
+                    with gr.Column():
+                        with gr.Row():
+                            i2v_input_sketch = gr.Image(label="Input End SKetch",elem_id="input_img2")
+                        with gr.Row():
+                            i2v_output_video = gr.Video(label="Generated Video",elem_id="output_vid",autoplay=True,show_share_button=True)
+
+                gr.Examples(examples=i2v_examples_interp_1024,
+                            inputs=[i2v_input_image, i2v_input_sketch, i2v_input_text, i2v_steps, i2v_cfg_scale, i2v_eta, i2v_motion, i2v_seed, control_scale],
+                            outputs=[i2v_output_video],
+                            fn = get_image,
+                            cache_examples=False,
+                )
+            i2v_end_btn.click(inputs=[i2v_input_image, i2v_input_sketch, i2v_input_text, i2v_steps, i2v_cfg_scale, i2v_eta, i2v_motion, i2v_seed, control_scale],
+                              outputs=[i2v_output_video],
+                              fn = get_image
+            )
+
+
+    return dynamicrafter_iface
+
+
+def get_parser():
+    parser = argparse.ArgumentParser()
+    return parser
+
+
+if __name__ == "__main__":
+    parser = get_parser()
+    args = parser.parse_args()
+
+    result_dir = os.path.join('./', 'results')
+    dynamicrafter_iface = dynamicrafter_demo(result_dir)
+    dynamicrafter_iface.queue(max_size=12)
+    print("launching...")
+    # dynamicrafter_iface.launch(max_threads=1, share=True)
+
+    dynamicrafter_iface.launch(server_name='0.0.0.0', server_port=12345)
+    # dynamicrafter_iface.launch()
     # print("launched...")