wenmeng.zwm committed · commit 60931f6 · 1 parent: 3da52d6

update space to support I2VGEN-XL https://modelscope.cn/studios/damo/I2VGen-XL/summary

Files changed:
- Dockerfile (+4, -35)
- app.py (+32, -67)
- requirements.txt (+9, -15)
Dockerfile CHANGED
@@ -1,45 +1,14 @@
-FROM <base image truncated in source>
+FROM registry.cn-beijing.aliyuncs.com/modelscope-repo/modelscope:ubuntu22.04-cuda11.8.0-py310-torch2.1.0-tf2.14.0-1.10.0
 ENV DEBIAN_FRONTEND=noninteractive
-RUN apt-get update && \
-    apt-get upgrade -y && \
-    apt-get install -y --no-install-recommends \
-    git \
-    git-lfs \
-    wget \
-    curl \
-    # python build dependencies \
-    build-essential \
-    libssl-dev \
-    zlib1g-dev \
-    libbz2-dev \
-    libreadline-dev \
-    libsqlite3-dev \
-    libncursesw5-dev \
-    xz-utils \
-    tk-dev \
-    libxml2-dev \
-    libxmlsec1-dev \
-    libffi-dev \
-    liblzma-dev \
-    # gradio dependencies \
-    ffmpeg && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-
+#RUN apt-get update && \
+#    apt-get upgrade -y && \
+#    apt-get install -y --no-install-recommends \
 RUN useradd -m -u 1000 user
 USER user
 ENV HOME=/home/user \
     PATH=/home/user/.local/bin:${PATH}
 WORKDIR ${HOME}/app
 
-RUN curl https://pyenv.run | bash
-ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}
-ARG PYTHON_VERSION=3.10.12
-RUN pyenv install ${PYTHON_VERSION} && \
-    pyenv global ${PYTHON_VERSION} && \
-    pyenv rehash && \
-    pip install --no-cache-dir -U pip setuptools wheel
-
 COPY --chown=1000 ./requirements.txt /tmp/requirements.txt
 RUN pip install -r /tmp/requirements.txt
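The new base image bundles the Python 3.10 toolchain, torch 2.1.0, CUDA 11.8 and modelscope 1.10.0 (going by the image tag), which is what lets the pyenv bootstrap and most apt packages above be dropped. A minimal sanity-check sketch one might run inside the container; the expected values are read off the image tag, not verified by this commit:

# sanity-check sketch; expected values below are assumptions taken from the image tag
import torch
import modelscope

print(torch.__version__)          # tag advertises torch 2.1.0
print(torch.cuda.is_available())  # expect True on a GPU runtime (CUDA 11.8 per the tag)
print(modelscope.__version__)     # tag advertises modelscope 1.10.0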
app.py CHANGED
@@ -1,85 +1,50 @@
-#!/usr/bin/env python
-
 import os
-import pathlib
-import tempfile
 
 import gradio as gr
-import torch
-from huggingface_hub import snapshot_download
-from modelscope.outputs import OutputKeys
 from modelscope.pipelines import pipeline
+from modelscope.outputs import OutputKeys
 
-# (docstring opening truncated in source)
-I2VGen-XL can generate videos that are semantically similar to the input image and text. The generated videos are high-definition (1280 * 720), wide-screen (16:9), temporally coherent, and have good texture.
-"""
-
-if torch.cuda.is_available():
-    model_cache_dir = os.getenv("MODEL_CACHE_DIR", "./models")
-
-    image2video_model_dir = pathlib.Path(model_cache_dir) / "MS-Image2Video"
-    snapshot_download(repo_id="damo-vilab/MS-Image2Video", repo_type="model", local_dir=image2video_model_dir)
-    image_to_video_pipe = pipeline(
-        task="image-to-video", model=image2video_model_dir.as_posix(), model_revision="v1.1.0", device="cuda:0"
-    )
-
-    video2video_model_dir = pathlib.Path(model_cache_dir) / "MS-Vid2Vid-XL"
-    snapshot_download(repo_id="damo-vilab/MS-Vid2Vid-XL", repo_type="model", local_dir=video2video_model_dir)
-    video_to_video_pipe = pipeline(
-        task="video-to-video", model=video2video_model_dir.as_posix(), model_revision="v1.1.0", device="cuda:0"
-    )
-else:
-    image_to_video_pipe = None
-    video_to_video_pipe = None
+image_to_video_pipe = pipeline(task="image-to-video", model='damo/i2vgen-xl', revision='v1.1.3', device='cuda:0')
 
-def image_to_video(image_path: str) -> str:
-    output_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
-    image_to_video_pipe(image_path, output_video=output_file.name)[OutputKeys.OUTPUT_VIDEO]
-    return output_file.name
+def upload_file(file):
+    return file.name
 
-def video_to_video(video_path: str, text: str) -> str:
-    p_input = {"video_path": video_path, "text": text}
-    output_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
-    video_to_video_pipe(p_input, output_video=output_file.name)[OutputKeys.OUTPUT_VIDEO]
-    return output_file.name
+def image_to_video(image_in, text_in):
+    if image_in is None:
+        raise gr.Error('请上传图片或等待图片上传完成')  # "Please upload an image, or wait for the upload to finish."
+    print(image_in)
+    output_video_path = image_to_video_pipe(image_in, caption=text_in)[OutputKeys.OUTPUT_VIDEO]
+    print(output_video_path)
+    return output_video_path
 
-with gr.Blocks() as demo:
-    # (two lines truncated in source)
-    gr.Markdown('Step 1: Upload an image and click the "Generate video" button.')
-    with gr.Row():
-        with gr.Column():
-            input_image = gr.Image(label="Input image", type="filepath", height=300)
-            i2v_button = gr.Button("Generate video")
-        with gr.Column():
-            output_video_1 = gr.Video(label="Output video 1", interactive=False, height=300)
+with gr.Blocks() as demo:
+    gr.Markdown(
+        """<center><font size=7>I2VGen-XL</center>
+        <left><font size=3>I2VGen-XL可以根据用户输入的静态图像和文本生成目标接近、语义相同的视频,生成的视频具高清(1280 * 720)、宽屏(16:9)、时序连贯、质感好等特点。</left>
+
+        <left><font size=3>I2VGen-XL can generate videos with similar contents and semantics based on user input static images and text. The generated videos have characteristics such as high-definition (1280 * 720), widescreen (16:9), coherent timing, and good texture.</left>
+        """
+    )
     with gr.Box():
         gr.Markdown(
-            # (argument truncated in source)
+            """<left><font size=3>选择合适的图片进行上传,并补充对视频内容的英文文本描述,然后点击“生成视频”。</left>
+
+            <left><font size=3>Please choose the image to upload (we recommend the image size be 1280 * 720), provide the English text description of the video you wish to create, and then click on "Generate Video" to receive the generated video.</left>"""
        )
     with gr.Row():
         with gr.Column():
-            # (two lines truncated in source)
+            text_in = gr.Textbox(label="文本描述", lines=2, elem_id="text-in")  # label: "Text description"
+            image_in = gr.Image(label="图片输入", type="filepath", interactive=False, elem_id="image-in", height=300)  # label: "Image input"
+            with gr.Row():
+                upload_image = gr.UploadButton("上传图片", file_types=["image"], file_count="single")  # "Upload image"
+                image_submit = gr.Button("生成视频🎬")  # "Generate video 🎬"
         with gr.Column():
-            # (line truncated in source)
+            video_out_1 = gr.Video(label='生成的视频', elem_id='video-out_1', interactive=False, height=300)  # label: "Generated video"
+            gr.Markdown("<left><font size=2>注:如果生成的视频无法播放,请尝试升级浏览器或使用chrome浏览器。</left>")  # "Note: if the generated video does not play, try upgrading your browser or use Chrome."
 
-    i2v_button.click(
-        fn=image_to_video,
-        inputs=input_image,
-        outputs=output_video_1,
-        api_name="image-to-video",
-    )
-    v2v_button.click(
-        fn=video_to_video,
-        inputs=[output_video_1, text_description],
-        outputs=output_video_2,
-        api_name="video-to-video",
-    )
+    upload_image.upload(upload_file, upload_image, image_in, queue=False)
+    image_submit.click(fn=image_to_video, inputs=[image_in, text_in], outputs=[video_out_1])
 
-# (closing launch lines truncated in source)
+demo.queue(status_update_rate=1, api_open=False).launch(share=False, show_error=True)
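For reference, the inference path the new app.py wires into Gradio can be exercised on its own. A minimal sketch, assuming a GPU machine; the test image path sample.jpg and the caption text are hypothetical placeholders, while the pipeline arguments mirror the diff above:

# standalone sketch of the same pipeline call the new app.py makes
from modelscope.pipelines import pipeline
from modelscope.outputs import OutputKeys

# identical construction to app.py: damo/i2vgen-xl at revision v1.1.3
pipe = pipeline(task="image-to-video", model="damo/i2vgen-xl", revision="v1.1.3", device="cuda:0")

# the pipeline takes an image path plus an English caption; the returned dict
# holds the path of the generated .mp4 under OutputKeys.OUTPUT_VIDEO
video_path = pipe("sample.jpg", caption="A red panda eating bamboo leaves")[OutputKeys.OUTPUT_VIDEO]
print(video_path)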
requirements.txt CHANGED
@@ -1,15 +1,9 @@
-# (9 lines truncated in source)
-pytorch-lightning==2.0.7
-rotary-embedding-torch==0.2.7
-scipy==1.11.2
-torch==2.0.1
-torchsde==0.2.5
-xformers==0.0.20
+torchsde
+open_clip_torch>=2.0.2
+opencv-python-headless
+opencv-python
+einops>=0.4
+rotary-embedding-torch
+fairscale
+scipy
+imageio