wenmeng.zwm committed
Commit 60931f6
1 Parent(s): 3da52d6

update space to support I2VGEN-XL https://modelscope.cn/studios/damo/I2VGen-XL/summary

Files changed (3)
  1. Dockerfile +4 -35
  2. app.py +32 -67
  3. requirements.txt +9 -15
Dockerfile CHANGED
@@ -1,45 +1,14 @@
-FROM nvidia/cuda:11.7.1-cudnn8-devel-ubuntu22.04
+FROM registry.cn-beijing.aliyuncs.com/modelscope-repo/modelscope:ubuntu22.04-cuda11.8.0-py310-torch2.1.0-tf2.14.0-1.10.0
 ENV DEBIAN_FRONTEND=noninteractive
-RUN apt-get update && \
-    apt-get upgrade -y && \
-    apt-get install -y --no-install-recommends \
-    git \
-    git-lfs \
-    wget \
-    curl \
-    # python build dependencies \
-    build-essential \
-    libssl-dev \
-    zlib1g-dev \
-    libbz2-dev \
-    libreadline-dev \
-    libsqlite3-dev \
-    libncursesw5-dev \
-    xz-utils \
-    tk-dev \
-    libxml2-dev \
-    libxmlsec1-dev \
-    libffi-dev \
-    liblzma-dev \
-    # gradio dependencies \
-    ffmpeg && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-
+#RUN apt-get update && \
+#    apt-get upgrade -y && \
+#    apt-get install -y --no-install-recommends \
 RUN useradd -m -u 1000 user
 USER user
 ENV HOME=/home/user \
     PATH=/home/user/.local/bin:${PATH}
 WORKDIR ${HOME}/app

-RUN curl https://pyenv.run | bash
-ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}
-ARG PYTHON_VERSION=3.10.12
-RUN pyenv install ${PYTHON_VERSION} && \
-    pyenv global ${PYTHON_VERSION} && \
-    pyenv rehash && \
-    pip install --no-cache-dir -U pip setuptools wheel
-
 COPY --chown=1000 ./requirements.txt /tmp/requirements.txt
 RUN pip install -r /tmp/requirements.txt

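The new base image already bundles the runtime that the old Dockerfile built by hand: its tag advertises Ubuntu 22.04, CUDA 11.8.0, Python 3.10, torch 2.1.0, tf 2.14.0 and ModelScope 1.10.0, which is why the apt toolchain and the pyenv bootstrap could be dropped. A minimal sanity-check sketch, runnable inside the container; the expected versions are assumptions read off the image tag, not verified:

import torch
import modelscope

# The image tag suggests these versions; print the actual ones to confirm.
print("modelscope:", modelscope.__version__)      # expected 1.10.0 per the tag
print("torch:", torch.__version__)                # expected 2.1.0 per the tag
print("built against CUDA:", torch.version.cuda)  # expected 11.8 per the tag
print("GPU visible:", torch.cuda.is_available())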
app.py CHANGED
@@ -1,85 +1,50 @@
-#!/usr/bin/env python
-
 import os
-import pathlib
-import tempfile

 import gradio as gr
-import torch
-from huggingface_hub import snapshot_download
-from modelscope.outputs import OutputKeys
 from modelscope.pipelines import pipeline
+from modelscope.outputs import OutputKeys

-
-DESCRIPTION = """# I2VGen-XL
-
-I2VGen-XL can generate videos that are semantically similar to the input image and text. The generated videos are high-definition (1280 * 720), wide-screen (16:9), temporally coherent, and have good texture.
-"""
-
-if torch.cuda.is_available():
-    model_cache_dir = os.getenv("MODEL_CACHE_DIR", "./models")
-
-    image2video_model_dir = pathlib.Path(model_cache_dir) / "MS-Image2Video"
-    snapshot_download(repo_id="damo-vilab/MS-Image2Video", repo_type="model", local_dir=image2video_model_dir)
-    image_to_video_pipe = pipeline(
-        task="image-to-video", model=image2video_model_dir.as_posix(), model_revision="v1.1.0", device="cuda:0"
-    )
-
-    video2video_model_dir = pathlib.Path(model_cache_dir) / "MS-Vid2Vid-XL"
-    snapshot_download(repo_id="damo-vilab/MS-Vid2Vid-XL", repo_type="model", local_dir=video2video_model_dir)
-    video_to_video_pipe = pipeline(
-        task="video-to-video", model=video2video_model_dir.as_posix(), model_revision="v1.1.0", device="cuda:0"
-    )
-else:
-    image_to_video_pipe = None
-    video_to_video_pipe = None
+image_to_video_pipe = pipeline(task="image-to-video", model='damo/i2vgen-xl', revision='v1.1.3', device='cuda:0')


-def image_to_video(image_path: str) -> str:
-    output_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
-    image_to_video_pipe(image_path, output_video=output_file.name)[OutputKeys.OUTPUT_VIDEO]
-    return output_file.name
+def upload_file(file):
+    return file.name


-def video_to_video(video_path: str, text: str) -> str:
-    p_input = {"video_path": video_path, "text": text}
-    output_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
-    video_to_video_pipe(p_input, output_video=output_file.name)[OutputKeys.OUTPUT_VIDEO]
-    return output_file.name
+def image_to_video(image_in, text_in):
+    if image_in is None:
+        raise gr.Error('Please upload an image, or wait for the image upload to finish')
+    print(image_in)
+    output_video_path = image_to_video_pipe(image_in, caption=text_in)[OutputKeys.OUTPUT_VIDEO]
+    print(output_video_path)
+    return output_video_path


-with gr.Blocks(css="style.css") as demo:
-    gr.Markdown(DESCRIPTION)
-    with gr.Box():
-        gr.Markdown('Step 1: Upload an image and click the "Generate video" button.')
-        with gr.Row():
-            with gr.Column():
-                input_image = gr.Image(label="Input image", type="filepath", height=300)
-                i2v_button = gr.Button("Generate video")
-            with gr.Column():
-                output_video_1 = gr.Video(label="Output video 1", interactive=False, height=300)
+with gr.Blocks() as demo:
+    gr.Markdown(
+        """<center><font size=7>I2VGen-XL</center>
+        <left><font size=3>I2VGen-XL can generate videos with similar contents and semantics based on user input static images and text. The generated videos have characteristics such as high-definition (1280 * 720), widescreen (16:9), coherent timing, and good texture.</left>
+        """
+    )
     with gr.Box():
         gr.Markdown(
-            'Step 2: Add an English text description of the video content and click the "Generate high-resolution video" button.'
+            """<left><font size=3>Please choose the image to upload (we recommend the image size be 1280 * 720), provide the English text description of the video you wish to create, and then click on "Generate Video" to receive the generated video.</left>"""
         )
         with gr.Row():
             with gr.Column():
-                text_description = gr.Textbox(label="Text description")
-                v2v_button = gr.Button("Generate high-resolution video")
+                text_in = gr.Textbox(label="Text description", lines=2, elem_id="text-in")
+                image_in = gr.Image(label="Image input", type="filepath", interactive=False, elem_id="image-in", height=300)
+                with gr.Row():
+                    upload_image = gr.UploadButton("Upload image", file_types=["image"], file_count="single")
+                    image_submit = gr.Button("Generate video 🎬")
             with gr.Column():
-                output_video_2 = gr.Video(label="Output video 2", height=300)
+                video_out_1 = gr.Video(label='Generated video', elem_id='video-out_1', interactive=False, height=300)
+                gr.Markdown("<left><font size=2>Note: if the generated video does not play, please try upgrading your browser or using Chrome.</left>")

-    i2v_button.click(
-        fn=image_to_video,
-        inputs=input_image,
-        outputs=output_video_1,
-        api_name="image-to-video",
-    )
-    v2v_button.click(
-        fn=video_to_video,
-        inputs=[output_video_1, text_description],
-        outputs=output_video_2,
-        api_name="video-to-video",
-    )

-if __name__ == "__main__":
-    demo.queue(max_size=10, api_open=False).launch()
+    upload_image.upload(upload_file, upload_image, image_in, queue=False)
+    image_submit.click(fn=image_to_video, inputs=[image_in, text_in], outputs=[video_out_1])
+
+demo.queue(status_update_rate=1, api_open=False).launch(share=False, show_error=True)
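For reference, the pipeline call that the new app.py wraps in Gradio can be exercised on its own. A minimal sketch using the same task, model id and revision as the code above; the image path and caption are placeholder inputs:

from modelscope.pipelines import pipeline
from modelscope.outputs import OutputKeys

# Same construction as app.py: ModelScope resolves 'damo/i2vgen-xl' at revision v1.1.3.
pipe = pipeline(task="image-to-video", model="damo/i2vgen-xl", revision="v1.1.3", device="cuda:0")

# Placeholder inputs: any local image plus an English caption.
video_path = pipe("example.jpg", caption="A red panda eating bamboo in the forest")[OutputKeys.OUTPUT_VIDEO]
print(video_path)  # path to the generated .mp4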
requirements.txt CHANGED
@@ -1,15 +1,9 @@
-easydict==1.10
-einops==0.6.1
-fairscale==0.4.13
-gradio==3.41.0
-huggingface_hub==0.16.4
-imageio==2.31.1
-modelscope==1.8.4
-open_clip_torch==2.20.0
-opencv-python-headless==4.8.0.76
-pytorch-lightning==2.0.7
-rotary-embedding-torch==0.2.7
-scipy==1.11.2
-torch==2.0.1
-torchsde==0.2.5
-xformers==0.0.20
+torchsde
+open_clip_torch>=2.0.2
+opencv-python-headless
+opencv-python
+einops>=0.4
+rotary-embedding-torch
+fairscale
+scipy
+imageio
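With the exact pins removed, dependency resolution is now left to pip, so a quick import smoke test helps catch a broken environment before the Space boots. A sketch; the package-to-module mapping (e.g. opencv-python-headless imports as cv2, open_clip_torch as open_clip) is my assumption about these distributions:

import importlib

# Distribution name -> import name.
MODULES = {
    "torchsde": "torchsde",
    "open_clip_torch": "open_clip",
    "opencv-python-headless": "cv2",
    "einops": "einops",
    "rotary-embedding-torch": "rotary_embedding_torch",
    "fairscale": "fairscale",
    "scipy": "scipy",
    "imageio": "imageio",
}

for dist, module in MODULES.items():
    m = importlib.import_module(module)
    print(dist, getattr(m, "__version__", "(no __version__)"))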