wenmeng.zwm committed · commit 60931f6 · 1 parent: 3da52d6

update space to support I2VGEN-XL https://modelscope.cn/studios/damo/I2VGen-XL/summary

Files changed:
- Dockerfile (+4, -35)
- app.py (+32, -67)
- requirements.txt (+9, -15)
Dockerfile CHANGED
@@ -1,45 +1,14 @@
-FROM <base image truncated in source>
+FROM registry.cn-beijing.aliyuncs.com/modelscope-repo/modelscope:ubuntu22.04-cuda11.8.0-py310-torch2.1.0-tf2.14.0-1.10.0
 ENV DEBIAN_FRONTEND=noninteractive
-RUN apt-get update && \
-    apt-get upgrade -y && \
-    apt-get install -y --no-install-recommends \
-    git \
-    git-lfs \
-    wget \
-    curl \
-    # python build dependencies \
-    build-essential \
-    libssl-dev \
-    zlib1g-dev \
-    libbz2-dev \
-    libreadline-dev \
-    libsqlite3-dev \
-    libncursesw5-dev \
-    xz-utils \
-    tk-dev \
-    libxml2-dev \
-    libxmlsec1-dev \
-    libffi-dev \
-    liblzma-dev \
-    # gradio dependencies \
-    ffmpeg && \
-    apt-get clean && \
-    rm -rf /var/lib/apt/lists/*
-
+#RUN apt-get update && \
+#    apt-get upgrade -y && \
+#    apt-get install -y --no-install-recommends \
 RUN useradd -m -u 1000 user
 USER user
 ENV HOME=/home/user \
     PATH=/home/user/.local/bin:${PATH}
 WORKDIR ${HOME}/app
 
-RUN curl https://pyenv.run | bash
-ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}
-ARG PYTHON_VERSION=3.10.12
-RUN pyenv install ${PYTHON_VERSION} && \
-    pyenv global ${PYTHON_VERSION} && \
-    pyenv rehash && \
-    pip install --no-cache-dir -U pip setuptools wheel
-
 COPY --chown=1000 ./requirements.txt /tmp/requirements.txt
 RUN pip install -r /tmp/requirements.txt
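The new base image bundles the Python 3.10 toolchain, torch 2.1.0, CUDA 11.8 and modelscope 1.10.0 (going by the image tag), which is what lets the pyenv bootstrap and most apt packages above be dropped. A minimal sanity-check sketch one might run inside the container; the expected values are read off the image tag, not verified by this commit:

# sanity-check sketch; expected values below are assumptions taken from the image tag
import torch
import modelscope

print(torch.__version__)          # tag advertises torch 2.1.0
print(torch.cuda.is_available())  # expect True on a GPU runtime (CUDA 11.8 per the tag)
print(modelscope.__version__)     # tag advertises modelscope 1.10.0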
app.py CHANGED
@@ -1,85 +1,50 @@
-#!/usr/bin/env python
-
 import os
-import pathlib
-import tempfile
 
 import gradio as gr
-import torch
-from huggingface_hub import snapshot_download
-from modelscope.outputs import OutputKeys
 from modelscope.pipelines import pipeline
+from modelscope.outputs import OutputKeys
 
-# (docstring opening truncated in source)
-I2VGen-XL can generate videos that are semantically similar to the input image and text. The generated videos are high-definition (1280 * 720), wide-screen (16:9), temporally coherent, and have good texture.
-"""
-
-if torch.cuda.is_available():
-    model_cache_dir = os.getenv("MODEL_CACHE_DIR", "./models")
-
-    image2video_model_dir = pathlib.Path(model_cache_dir) / "MS-Image2Video"
-    snapshot_download(repo_id="damo-vilab/MS-Image2Video", repo_type="model", local_dir=image2video_model_dir)
-    image_to_video_pipe = pipeline(
-        task="image-to-video", model=image2video_model_dir.as_posix(), model_revision="v1.1.0", device="cuda:0"
-    )
-
-    video2video_model_dir = pathlib.Path(model_cache_dir) / "MS-Vid2Vid-XL"
-    snapshot_download(repo_id="damo-vilab/MS-Vid2Vid-XL", repo_type="model", local_dir=video2video_model_dir)
-    video_to_video_pipe = pipeline(
-        task="video-to-video", model=video2video_model_dir.as_posix(), model_revision="v1.1.0", device="cuda:0"
-    )
-else:
-    image_to_video_pipe = None
-    video_to_video_pipe = None
+image_to_video_pipe = pipeline(task="image-to-video", model='damo/i2vgen-xl', revision='v1.1.3', device='cuda:0')
 
-def image_to_video(image_path: str) -> str:
-    output_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
-    image_to_video_pipe(image_path, output_video=output_file.name)[OutputKeys.OUTPUT_VIDEO]
-    return output_file.name
+def upload_file(file):
+    return file.name
 
-def video_to_video(video_path: str, text: str) -> str:
-    p_input = {"video_path": video_path, "text": text}
-    output_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
-    video_to_video_pipe(p_input, output_video=output_file.name)[OutputKeys.OUTPUT_VIDEO]
-    return output_file.name
+def image_to_video(image_in, text_in):
+    if image_in is None:
+        raise gr.Error('请上传图片或等待图片上传完成')  # "Please upload an image, or wait for the upload to finish."
+    print(image_in)
+    output_video_path = image_to_video_pipe(image_in, caption=text_in)[OutputKeys.OUTPUT_VIDEO]
+    print(output_video_path)
+    return output_video_path
 
-with gr.Blocks() as demo:
-    # (two lines truncated in source)
-    gr.Markdown('Step 1: Upload an image and click the "Generate video" button.')
-    with gr.Row():
-        with gr.Column():
-            input_image = gr.Image(label="Input image", type="filepath", height=300)
-            i2v_button = gr.Button("Generate video")
-        with gr.Column():
-            output_video_1 = gr.Video(label="Output video 1", interactive=False, height=300)
+with gr.Blocks() as demo:
+    gr.Markdown(
+        """<center><font size=7>I2VGen-XL</center>
+        <left><font size=3>I2VGen-XL可以根据用户输入的静态图像和文本生成目标接近、语义相同的视频,生成的视频具高清(1280 * 720)、宽屏(16:9)、时序连贯、质感好等特点。</left>
+
+        <left><font size=3>I2VGen-XL can generate videos with similar contents and semantics based on user input static images and text. The generated videos have characteristics such as high-definition (1280 * 720), widescreen (16:9), coherent timing, and good texture.</left>
+        """
+    )
     with gr.Box():
         gr.Markdown(
-            # (argument truncated in source)
+            """<left><font size=3>选择合适的图片进行上传,并补充对视频内容的英文文本描述,然后点击“生成视频”。</left>
+
+            <left><font size=3>Please choose the image to upload (we recommend the image size be 1280 * 720), provide the English text description of the video you wish to create, and then click on "Generate Video" to receive the generated video.</left>"""
        )
     with gr.Row():
         with gr.Column():
-            # (two lines truncated in source)
+            text_in = gr.Textbox(label="文本描述", lines=2, elem_id="text-in")  # label: "Text description"
+            image_in = gr.Image(label="图片输入", type="filepath", interactive=False, elem_id="image-in", height=300)  # label: "Image input"
+            with gr.Row():
+                upload_image = gr.UploadButton("上传图片", file_types=["image"], file_count="single")  # "Upload image"
+                image_submit = gr.Button("生成视频🎬")  # "Generate video 🎬"
         with gr.Column():
-            # (line truncated in source)
+            video_out_1 = gr.Video(label='生成的视频', elem_id='video-out_1', interactive=False, height=300)  # label: "Generated video"
+            gr.Markdown("<left><font size=2>注:如果生成的视频无法播放,请尝试升级浏览器或使用chrome浏览器。</left>")  # "Note: if the generated video does not play, try upgrading your browser or use Chrome."
 
-    i2v_button.click(
-        fn=image_to_video,
-        inputs=input_image,
-        outputs=output_video_1,
-        api_name="image-to-video",
-    )
-    v2v_button.click(
-        fn=video_to_video,
-        inputs=[output_video_1, text_description],
-        outputs=output_video_2,
-        api_name="video-to-video",
-    )
+    upload_image.upload(upload_file, upload_image, image_in, queue=False)
+    image_submit.click(fn=image_to_video, inputs=[image_in, text_in], outputs=[video_out_1])
 
-# (closing launch lines truncated in source)
+demo.queue(status_update_rate=1, api_open=False).launch(share=False, show_error=True)
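For reference, the inference path the new app.py wires into Gradio can be exercised on its own. A minimal sketch, assuming a GPU machine; the test image path sample.jpg and the caption text are hypothetical placeholders, while the pipeline arguments mirror the diff above:

# standalone sketch of the same pipeline call the new app.py makes
from modelscope.pipelines import pipeline
from modelscope.outputs import OutputKeys

# identical construction to app.py: damo/i2vgen-xl at revision v1.1.3
pipe = pipeline(task="image-to-video", model="damo/i2vgen-xl", revision="v1.1.3", device="cuda:0")

# the pipeline takes an image path plus an English caption; the returned dict
# holds the path of the generated .mp4 under OutputKeys.OUTPUT_VIDEO
video_path = pipe("sample.jpg", caption="A red panda eating bamboo leaves")[OutputKeys.OUTPUT_VIDEO]
print(video_path)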
requirements.txt CHANGED
@@ -1,15 +1,9 @@
-# (9 lines truncated in source)
-pytorch-lightning==2.0.7
-rotary-embedding-torch==0.2.7
-scipy==1.11.2
-torch==2.0.1
-torchsde==0.2.5
-xformers==0.0.20
+torchsde
+open_clip_torch>=2.0.2
+opencv-python-headless
+opencv-python
+einops>=0.4
+rotary-embedding-torch
+fairscale
+scipy
+imageio