Spaces:
Runtime error
Runtime error
Artiprocher
commited on
Upload 6 files
Browse files- app.py +100 -0
- images/0.png +0 -0
- images/1.png +0 -0
- images/2.png +0 -0
- images/3.png +0 -0
- images/4.png +0 -0
app.py
ADDED
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import os
import subprocess
import sys


def _run_setup_step(command):
    """Run one bootstrap command given as an argument list.

    Returns the exit status (or -1 when the executable could not be started).
    Failures are reported on stderr instead of being silently discarded, but
    they do not abort start-up: the original script was best-effort as well.
    """
    try:
        status = subprocess.run(command).returncode
    except OSError as error:
        print(f"[setup] could not start {command[0]!r}: {error}", file=sys.stderr)
        return -1
    if status != 0:
        print(f"[setup] {' '.join(command)} exited with status {status}", file=sys.stderr)
    return status


# Fetch the DiffSynth-Studio sources; skip the clone when a previous start
# already left a checkout behind (git refuses to clone into an existing,
# non-empty directory).
if not os.path.isdir("DiffSynth-Studio"):
    _run_setup_step(["git", "clone", "https://github.com/modelscope/DiffSynth-Studio.git"])
# Copy the `diffsynth` package next to this script so the imports below resolve.
_run_setup_step(["cp", "-r", "DiffSynth-Studio/diffsynth", "./"])
# Install the requirements with the interpreter that is running this script,
# so the packages land in the environment that will import them.
_run_setup_step([sys.executable, "-m", "pip", "install", "-r", "DiffSynth-Studio/requirements.txt"])
|
5 |
+
from diffsynth import save_video, ModelManager, SVDVideoPipeline
|
6 |
+
from diffsynth import ModelManager
|
7 |
+
import torch, os, random, time
|
8 |
+
import gradio as gr
|
9 |
+
import numpy as np
|
10 |
+
from PIL import Image
|
11 |
+
import spaces
|
12 |
+
|
13 |
+
|
14 |
+
def get_i2v_pipeline():
    """Create the image-to-video pipeline used by the demo.

    A ``ModelManager`` is configured for float16 on the "cuda" device with the
    "stable-video-diffusion-img2vid-xt" and "ExVideo-SVD-128f-v1" model ids and
    "HuggingFace" as the download source; the pipeline is then built from it.

    Returns:
        The object returned by ``SVDVideoPipeline.from_model_manager``.
    """
    manager = ModelManager(
        torch_dtype=torch.float16,
        device="cuda",
        model_id_list=["stable-video-diffusion-img2vid-xt", "ExVideo-SVD-128f-v1"],
        downloading_priority=["HuggingFace"],
    )
    return SVDVideoPipeline.from_model_manager(manager)
|
20 |
+
|
21 |
+
|
22 |
+
# NOTE(review): presumably reserves a GPU for up to 300 s per call - confirm
# against the `spaces` package documentation.
@spaces.GPU(duration=300)
def sample(image, seed=0, randomize_seed=True, motion_bucket_id=100, num_inference_steps=25):
    """Generate a 128-frame, 512x512 video from a single image.

    The defaults mirror the initial values of the "Advanced options" widgets,
    so the function can also be invoked with only an image (the example
    gallery registers it with the image as its sole input).

    Args:
        image: PIL image to animate; it is resized to 512x512 before use.
        seed: Seed for ``torch.manual_seed``; ignored when ``randomize_seed``
            is true.
        randomize_seed: When true, draw a fresh seed in ``[0, 10**8]``.
        motion_bucket_id: Forwarded unchanged to the pipeline.
        num_inference_steps: Number of inference steps; coerced to ``int``.

    Returns:
        A ``(file_path, seed)`` tuple: the path of the saved ``.mp4`` and the
        seed that was actually used.

    Raises:
        gr.Error: If no image was provided.
    """
    if image is None:
        # Without this guard the call fails later with an opaque
        # AttributeError on `None.resize`.
        raise gr.Error("Please upload an image first.")
    if randomize_seed:
        seed = random.randint(0, 10**8)
    # UI sliders may deliver floats; keep the seed and step count integral.
    seed = int(seed)
    torch.manual_seed(seed)
    video = pipe(
        input_image=image.resize((512, 512)),
        num_frames=128, fps=30, height=512, width=512,
        motion_bucket_id=motion_bucket_id,
        num_inference_steps=int(num_inference_steps),
        min_cfg_scale=2, max_cfg_scale=2, contrast_enhance_scale=1.2
    )
    # Make sure the output directory exists before a path inside it is used.
    os.makedirs("videos", exist_ok=True)
    # Nanosecond timestamp keeps file names distinct across requests.
    file_path = f"videos/{time.time_ns()}.mp4"
    save_video(video, file_path, fps=30, quality=7)
    return file_path, seed
|
38 |
+
|
39 |
+
|
40 |
+
def crop_and_resize(image):
    """Center-crop an image to the target aspect ratio and resize it to 512x512.

    The image is converted to RGB first, so single-channel, palette and alpha
    inputs are handled the same way as RGB ones (unpacking a 2-D array shape
    into three values used to raise ``ValueError``).

    Args:
        image: A PIL image, or any array-like accepted by ``Image.fromarray``.
            ``None`` is passed through unchanged.

    Returns:
        A 512x512 RGB PIL image, or ``None`` when ``image`` is ``None``.
    """
    height = 512
    width = 512
    if image is None:
        return None
    if not isinstance(image, Image.Image):
        image = Image.fromarray(np.asarray(image))
    image = image.convert("RGB")
    image_width, image_height = image.size
    if image_height / image_width < height / width:
        # Wider than the target: keep the full height, trim left and right.
        cropped_width = int(image_height / height * width)
        left = (image_width - cropped_width) // 2
        box = (left, 0, left + cropped_width, image_height)
    else:
        # Taller than (or equal to) the target: keep the full width, trim top and bottom.
        cropped_height = int(image_width / width * height)
        top = (image_height - cropped_height) // 2
        box = (0, top, image_width, top + cropped_height)
    return image.crop(box).resize((width, height))
|
56 |
+
|
57 |
+
|
58 |
+
# Build the pipeline once at import time; `sample` reads it as a module global.
pipe = get_i2v_pipeline()

with gr.Blocks() as demo:
    gr.Markdown('''
# ExVideo

ExVideo is a post-tuning technique aimed at enhancing the capability of video generation models. We have extended Stable Video Diffusion to achieve the generation of long videos up to 128 frames.

This is the first model we have made public. Due to limitations in computational resources, this model was trained on about 40,000 videos using 8x A100 GPUs for approximately one week. Therefore, the model may sometimes generate content that does not conform to real-world principles. Please look forward to the release of our subsequent models.

To use this model, please refer to [DiffSynth](https://github.com/modelscope/DiffSynth-Studio).

* [Project Page](https://ecnu-cilab.github.io/ExVideoProjectPage/)
* [Source Code](https://github.com/modelscope/DiffSynth-Studio)
* [Technical report](https://arxiv.org/abs/2406.14130)
''')
    with gr.Row():
        with gr.Column():
            image = gr.Image(label="Upload your image", type="pil")
            generate_btn = gr.Button("Generate")
        video = gr.Video()
    with gr.Accordion("Advanced options", open=False):
        seed = gr.Slider(label="Seed", value=0, randomize=True, minimum=0, maximum=10**8, step=1)
        randomize_seed = gr.Checkbox(label="Randomize seed", value=True)
        motion_bucket_id = gr.Slider(label="Motion bucket id", info="Controls how much motion to synthesize", value=100, minimum=0, maximum=127)
        num_inference_steps = gr.Slider(label="Inference steps", value=25, minimum=1, maximum=50)

    # Normalise every uploaded image to the 512x512 input the pipeline is called with.
    image.upload(fn=crop_and_resize, inputs=image, outputs=image, queue=False)
    generate_btn.click(fn=sample, inputs=[image, seed, randomize_seed, motion_bucket_id, num_inference_steps], outputs=[video, seed], api_name="video")
    # Each example row supplies a value for every input of `sample`
    # (image, seed, randomize_seed, motion_bucket_id, num_inference_steps);
    # the non-image values repeat the widget defaults above. With the image as
    # the only input, any example-driven call of `sample` would be missing
    # four positional arguments.
    gr.Examples(
        examples=[
            ["images/0.png", 0, True, 100, 25],
            ["images/1.png", 0, True, 100, 25],
            ["images/2.png", 0, True, 100, 25],
            ["images/3.png", 0, True, 100, 25],
            ["images/4.png", 0, True, 100, 25],
        ],
        inputs=[image, seed, randomize_seed, motion_bucket_id, num_inference_steps],
        outputs=[video, seed],
        fn=sample,
    )

if __name__ == "__main__":
    demo.launch()
|
images/0.png
ADDED
images/1.png
ADDED
images/2.png
ADDED
images/3.png
ADDED
images/4.png
ADDED