fffiloni's picture
add dependencies
aa0a087 verified
raw
history blame
No virus
3.82 kB
import gradio as gr
import numpy as np
import tempfile
import imageio
import torch
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
pipe = DiffusionPipeline.from_pretrained("cerspense/zeroscope_v2_576w", torch_dtype=torch.float16)
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
pipe.enable_model_cpu_offload()
caption = gr.load(name="spaces/fffiloni/CoCa-clone")
def create_image_caption(image_init):
cap = caption(image_init, "Nucleus sampling", 1.2, 0.5, 5, 20, fn_index=0)
print("cap: " + cap)
return cap
def export_to_video(frames: np.ndarray, fps: int) -> str:
frames = np.clip((frames * 255), 0, 255).astype(np.uint8)
out_file = tempfile.NamedTemporaryFile(suffix=".mp4", delete=False)
writer = imageio.get_writer(out_file.name, format="FFMPEG", fps=fps)
for frame in frames:
writer.append_data(frame)
writer.close()
return out_file.name
def infer(image_init):
prompt = create_image_caption(image_init)
video_frames = pipe(prompt, num_inference_steps=40, height=320, width=576, num_frames=24).frames[0]
video_path = export_to_video(video_frames, 12)
print(video_path)
return prompt, video_path
css = """
#col-container {max-width: 510px; margin-left: auto; margin-right: auto;}
a {text-decoration-line: underline; font-weight: 600;}
.animate-spin {
animation: spin 1s linear infinite;
}
@keyframes spin {
from {
transform: rotate(0deg);
}
to {
transform: rotate(360deg);
}
}
#share-btn-container {
display: flex;
padding-left: 0.5rem !important;
padding-right: 0.5rem !important;
background-color: #000000;
justify-content: center;
align-items: center;
border-radius: 9999px !important;
max-width: 13rem;
}
#share-btn-container:hover {
background-color: #060606;
}
#share-btn {
all: initial;
color: #ffffff;
font-weight: 600;
cursor:pointer;
font-family: 'IBM Plex Sans', sans-serif;
margin-left: 0.5rem !important;
padding-top: 0.5rem !important;
padding-bottom: 0.5rem !important;
right:0;
}
#share-btn * {
all: unset;
}
#share-btn-container div:nth-child(-n+2){
width: auto !important;
min-height: 0px !important;
}
#share-btn-container .wrap {
display: none !important;
}
#share-btn-container.hidden {
display: none!important;
}
img[src*='#center'] {
display: block;
margin: auto;
}
"""
with gr.Blocks(css=css) as demo:
with gr.Column(elem_id="col-container"):
gr.Markdown(
"""
<h1 style="text-align: center;">Zeroscope Image-to-Video</h1>
<p style="text-align: center;">
A watermark-free Modelscope-based video model optimized for producing high-quality 16:9 compositions and a smooth video output. <br />
This demo is a variation that lets you upload an image as reference for video generation.
</p>
[![Duplicate this Space](https://huggingface.co/datasets/huggingface/badges/raw/main/duplicate-this-space-sm.svg#center)](https://huggingface.co/spaces/fffiloni/zeroscope-img-to-video?duplicate=true)
"""
)
image_init = gr.Image(label="Image Init",type="filepath", source="upload", elem_id="image-init")
#inference_steps = gr.Slider(label="Inference Steps", minimum=10, maximum=100, step=1, value=40, interactive=False)
submit_btn = gr.Button("Submit")
coca_cap = gr.Textbox(label="Caption", placeholder="CoCa Caption will be displayed here", elem_id="coca-cap-in")
video_result = gr.Video(label="Video Output", elem_id="video-output")
submit_btn.click(fn=infer,
inputs=[image_init],
outputs=[coca_cap, video_result])
demo.queue(max_size=12).launch()