#!/usr/bin/env python
from __future__ import annotations

import os
import shutil

import gradio as gr
import huggingface_hub
import torch

from webui.merge_config_gradio import merge_config_then_run

HF_TOKEN = os.getenv('HF_TOKEN')

pipe = merge_config_then_run()
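# Hedged sketch: the original app only reads HF_TOKEN and does not show how it
# is consumed. One plausible use is authenticating with the Hugging Face Hub
# (e.g. for gated weights) via huggingface_hub.login, which takes a `token` kwarg.
if HF_TOKEN:
    huggingface_hub.login(token=HF_TOKEN)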
| ARTICLE = r""" | |
| If VideoGrain is helpful, please help to ⭐ the <a href='https://github.com/knightyxp/VideoGrain' target='_blank'>Github Repo</a>. Thanks! | |
| [](https://github.com/knightyxp/VideoGrain) | |
| --- | |
| 📝 **Citation** | |
| If our work is useful for your research, please consider citing: | |
| ```bibtex | |
| @article{yang2025videograin, | |
| title={VideoGrain: Modulating Space-Time Attention for Multi-grained Video Editing}, | |
| author={Yang, Xiangpeng and Zhu, Linchao and Fan, Hehe and Yang, Yi}, | |
| journal={ICLR}, | |
| year={2025} | |
| } | |
| ``` | |
| 📋 **License** | |
| This project is licensed under <a rel="license" href="https://github.com/knightyxp/VideoGrain?tab=License-1-ov-file#readme">ReLER-Lab License 1.0</a>. | |
| Redistribution and use for non-commercial purposes should follow this license. | |
| 📧 **Contact** | |
| If you have any questions, please feel free to reach me out at <b>knightyxp@gmail.com</b>. | |
| """ | |
def update_layout_visibility(selected_num):
    """Show the first `selected_num` layout-video components and hide the rest."""
    num = int(selected_num)
    # `layout_files` is defined below inside the Blocks context; by the time this
    # callback fires, the list is fully populated.
    return [gr.update(visible=(i < num)) for i in range(len(layout_files))]
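
# For illustration: with the five layout slots created below, selecting "3"
# returns updates that show the first three videos and hide the last two, i.e.
#   update_layout_visibility("3")
#   == [gr.update(visible=True)] * 3 + [gr.update(visible=False)] * 2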
with gr.Blocks(css='style.css') as demo:
    # gr.Markdown(TITLE)
    gr.HTML(
        """
        <div style="text-align: center; max-width: 1200px; margin: 20px auto;">
        <h1 style="font-weight: 900; font-size: 2rem; margin: 0rem">
            VideoGrain: Modulating Space-Time Attention for Multi-Grained Video Editing
        </h1>
        <h2 style="font-weight: 450; font-size: 1rem; margin: 0rem">
            <a href="https://github.com/knightyxp">Xiangpeng Yang</a>
        </h2>
        <h2 style="font-weight: 450; font-size: 1rem; margin: 0rem">
            <!-- arXiv link -->
            <span class="link-block">
                [<a href="https://arxiv.org/abs/2502.17258" target="_blank"
                    class="external-link">
                    <span class="icon"><i class="ai ai-arxiv"></i></span>
                    <span>arXiv</span>
                </a>]
            </span>
            <!-- GitHub link -->
            <span class="link-block">
                [<a href="https://github.com/knightyxp/VideoGrain" target="_blank"
                    class="external-link">
                    <span class="icon"><i class="fab fa-github"></i></span>
                    <span>Code</span>
                </a>]
            </span>
            <!-- Homepage link -->
            <span class="link-block">
                [<a href="https://knightyxp.github.io/VideoGrain_project_page" target="_blank"
                    class="external-link">
                    <span class="icon"><i class="fab fa-github"></i></span>
                    <span>Homepage</span>
                </a>]
            </span>
            <!-- YouTube link -->
            <span class="link-block">
                [<a href="https://www.youtube.com/watch?v=XEM4Pex7F9E" target="_blank"
                    class="external-link">
                    <span class="icon"><i class="fab fa-youtube"></i></span>
                    <span>YouTube Video</span>
                </a>]
            </span>
        </h2>
        <h2 style="font-weight: 450; font-size: 1rem; margin-top: 0.5rem; margin-bottom: 0.5rem">
            📕 TL;DR: VideoGrain is a zero-shot method for class-level, instance-level, and part-level video editing.
        </h2>
        <h2 style="font-weight: 450; font-size: 1rem;">
            Note that this page is a limited demo of VideoGrain. To run with more configurations, please check out our <a href="https://github.com/knightyxp/VideoGrain">GitHub page</a>.
        </h2>
        </div>
        """)
    gr.HTML("""
        <p>We provide an <a href="https://github.com/knightyxp/VideoGrain?tab=readme-ov-file#editing-guidance-for-your-video">Editing Guidance</a> to help users choose hyperparameters when editing in-the-wild videos.</p>
        <p>To remove the limitations or avoid the queue on your own hardware, you may <a href="https://huggingface.co/spaces/XiangpengYang/VideoGrain?duplicate=true" style="display: inline-block; vertical-align: middle;"><img style="margin-top: 0em; margin-bottom: 0em; display: inline-block;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a></p>
        """)
    with gr.Row():
        with gr.Column():
            with gr.Accordion('Input Video', open=True):
                # user_input_video = gr.File(label='Input Source Video')
                user_input_video = gr.Video(label='Input Source Video',
                                            source='upload',
                                            type='numpy',
                                            format="mp4",
                                            visible=True).style(height="auto")

                # Radio to choose how many layout videos to show.
                num_layouts = gr.Radio(
                    choices=["2", "3", "4", "5"],
                    label="Select Number of Editing Areas",
                    value="2",  # default
                    info="Please select the number of editing areas"
                )

                # Create all layout-video components in a loop and collect them
                # in the `layout_files` list.
                layout_files = []
                with gr.Row():
                    for i in range(5):
                        video = gr.Video(
                            label=f"Layout Video {i+1}",
                            type="numpy",
                            format="mp4",
                            visible=(i < 2)  # show the first two by default
                        )
                        layout_files.append(video)

                # When `num_layouts` changes, the callback updates the `visible`
                # property of every video component in `layout_files`.
                num_layouts.change(
                    fn=update_layout_visibility,
                    inputs=num_layouts,
                    outputs=layout_files
                )

            prompt = gr.Textbox(label='Prompt',
                                info='Edit the prompt, and extract each local prompt from the editing prompt '
                                     '(the local prompt order should match the layout mask order).',
                                )
            model_id = gr.Dropdown(
                label='Model ID',
                choices=[
                    'stable-diffusion-v1-5/stable-diffusion-v1-5',
                    # add shape editing ckpt here
                ],
                value='stable-diffusion-v1-5/stable-diffusion-v1-5')
        with gr.Column():
            result = gr.Video(label='Result')
            # result.style(height=512, width=512)

            with gr.Accordion('Temporal Crop Offset and Sampling Stride', open=False):
                n_sample_frame = gr.Slider(label='Number of Frames',
                                           minimum=0,
                                           maximum=32,
                                           step=1,
                                           value=16)
                sampling_rate = gr.Slider(label='Sampling Rate',
                                          minimum=0,
                                          maximum=20,
                                          step=1,
                                          value=1)
                start_sample_frame = gr.Number(label='Start frame in the video',
                                               value=0,
                                               precision=0)
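                # (Assumed behavior, for illustration only) The pipeline presumably
                # samples source frames as start_sample_frame + i * sampling_rate
                # for i in range(n_sample_frame); e.g. start=0, stride=2, n=16
                # covers frames 0, 2, ..., 30 of the input clip.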
            with gr.Row():
                control_list = ['dwpose', 'depth_zoe', 'depth_midas']
                control_type = gr.Dropdown(
                    choices=control_list,
                    label='Control type',
                    value='dwpose'
                )
                # Checkbox group for "dwpose" options; default: hand selected, face not selected.
                dwpose_options = gr.CheckboxGroup(
                    choices=["hand", "face"],
                    label="DW Pose Options",
                    value=["hand"],
                    visible=True  # initially visible since the default control_type is "dwpose"
                )
                # Show the dwpose options only when "dwpose" is the selected control type.
                control_type.change(
                    fn=lambda x: gr.update(visible=(x == "dwpose")),
                    inputs=control_type,
                    outputs=dwpose_options
                )
            controlnet_conditioning_scale = gr.Slider(label='ControlNet conditioning scale',
                                                      minimum=0.0,
                                                      maximum=1.0,
                                                      value=1.0,
                                                      step=0.1)
            with gr.Accordion('Editing config for VideoGrain', open=True):
                use_pnp = gr.Checkbox(
                    label="Use PnP",
                    value=False,
                    info="Check to enable PnP functionality."
                )
                pnp_inject_steps = gr.Slider(label='PnP inject steps',
                                             info='PnP inject steps for temporal consistency',
                                             minimum=0,
                                             maximum=10,
                                             step=1,
                                             value=0)
                flatten_res = gr.CheckboxGroup(
                    choices=["1", "2", "4", "8"],
                    label="Flatten Resolution",
                    value=["1"],
                    info="Select one or more flatten-resolution factors. Mapping: 1 -> 64, 2 -> 32 (64/2), 4 -> 16 (64/4), 8 -> 8 (64/8)."
                )
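                # (Illustrative assumption, not shown in this file) Downstream,
                # each selected factor f presumably maps to a latent resolution
                # of 64 // int(f):
                #   [64 // int(f) for f in ["1", "2"]]  ->  [64, 32]
                # matching the 1 -> 64 ... 8 -> 8 mapping described in `info`.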
            run_button = gr.Button('Generate')

    with gr.Row():
        from example import style_example
        examples = style_example
        # gr.Examples(examples=examples,
        #             inputs=[
        #                 model_id,
        #                 user_input_video,
        #                 layout_files,
        #                 prompt,
        #                 model_id,
        #                 control_type,
        #                 dwpose_options,
        #                 controlnet_conditioning_scale,
        #                 use_pnp,
        #                 pnp_inject_steps,
        #                 flatten_res,
        #             ],
        #             outputs=result,
        #             fn=pipe.run,
        #             cache_examples=True,
        #             # cache_examples=os.getenv('SYSTEM') == 'spaces'
        #             )

    gr.Markdown(ARTICLE)
    inputs = [user_input_video,
              num_layouts,
              *layout_files,
              prompt,
              model_id,
              n_sample_frame,
              start_sample_frame,
              sampling_rate,
              control_type,
              dwpose_options,
              controlnet_conditioning_scale,
              use_pnp,
              pnp_inject_steps,
              flatten_res,
              ]
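    # Gradio passes `inputs` positionally, so this order must match the
    # signature of `pipe.run`. Note that `*layout_files` splats all five layout
    # videos, so the callback always receives five layout arguments (typically
    # None for slots that are hidden or left empty).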
    prompt.submit(fn=pipe.run, inputs=inputs, outputs=result)
    run_button.click(fn=pipe.run, inputs=inputs, outputs=result)

demo.queue().launch(share=True)
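
# A minimal local-run sketch (assumptions: Gradio 3.x, which provides the
# `source=`/`.style()` APIs used above, and the VideoGrain repo root on
# PYTHONPATH so `webui` and `example` resolve):
#   pip install "gradio<4" torch
#   python app.py
# `queue()` serializes long-running edit jobs; on Hugging Face Spaces,
# `share=True` is ignored and the Space URL is used instead.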