Spaces:
Runtime error
Runtime error
import os | |
import gradio as gr | |
import pandas as pd | |
# HTML anchor for the baseline repository; interpolated into the demo tab text.
BASELINE = (
    '<a target="_blank" href=https://github.com/showlab/loveu-tgve-2023 '
    'style="color: blue; text-decoration: underline;text-decoration-style: dotted;">'
    'Tune-A-Video (Baseline)</a>'
)

# One (column header, Gradio datatype) pair per leaderboard column, in display
# order. COLS and TYPES are derived from it so they cannot drift apart.
_COLUMN_SPECS = [
    ("Method", "markdown"),
    ("CLIPScore (Frame Consistency) ⬆️", "number"),
    ("CLIPScore (Text Alignment) ⬆️", "number"),
    ("PickScore ⬆️", "number"),
    ("Human Preference ⬆️", "str"),
    ("References", "markdown"),
]
COLS = [header for header, _ in _COLUMN_SPECS]
TYPES = [datatype for _, datatype in _COLUMN_SPECS]
def get_leaderboard():
    """Build the leaderboard table from the built-in baselines and submissions.

    Two hard-coded baseline rows (Tune-A-Video and VideoCrafter) are combined
    with ``submission_results`` from the project-local ``result`` module. The
    rows are sorted by "PickScore ⬆️" in descending order and the table is
    restricted to the columns listed in ``COLS``.

    Returns:
        pandas.DataFrame: one row per method, columns in ``COLS`` order.
    """
    # Kept as a function-scope import, as in the original.
    # NOTE(review): presumably an iterable of dicts keyed like the baselines
    # below -- confirm against result.py.
    from result import submission_results

    def _link(url, label):
        # Render one reference as an HTML anchor that opens in a new tab.
        return f'<a target="_blank" href="{url}" style="color: blue">{label}</a>'

    baseline_0 = {
        "Method": '**Tune-A-Video**',
        "CLIPScore (Frame Consistency) ⬆️": 0.92,
        "CLIPScore (Text Alignment) ⬆️": 27.12,
        "PickScore ⬆️": 20.36,
        "Human Preference ⬆️": '',
        "References": ','.join([
            _link("https://arxiv.org/abs/2212.11565", "Paper"),
            _link("https://github.com/showlab/Tune-A-Video", "Code"),
            _link("https://tuneavideo.github.io/", "Website"),
            _link("https://huggingface.co/spaces/Tune-A-Video-library/Tune-A-Video-inference", "Demo"),
        ]),
    }
    baseline_1 = {
        "Method": 'VideoCrafter',
        "CLIPScore (Frame Consistency) ⬆️": 0.89,
        "CLIPScore (Text Alignment) ⬆️": 25.55,
        "PickScore ⬆️": 19.17,
        # Previously missing: without this key the row got NaN in a column
        # that baseline_0 fills with an empty string.
        "Human Preference ⬆️": '',
        "References": ','.join([
            _link("https://github.com/VideoCrafter/VideoCrafter", "Code"),
            _link("https://huggingface.co/spaces/VideoCrafter/VideoCrafter", "Demo"),
        ]),
    }

    all_data = [baseline_0, baseline_1, *submission_results]
    dataframe = pd.DataFrame.from_records(all_data)
    dataframe = dataframe.sort_values(by=['PickScore ⬆️'], ascending=False)
    # Echo the full sorted table (all columns) to stdout before trimming it.
    print(dataframe)
    return dataframe[COLS]
# Table contents computed once at import time; used as the initial value of
# the leaderboard Dataframe component.
leaderboard = get_leaderboard()


def refresh():
    """Rebuild the leaderboard table and return it."""
    fresh_table = get_leaderboard()
    return fresh_table
def load_edited_video(source_video, *args):
    """Return the path of the pre-rendered edit matching *source_video*.

    The file name is whatever follows the last '/' in ``source_video``; it is
    cut at the first occurrence of '.mp4' and suffixed with '-edit.mp4'. The
    result is looked up under the ``files`` directory next to this module.

    Args:
        source_video: path string of the source clip.
        *args: extra positional inputs; accepted and ignored.

    Returns:
        str: path of the form ``<module dir>/files/<stem>-edit.mp4``.
    """
    file_name = source_video.rpartition('/')[2]
    stem = file_name.partition('.mp4')[0]
    module_dir = os.path.dirname(__file__)
    return os.path.join(module_dir, "files/" + stem + "-edit.mp4")
# Static introduction shown at the top of the "Leaderboard" tab.
_LEADERBOARD_INTRO = """
# 🤗 LOVEU-TGVE @ CVPR 2023 Leaderboard
<font size="4">
<b>Welcome to the <a href="https://sites.google.com/view/loveucvpr23/track4" target="_blank">Text-Guided Video Editing (TGVE)</a> competition leaderboard of <a href="https://sites.google.com/view/loveucvpr23/home" target="_blank">LOVEU Workshop @ CVPR 2023</a>!</b>
Leveraging AI for video editing has the potential to unleash creativity for artists across all skill levels. The rapidly-advancing field of Text-Guided Video Editing (TGVE) is here to address this challenge. Recent works in this field include <a href="https://tuneavideo.github.io/" target="_blank">Tune-A-Video</a>, <a href="https://research.runwayml.com/gen2" target="_blank">Gen-2</a>, and <a href="https://dreamix-video-editing.github.io/" target="_blank">Dreamix</a>.
In this competition track, we provide a standard set of videos and prompts. As a researcher, you will develop a model that takes a video and a prompt for how to edit it, and your model will produce an edited video. For instance, you might be given a video of “a man is surfing inside the barrel of a wave,” and your model will edit the video to “a man is surfing on a wave made of aurora borealis.”
During the competition, evaluation results performed against the following 3 automatic metrics will be displayed on the leaderboard:
- <a href="https://arxiv.org/abs/2103.00020" target="_blank">CLIPScore</a> (Frame Consistency) - the average cosine similarity between all pairs of CLIP image embeddings computed on all frames of output videos.
- <a href="https://arxiv.org/abs/2103.00020" target="_blank">CLIPScore</a> (Text Alignment) - the average CLIP score between all frames of output videos and corresponding edited prompts.
- <a href="https://arxiv.org/abs/2305.01569" target="_blank">PickScore</a> - the average PickScore between all frames of output videos.
After all submissions are uploaded, we will run a human-evaluation of all submitted videos. Specifically, we will have human labelers compare all submitted videos. Labelers will evaluate videos on the following criteria:
- Text alignment: How well does the generated video match the caption?
- Structure: How well does the generated video preserve the structure of the original video?
- Quality: Aesthetically, how good is this video?
We will choose a winner and a runner-up based on the human evaluation results.
</font>
The **bold** method name indicates that the implementation is **official** (by the author / developer of the original method)."""

# NOTE(review): the original indentation was lost, so the nesting below is
# reconstructed from the order of the statements -- confirm against the
# deployed app.
with gr.Blocks() as block:
    # Tab 1: competition description plus the refreshable results table.
    with gr.Tab("Leaderboard"):
        with gr.Row():
            gr.Markdown(_LEADERBOARD_INTRO)
        with gr.Row():
            leaderboard_table = gr.components.Dataframe(
                value=leaderboard,
                headers=COLS,
                datatype=TYPES,
                max_rows=10,
            )
        with gr.Row():
            refresh_button = gr.Button("Refresh")
            refresh_button.click(refresh, inputs=[], outputs=[leaderboard_table])
        # Rebuild the table on every page load as well as on button click.
        block.load(refresh, inputs=[], outputs=[leaderboard_table])

    # Tab 2: source/edited video pairs produced by the baseline.
    with gr.Tab("Baseline Demo"):
        with gr.Row():
            gr.Markdown(f"Some examples generated by {BASELINE} are shown below.")
        with gr.Row():
            with gr.Column():
                source_video = gr.Video(type="file", label='Source Video', format="mp4", interactive=True)
                source_prompt = gr.Textbox(
                    label='Source Prompt',
                    # info='A good prompt describes each frame and most objects in video. Especially, it has the object or attribute that we want to edit or preserve.',
                    max_lines=2,
                    placeholder='Example: "A cat in the grass in the sun."',
                    # value='A cat in the grass in the sun.'
                )
            with gr.Column():
                result = gr.Video(type="file", label='Edited Video', format="mp4", interactive=True)
                editing_prompt = gr.Textbox(
                    label='Editing Prompt',
                    # info='A reasonable composition of video may achieve better results(e.g., "sunflower" video with "Van Gogh" prompt is better than "sunflower" with "Monet")',
                    max_lines=2,
                    placeholder='Example: "A dog in the grass in the sun."',
                    # value='A dog in the grass in the sun.'
                )
        with gr.Row():
            # Imported at the same point in the script as in the original.
            from example import examples
            # Each example row fills the three inputs; load_edited_video maps
            # the source clip to the edited clip shown in `result`.
            gr.Examples(
                examples=examples,
                inputs=[source_video, source_prompt, editing_prompt],
                outputs=result,
                fn=load_edited_video,
                cache_examples=True,
            )

block.launch()