# NOTE: scraped from a Hugging Face Spaces page whose status banner read "Runtime error".
import os

import gradio as gr
import torch
from video_transformers import VideoModel

from utils import (
    convert_frames_to_gif,
    download_youtube_video,
    sample_frames_from_video_file,
)
# Checkpoint identifier handed to VideoModel.from_transformers.
_MODEL_NAME = "facebook/timesformer-base-finetuned-k400"

# Built once at import time; predict() reads this module-level instance.
video_model = VideoModel.from_transformers(_MODEL_NAME)

# YouTube clips offered as clickable examples in the UI.
_EXAMPLE_URLS = (
    "https://www.youtube.com/watch?v=huAJ9dC5lmI",
    "https://www.youtube.com/watch?v=wvcWt6u5HTg",
    "https://www.youtube.com/watch?v=-3kZSi5qjRM",
    "https://www.youtube.com/watch?v=-6usjfP8hys",
    "https://www.youtube.com/watch?v=BDHub0gBGtc",
    "https://www.youtube.com/watch?v=B9ea7YyCP6E",
    "https://www.youtube.com/watch?v=BBkpaeJBKmk",
    "https://www.youtube.com/watch?v=BBqU8Apee_g",
    "https://www.youtube.com/watch?v=B8OdMwVwyXc",
    "https://www.youtube.com/watch?v=I7cwq6_4QtM",
    "https://www.youtube.com/watch?v=Z0mJDXpNhYA",
    "https://www.youtube.com/watch?v=QkQQjFGnZlg",
    "https://www.youtube.com/watch?v=IQaoRUQif14",
)

# One single-element row per clip: the examples table feeds exactly one
# input component (the URL textbox).
examples = [[url] for url in _EXAMPLE_URLS]
def predict(youtube_url):
    """Classify the video behind ``youtube_url`` and build a preview GIF.

    The video is fetched with ``download_youtube_video``, 16 frames are
    sampled from it for the preview, and the module-level ``video_model``
    is run on the downloaded file.

    Args:
        youtube_url: URL forwarded unchanged to ``download_youtube_video``.

    Returns:
        A ``(predictions, gif_path)`` tuple, where ``predictions`` is the
        ``"predictions"`` entry of the model output and ``gif_path`` is the
        value returned by ``convert_frames_to_gif``.

    Raises:
        Whatever the download, sampling, GIF conversion, inference or file
        removal steps raise; nothing is caught here.
    """
    video_path = download_youtube_video(youtube_url)
    try:
        frames = sample_frames_from_video_file(video_path, num_frames=16)
        gif_path = convert_frames_to_gif(frames)
        result = video_model.predict(video_or_folder_path=video_path)
    finally:
        # Delete the downloaded file even when sampling or inference fails,
        # so failed requests do not pile up videos on disk.
        os.remove(video_path)
    return result["predictions"], gif_path
# Static Markdown/HTML snippets, kept apart from the layout code below.
_INTRO_MD = """
<p style='text-align: center'>
Timesformer is a video model that uses a Transformer architecture to process video frames.
<br>It is released by Facebook AI Research in ICML 2021.
<br>This version is trained on Kinetics-400 dataset and can classify videos into 400 classes.
</p>
"""

_FOLLOW_MD = """
<p style='text-align: center'>
Follow me for more!
<br> <a href='https://twitter.com/fcakyon' target='_blank'>twitter</a> | <a href='https://github.com/fcakyon' target='_blank'>github</a> | <a href='https://www.linkedin.com/in/fcakyon/' target='_blank'>linkedin</a> | <a href='https://fcakyon.medium.com/' target='_blank'>medium</a>
</p>
"""

_FOOTER_MD = """
\n Demo created by: <a href=\"https://github.com/fcakyon\">fcakyon</a>
<br> Based on this <a href=\"https://huggingface.co/docs/transformers/main/model_doc/timesformer">HuggingFace model</a>
"""

# NOTE(review): the nesting below (three columns inside one row, examples and
# footer at the top level of the Blocks) is reconstructed from syntax because
# the original indentation was lost; confirm against the deployed layout.
with gr.Blocks() as app:
    # Page header and descriptive text.
    gr.Markdown("# **<p align='center'>Video Classification with Timesformer</p>**")
    gr.Markdown(_INTRO_MD)
    gr.Markdown(_FOLLOW_MD)

    with gr.Row():
        # Left: URL input and trigger button.
        with gr.Column():
            gr.Markdown("Provide a Youtube video URL.")
            youtube_url = gr.Textbox(label="Youtube URL:", show_label=True)
            predict_btn = gr.Button(value="Predict")
        # Middle: GIF preview built from the sampled frames.
        with gr.Column():
            video_gif = gr.Image(label="Input Clip", show_label=True)
        # Right: top-5 class labels.
        with gr.Column():
            predictions = gr.Label(
                label="Predictions:",
                show_label=True,
                num_top_classes=5,
            )

    gr.Markdown("**Examples:**")
    gr.Examples(
        examples=examples,
        inputs=youtube_url,
        outputs=[predictions, video_gif],
        fn=predict,
        cache_examples=True,
    )

    # Output order matches predict()'s return value: (predictions, gif_path).
    predict_btn.click(
        fn=predict,
        inputs=youtube_url,
        outputs=[predictions, video_gif],
    )

    gr.Markdown(_FOOTER_MD)

app.launch()