Spaces:
Sleeping
Sleeping
import numpy as np | |
import av | |
import torch | |
# from transformers.models.auto import AutoImageProcessor, AutoModelForVideoClassification | |
from transformers import AutoImageProcessor, AutoModelForVideoClassification | |
import streamlit as st | |
def read_video_pyav(container, indices):
    '''
    Decode the requested frames of a video with the PyAV decoder.

    The container is rewound and decoded sequentially from the first frame;
    decoding stops as soon as the frame counter passes the last requested index.

    Args:
        container (`av.container.input.InputContainer`): PyAV container.
        indices (`List[int]`): Frame indices to decode, in ascending order
            (the first and last entries bound the range that is scanned).
    Returns:
        result (np.ndarray): np array of decoded frames of shape (num_frames, height, width, 3).
            A repeated index contributes a single frame, so `num_frames` can be
            smaller than `len(indices)`.
    Raises:
        IndexError: If `indices` is empty.
        ValueError: If no decoded frame matches `indices` (raised by `np.stack`).
    '''
    frames = []
    container.seek(0)
    start_index = indices[0]
    end_index = indices[-1]
    # Build the lookup once: testing membership against the raw list/array
    # would rescan it for every decoded frame.
    wanted = set(np.asarray(indices).tolist())
    for i, frame in enumerate(container.decode(video=0)):
        if i > end_index:
            break
        if i >= start_index and i in wanted:
            frames.append(frame)
    return np.stack([x.to_ndarray(format="rgb24") for x in frames])
def sample_frame_indices(clip_len, frame_sample_rate, seg_len):
    '''
    Sample a given number of frame indices from the video.

    When the segment is longer than `clip_len * frame_sample_rate` frames, a
    window of that length is placed at a random position and `clip_len`
    indices are spread evenly across it. When the segment is too short for
    such a window, the indices are spread evenly over the whole segment
    instead (deterministically, from frame 0 to frame `seg_len - 1`).

    Args:
        clip_len (`int`): Total number of frames to sample.
        frame_sample_rate (`int`): Sample every n-th frame.
        seg_len (`int`): Number of frames available; every returned index is
            strictly below this value.
    Returns:
        indices (`np.ndarray`): int64 array of `clip_len` sampled frame indices
            in non-decreasing order. Indices repeat when `seg_len < clip_len`.
    Raises:
        ValueError: If `seg_len` is not positive.
    '''
    if seg_len <= 0:
        raise ValueError(f"seg_len must be positive, got {seg_len}")

    converted_len = int(clip_len * frame_sample_rate)
    if seg_len <= converted_len:
        # np.random.randint(low, high) requires low < high, so a random window
        # cannot be placed here; cover the whole short segment instead.
        return np.linspace(0, seg_len - 1, num=clip_len).astype(np.int64)

    end_idx = np.random.randint(converted_len, seg_len)
    start_idx = end_idx - converted_len
    indices = np.linspace(start_idx, end_idx, num=clip_len)
    # Keep the last index strictly inside the window before truncating to int.
    indices = np.clip(indices, start_idx, end_idx - 1).astype(np.int64)
    return indices
def classify(file):
    '''
    Classify a video with the module-level `model` and `image_processor`.

    Args:
        file: Path or file-like object accepted by `av.open`.
    Returns:
        label (`str`): The entry of `model.config.id2label` for the
            highest-scoring class, or the message 'Video trop courte' when the
            first video stream reports fewer than 16 frames.
    '''
    clip_len = 16  # number of frames sampled from the video
    # The context manager closes the container (and its decoder) on every exit path.
    with av.open(file) as container:
        frame_count = container.streams.video[0].frames
        # Check the length before sampling: sampling and decoding cannot
        # deliver a full clip from a video with fewer than `clip_len` frames.
        # NOTE(review): PyAV documents `frames` as 0 when the count is unknown;
        # such files are reported as too short here.
        if frame_count < clip_len:
            return 'Video trop courte'
        indices = sample_frame_indices(clip_len=clip_len, frame_sample_rate=4, seg_len=frame_count)
        video = read_video_pyav(container, indices)

    inputs = image_processor(list(video), return_tensors="pt")
    # Inference only: no gradients are needed.
    with torch.no_grad():
        outputs = model(**inputs)
    logits = outputs.logits
    # Index of the highest-scoring class, mapped to its label by the model config.
    predicted_label = logits.argmax(-1).item()
    label = model.config.id2label[predicted_label]
    print(label)
    return label
# Checkpoint used for both the image processor and the classification model.
model_ckpt = '2nzi/videomae-surf-analytics'
# pipe = pipeline("video-classification", model="2nzi/videomae-surf-analytics")
# NOTE(review): Streamlit re-executes this script on each interaction, so these
# two loads are repeated; consider caching them (e.g. `st.cache_resource`) if
# the installed Streamlit version provides it.
image_processor = AutoImageProcessor.from_pretrained(model_ckpt)
model = AutoModelForVideoClassification.from_pretrained(model_ckpt)

# Page header and project credits.
st.subheader("Surf Analytics")
st.markdown("""
Bienvenue sur le projet Surf Analytics réalisé par Walid, Guillaume, Valentine, et Antoine.
<a href="https://github.com/2nzi/M09-FinalProject-Surf-Analytics" style="text-decoration: none;">@Surf-Analytics-Github</a>.
""", unsafe_allow_html=True)
st.title("Surf Maneuver Classification")

# Upload widget: preview the video, then show the predicted label.
uploaded_file = st.file_uploader("Upload a video file", type=["mp4", "avi", "mov"])
if uploaded_file is not None:
    video_bytes = uploaded_file.read()
    st.video(video_bytes)
    # read() above left the upload's cursor at the end of the buffer; rewind it
    # so classify() decodes the stream from the first byte.
    uploaded_file.seek(0)
    predicted_label = classify(uploaded_file)
    st.success(f"Predicted Label: {predicted_label}")