import av
import numpy as np
import streamlit as st
import torch
from transformers import AutoImageProcessor, AutoModelForVideoClassification


def read_video_pyav(container, indices):
    '''
    Decode the video with the PyAV decoder.

    Args:
        container (`av.container.input.InputContainer`): PyAV container.
        indices (`List[int]`): List of frame indices to decode.

    Returns:
        result (np.ndarray): np array of decoded frames of shape (num_frames, height, width, 3).
    '''
    frames = []
    container.seek(0)
    start_index = indices[0]
    end_index = indices[-1]
    for i, frame in enumerate(container.decode(video=0)):
        if i > end_index:
            break
        if i >= start_index and i in indices:
            frames.append(frame)
    return np.stack([x.to_ndarray(format="rgb24") for x in frames])


def sample_frame_indices(clip_len, frame_sample_rate, seg_len):
    '''
    Sample a given number of frame indices from the video.

    Args:
        clip_len (`int`): Total number of frames to sample.
        frame_sample_rate (`int`): Sample every n-th frame.
        seg_len (`int`): Maximum allowed index of the sample's last frame.

    Returns:
        indices (`List[int]`): List of sampled frame indices.
    '''
    converted_len = int(clip_len * frame_sample_rate)
    end_idx = np.random.randint(converted_len, seg_len)
    start_idx = end_idx - converted_len
    indices = np.linspace(start_idx, end_idx, num=clip_len)
    indices = np.clip(indices, start_idx, end_idx - 1).astype(np.int64)
    return indices


def classify(file):
    '''Run the video classifier on an uploaded file and return the predicted label.'''
    container = av.open(file)
    seg_len = container.streams.video[0].frames
    # A 16-frame clip at a sample rate of 4 spans 64 source frames, so reject
    # shorter videos *before* sampling; sampling first (as the original code
    # did) would raise inside np.random.randint before the check ran.
    if seg_len <= 16 * 4:
        return "Video too short"
    # Sample 16 frames.
    indices = sample_frame_indices(clip_len=16, frame_sample_rate=4, seg_len=seg_len)
    video = read_video_pyav(container, indices)
    inputs = image_processor(list(video), return_tensors="pt")
    with torch.no_grad():
        outputs = model(**inputs)
        logits = outputs.logits
    # The model predicts one of the surf-maneuver classes it was fine-tuned on.
    predicted_label = logits.argmax(-1).item()
    return model.config.id2label[predicted_label]


model_ckpt = "2nzi/videomae-surf-analytics"
image_processor = AutoImageProcessor.from_pretrained(model_ckpt)
model = AutoModelForVideoClassification.from_pretrained(model_ckpt)

st.subheader("Surf Analytics")
st.markdown(
    """
    Welcome to the Surf Analytics project by Walid, Guillaume, Valentine, and Antoine.
    @Surf-Analytics-Github.
    """,
    unsafe_allow_html=True,
)

st.title("Surf Maneuver Classification")

uploaded_file = st.file_uploader("Upload a video file", type=["mp4", "avi", "mov"])

if uploaded_file is not None:
    video_bytes = uploaded_file.read()
    st.video(video_bytes)
    # read() leaves the file pointer at the end, so rewind before PyAV
    # tries to open the same buffer.
    uploaded_file.seek(0)
    predicted_label = classify(uploaded_file)
    st.success(f"Predicted Label: {predicted_label}")
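
# ---------------------------------------------------------------------------
# Running the app (a minimal sketch; the filename `app.py` is an assumption,
# substitute whatever this script is saved as):
#
#   pip install streamlit av torch transformers numpy
#   streamlit run app.py
#
# Streamlit serves the app locally, by default at http://localhost:8501.
# The first run downloads the `2nzi/videomae-surf-analytics` checkpoint
# from the Hugging Face Hub, so it may take a while.
# ---------------------------------------------------------------------------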