Spaces:

HiepPhuocSS
/

TimeSFormer

Runtime error

App Files Files Community

thinh-huynh-re committed on Mar 12, 2023

Commit

24611b8

1 Parent(s): 09e8ab4

Init

Browse files

Files changed (4) hide show

.gitignore +4 -0
app.py +109 -0
requirements.txt +5 -0
tmp/.gitkeep +0 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,4 @@

+__pycache__
+env
+tmp/*
+!tmp/.gitkeep

app.py ADDED Viewed

	@@ -0,0 +1,109 @@

+import os
+from typing import List, Tuple
+import multiprocessing
+import numpy as np
+import pandas as pd
+import streamlit as st
+import torch
+from torch import Tensor
+from decord import VideoReader, cpu
+from transformers import AutoFeatureExtractor, TimesformerForVideoClassification
+# Seed NumPy's global RNG so the random frame window drawn inside
+# sample_frame_indices() (via np.random.randint) is reproducible.
+np.random.seed(0)
+# Page-level Streamlit settings: tab title and icon, wide layout, sidebar
+# expanded on load, and the entries of the app menu.
+# NOTE(review): Streamlit has historically required set_page_config() to be
+# the first Streamlit command executed in a script - confirm for the version
+# this app is deployed with before moving this call.
+st.set_page_config(
+    page_title="TimeSFormer",
+    page_icon="🧊",
+    layout="wide",
+    initial_sidebar_state="expanded",
+    menu_items={
+        # NOTE(review): these URLs and the "About" text look like placeholder
+        # values rather than project links - TODO confirm and replace.
+        "Get Help": "https://www.extremelycoolapp.com/help",
+        "Report a bug": "https://www.extremelycoolapp.com/bug",
+        "About": "# This is a header. This is an *extremely* cool app!",
+    },
+)
+def sample_frame_indices(
+    clip_len: int, frame_sample_rate: float, seg_len: int
+) -> np.ndarray:
+    """Pick ``clip_len`` frame indices from a video of ``seg_len`` frames.
+
+    A window of ``int(clip_len * frame_sample_rate)`` frames is placed at a
+    random position (using NumPy's global RNG) and ``clip_len`` evenly spaced
+    indices are taken from it. If the video is not longer than that window,
+    the indices are spread evenly over the whole video instead, so short
+    clips no longer make ``np.random.randint`` fail.
+
+    Args:
+        clip_len: Number of frame indices to return.
+        frame_sample_rate: Stride, in frames, between consecutive samples.
+        seg_len: Total number of frames available in the video.
+
+    Returns:
+        A non-decreasing ``int64`` array of ``clip_len`` indices, each in
+        ``[0, seg_len - 1]``.
+
+    Raises:
+        ValueError: If ``seg_len`` is not positive (the video has no frames).
+    """
+    if seg_len <= 0:
+        raise ValueError(f"seg_len must be positive, got {seg_len}")
+    # Keep the window at least one frame wide so the clip below stays valid.
+    converted_len = max(1, int(clip_len * frame_sample_rate))
+    if seg_len > converted_len:
+        # Enough frames: draw a random window fully inside the video.
+        end_idx = np.random.randint(converted_len, seg_len)
+        start_idx = end_idx - converted_len
+    else:
+        # Video is shorter than (or equal to) the window: use all of it.
+        start_idx, end_idx = 0, seg_len
+    indices = np.linspace(start_idx, end_idx, num=clip_len)
+    # linspace includes end_idx itself, which is one past the last valid frame.
+    indices = np.clip(indices, start_idx, end_idx - 1).astype(np.int64)
+    return indices
+@st.cache_resource
+def load_model():
+    """Load the frame preprocessor and the video classification model.
+
+    Decorated with ``st.cache_resource`` so the loaded objects can be reused
+    instead of being rebuilt on every call.
+
+    Returns:
+        A ``(feature_extractor, model)`` tuple.
+    """
+    preprocessor_checkpoint = "MCG-NJU/videomae-base-finetuned-kinetics"
+    classifier_checkpoint = "facebook/timesformer-base-finetuned-k400"
+    # NOTE(review): the preprocessor is taken from a VideoMAE checkpoint while
+    # the weights are TimeSformer - confirm this pairing is intended.
+    extractor = AutoFeatureExtractor.from_pretrained(preprocessor_checkpoint)
+    classifier = TimesformerForVideoClassification.from_pretrained(
+        classifier_checkpoint
+    )
+    return extractor, classifier
+# Module-level handles used by inference(); load_model() is wrapped in
+# st.cache_resource, so this call goes through Streamlit's resource cache.
+feature_extractor, model = load_model()
+def inference(file_path: str) -> pd.DataFrame:
+    """Classify a video file and return its top-5 predicted labels.
+
+    Eight frames are sampled from the video at ``file_path``, preprocessed
+    with the module-level ``feature_extractor`` and scored by the module-level
+    ``model``. The logits are turned into softmax probabilities so that the
+    reported confidence is an actual percentage rather than a raw score.
+
+    Args:
+        file_path: Path of the video file to classify.
+
+    Returns:
+        A DataFrame with columns ``Label`` and ``Confidence`` (percent,
+        0-100), ordered from most to least likely, with at most five rows.
+    """
+    TOP_K = 5
+    # Open the file the caller asked for instead of a module-level global.
+    videoreader = VideoReader(file_path, num_threads=1, ctx=cpu(0))
+    # sample 8 frames
+    videoreader.seek(0)
+    indices = sample_frame_indices(
+        clip_len=8, frame_sample_rate=4, seg_len=len(videoreader)
+    )
+    video = videoreader.get_batch(indices).asnumpy()
+    inputs = feature_extractor(list(video), return_tensors="pt")
+    with torch.no_grad():
+        outputs = model(**inputs)
+        logits: Tensor = outputs.logits
+    # A single clip is passed in, so row 0 holds its per-class scores.
+    # Softmax maps the unbounded logits to probabilities that sum to 1.
+    probabilities = torch.softmax(logits[0], dim=-1)
+    # topk returns its results sorted from highest to lowest by default.
+    top = torch.topk(probabilities, k=min(TOP_K, probabilities.numel()))
+    results: List[Tuple[str, float]] = []
+    # .tolist() yields plain Python ints/floats for the id2label lookup.
+    for index, value in zip(top.indices.tolist(), top.values.tolist()):
+        predicted_label = model.config.id2label[index]
+        confidence = value * 100.0
+        print(f"Label: {predicted_label} - {confidence:.2f}%")
+        results.append((predicted_label, confidence))
+    return pd.DataFrame(results, columns=("Label", "Confidence"))
+# ---- Page body: title, intro panel, upload widget, results ----
+st.title("TimeSFormer")
+# Collapsible intro panel; the text embeds the CPU count reported by
+# multiprocessing.cpu_count().
+with st.expander("INTRODUCTION"):
+    st.text(
+        f"""Streamlit demo for TimeSFormer.
+        Author: Hiep Phuoc Secondary High School
+        Number of CPU(s): {multiprocessing.cpu_count()}
+    """
+    )
+# Fixed on-disk location the upload is copied to; each upload overwrites the
+# previous one. Assumes the tmp/ directory already exists, since open() does
+# not create missing directories.
+VIDEO_TMP_PATH = os.path.join("tmp", "tmp.mp4")
+# The uploader widget is restricted to .mp4 files.
+uploadedfile = st.file_uploader("Upload file", type=["mp4"])
+if uploadedfile is not None:
+    # Write the uploaded bytes to disk so the video can be opened by path.
+    with st.spinner():
+        with open(VIDEO_TMP_PATH, "wb") as f:
+            f.write(uploadedfile.getbuffer())
+    # Run the classifier while a spinner is displayed.
+    with st.spinner("Processing..."):
+        df = inference(VIDEO_TMP_PATH)
+    # Show the prediction table first, then the uploaded clip.
+    st.dataframe(df)
+    st.video(VIDEO_TMP_PATH)

requirements.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+streamlit
+transformers
+torch
+decord
+black

tmp/.gitkeep ADDED Viewed

File without changes