thinh-huynh-re committed on
Commit
24611b8
1 Parent(s): 09e8ab4
Files changed (4) hide show
  1. .gitignore +4 -0
  2. app.py +109 -0
  3. requirements.txt +5 -0
  4. tmp/.gitkeep +0 -0
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ __pycache__
2
+ env
3
+ tmp/*
4
+ !tmp/.gitkeep
app.py ADDED
@@ -0,0 +1,109 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import os
2
+ from typing import List, Tuple
3
+ import multiprocessing
4
+
5
+ import numpy as np
6
+ import pandas as pd
7
+ import streamlit as st
8
+ import torch
9
+ from torch import Tensor
10
+ from decord import VideoReader, cpu
11
+ from transformers import AutoFeatureExtractor, TimesformerForVideoClassification
12
+
13
# Seed NumPy's global RNG, which sample_frame_indices draws from.
np.random.seed(0)

# Entries for the app's menu, passed to st.set_page_config below.
menu_links = {
    "Get Help": "https://www.extremelycoolapp.com/help",
    "Report a bug": "https://www.extremelycoolapp.com/bug",
    "About": "# This is a header. This is an *extremely* cool app!",
}

# Page title, icon, layout and sidebar state for the Streamlit app.
st.set_page_config(
    page_title="TimeSFormer",
    page_icon="🧊",
    layout="wide",
    initial_sidebar_state="expanded",
    menu_items=menu_links,
)
26
+
27
+
28
def sample_frame_indices(
    clip_len: int, frame_sample_rate: float, seg_len: int
) -> np.ndarray:
    """Pick ``clip_len`` frame indices from a video of ``seg_len`` frames.

    A window of ``int(clip_len * frame_sample_rate)`` frames is placed at a
    random position (using NumPy's global RNG) and ``clip_len`` evenly spaced
    indices are taken from it. When the video is not longer than that window,
    the indices are spread over the whole video instead of failing.

    Args:
        clip_len: Number of frame indices to return.
        frame_sample_rate: Stride, in frames, between consecutive samples.
        seg_len: Total number of frames available in the video.

    Returns:
        An ``int64`` array of ``clip_len`` non-decreasing indices.

    Raises:
        ValueError: If ``seg_len`` is not positive (no frame can be sampled).
    """
    if seg_len <= 0:
        raise ValueError(f"seg_len must be positive, got {seg_len}")

    converted_len = int(clip_len * frame_sample_rate)
    if converted_len >= seg_len:
        # Video too short for a full window: np.random.randint(low, high)
        # would raise because low >= high, so cover the entire video.
        start_idx, end_idx = 0, seg_len
    else:
        end_idx = np.random.randint(converted_len, seg_len)
        start_idx = end_idx - converted_len

    indices = np.linspace(start_idx, end_idx, num=clip_len)
    # linspace includes end_idx itself; clamp so the last index stays in the window.
    return np.clip(indices, start_idx, end_idx - 1).astype(np.int64)
37
+
38
+
39
@st.cache_resource
def load_model():
    """Load the pretrained frame preprocessor and video classifier.

    Wrapped in ``st.cache_resource`` — presumably so the checkpoints are
    loaded once and reused across Streamlit reruns (confirm against the
    Streamlit docs).

    Returns:
        A ``(feature_extractor, model)`` tuple.
    """
    extractor_checkpoint = "MCG-NJU/videomae-base-finetuned-kinetics"
    classifier_checkpoint = "facebook/timesformer-base-finetuned-k400"

    extractor = AutoFeatureExtractor.from_pretrained(extractor_checkpoint)
    classifier = TimesformerForVideoClassification.from_pretrained(
        classifier_checkpoint
    )
    return extractor, classifier


# Module-level handles used by inference().
feature_extractor, model = load_model()
51
+
52
+
53
def inference(file_path: str, top_k: int = 5) -> pd.DataFrame:
    """Classify the video at ``file_path`` and return its top predictions.

    Eight frames are sampled from the video, preprocessed with the
    module-level ``feature_extractor`` and scored by the module-level
    ``model``. Scores are converted to softmax percentages before ranking.

    Args:
        file_path: Path of the video file to decode.
        top_k: Number of highest-scoring classes to report.

    Returns:
        A DataFrame with columns ``"Label"`` and ``"Confidence"`` (percent),
        ordered from most to least likely.
    """
    # Decode the file the caller passed in, not the module-level temp path.
    videoreader = VideoReader(file_path, num_threads=1, ctx=cpu(0))

    # sample 8 frames
    videoreader.seek(0)
    indices = sample_frame_indices(
        clip_len=8, frame_sample_rate=4, seg_len=len(videoreader)
    )
    video = videoreader.get_batch(indices).asnumpy()

    inputs = feature_extractor(list(video), return_tensors="pt")

    with torch.no_grad():
        outputs = model(**inputs)
        logits: Tensor = outputs.logits

    # The highest logit selects the predicted class.
    predicted_label = logits.argmax(-1).item()
    print(model.config.id2label[predicted_label])

    # Logits are unbounded scores; softmax makes them probabilities so the
    # "%" suffix and the "Confidence" column are meaningful.
    percentages = (logits.squeeze().softmax(dim=-1) * 100).numpy()
    top_indices = np.argsort(percentages)[::-1][:top_k]

    results: List[Tuple[str, float]] = []
    for index in top_indices:
        # Plain int/float keep the id2label lookup and DataFrame dtypes simple.
        label = model.config.id2label[int(index)]
        confidence = float(percentages[index])
        print(f"Label: {label} - {confidence:.2f}%")
        results.append((label, confidence))

    return pd.DataFrame(results, columns=("Label", "Confidence"))
86
+
87
+
88
st.title("TimeSFormer")

with st.expander("INTRODUCTION"):
    st.text(
        f"""Streamlit demo for TimeSFormer.
Author: Hiep Phuoc Secondary High School
Number of CPU(s): {multiprocessing.cpu_count()}
"""
    )

# Scratch file that each upload overwrites before it is read back for inference.
VIDEO_TMP_PATH = os.path.join("tmp", "tmp.mp4")
uploadedfile = st.file_uploader("Upload file", type=["mp4"])

if uploadedfile is not None:
    with st.spinner():
        # open(..., "wb") does not create parent directories, so make sure
        # the scratch directory exists before writing the upload.
        os.makedirs(os.path.dirname(VIDEO_TMP_PATH), exist_ok=True)
        with open(VIDEO_TMP_PATH, "wb") as f:
            f.write(uploadedfile.getbuffer())

    with st.spinner("Processing..."):
        df = inference(VIDEO_TMP_PATH)
        st.dataframe(df)
        st.video(VIDEO_TMP_PATH)
requirements.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ streamlit
2
+ transformers
3
+ torch
4
+ decord
5
+ black
tmp/.gitkeep ADDED
File without changes