thinh-huynh-re committed on
Commit
1bcf2a0
1 Parent(s): b47bdbb
Files changed (3) hide show
  1. app.py +20 -20
  2. capture_picture.py +20 -0
  3. camera.py → video.py +0 -0
app.py CHANGED
@@ -50,12 +50,14 @@ def load_model(model_name: str):
50
  return feature_extractor, model
51
 
52
 
53
- def read_video(file_path: str) -> np.ndarray:
54
  cap = cv2.VideoCapture(file_path)
55
  length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) # 1000 frames
56
  print("Number of frames", length)
57
 
58
- indices = sample_frame_indices(clip_len=8, frame_sample_rate=4, seg_len=length)
 
 
59
 
60
  frames: List[np.array] = []
61
  for i in indices:
@@ -83,8 +85,8 @@ def read_video_decord(file_path: str) -> np.ndarray:
83
  return video
84
 
85
 
86
- def inference(file_path: str):
87
- video = read_video(file_path)
88
 
89
  inputs = feature_extractor(list(video), return_tensors="pt")
90
 
@@ -111,6 +113,15 @@ def inference(file_path: str):
111
  return pd.DataFrame(results, columns=("Label", "Confidence"))
112
 
113
 
 
 
 
 
 
 
 
 
 
114
  st.title("TimeSFormer")
115
 
116
  with st.expander("INTRODUCTION"):
@@ -135,6 +146,10 @@ model_name = st.selectbox(
135
  )
136
  feature_extractor, model = load_model(model_name)
137
 
 
 
 
 
138
  VIDEO_TMP_PATH = os.path.join("tmp", "tmp.mp4")
139
  uploadedfile = st.file_uploader("Upload file", type=["mp4"])
140
 
@@ -146,23 +161,8 @@ if uploadedfile is not None:
146
 
147
  start_time = time.time()
148
  with st.spinner("Processing..."):
149
- df = inference(VIDEO_TMP_PATH)
150
  end_time = time.time()
151
  st.info(f"{end_time - start_time} seconds")
152
  st.dataframe(df)
153
  st.video(VIDEO_TMP_PATH)
154
-
155
- img_file_buffer = st.camera_input("Take a picture")
156
-
157
- if img_file_buffer is not None:
158
- # To read image file buffer with OpenCV:
159
- bytes_data = img_file_buffer.getvalue()
160
- cv2_img = cv2.imdecode(np.frombuffer(bytes_data, np.uint8), cv2.IMREAD_COLOR)
161
-
162
- # Check the type of cv2_img:
163
- # Should output: <class 'numpy.ndarray'>
164
- st.write(type(cv2_img))
165
-
166
- # Check the shape of cv2_img:
167
- # Should output shape: (height, width, channels)
168
- st.write(cv2_img.shape)
 
50
  return feature_extractor, model
51
 
52
 
53
+ def read_video(file_path: str, frames_per_video: int = 8) -> np.ndarray:
54
  cap = cv2.VideoCapture(file_path)
55
  length = int(cap.get(cv2.CAP_PROP_FRAME_COUNT)) # 1000 frames
56
  print("Number of frames", length)
57
 
58
+ indices = sample_frame_indices(
59
+ clip_len=frames_per_video, frame_sample_rate=4, seg_len=length
60
+ )
61
 
62
  frames: List[np.array] = []
63
  for i in indices:
 
85
  return video
86
 
87
 
88
+ def inference(file_path: str, frames_per_video: int = 8):
89
+ video = read_video(file_path, frames_per_video)
90
 
91
  inputs = feature_extractor(list(video), return_tensors="pt")
92
 
 
113
  return pd.DataFrame(results, columns=("Label", "Confidence"))
114
 
115
 
116
+ def get_frames_per_video(model_name: str) -> int:
117
+ if "base-finetuned" in model_name:
118
+ return 8
119
+ elif "hr-finetuned" in model_name:
120
+ return 16
121
+ else:
122
+ return 96
123
+
124
+
125
  st.title("TimeSFormer")
126
 
127
  with st.expander("INTRODUCTION"):
 
146
  )
147
  feature_extractor, model = load_model(model_name)
148
 
149
+ frames_per_video = get_frames_per_video(model_name)
150
+ st.info(f"Frames per video: {frames_per_video}")
151
+
152
+
153
  VIDEO_TMP_PATH = os.path.join("tmp", "tmp.mp4")
154
  uploadedfile = st.file_uploader("Upload file", type=["mp4"])
155
 
 
161
 
162
  start_time = time.time()
163
  with st.spinner("Processing..."):
164
+ df = inference(VIDEO_TMP_PATH, frames_per_video)
165
  end_time = time.time()
166
  st.info(f"{end_time - start_time} seconds")
167
  st.dataframe(df)
168
  st.video(VIDEO_TMP_PATH)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
capture_picture.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ import cv2
3
+ import numpy as np
4
+
5
+ img_file_buffer = st.camera_input("Take a picture")
6
+
7
+ if img_file_buffer is not None:
8
+ # To read image file buffer with OpenCV:
9
+ bytes_data = img_file_buffer.getvalue()
10
+ cv2_img: np.ndarray = cv2.imdecode(
11
+ np.frombuffer(bytes_data, np.uint8), cv2.IMREAD_COLOR
12
+ )
13
+
14
+ # Check the type of cv2_img:
15
+ # Should output: <class 'numpy.ndarray'>
16
+ st.write(type(cv2_img))
17
+
18
+ # Check the shape of cv2_img:
19
+ # Should output shape: (height, width, channels)
20
+ st.write(cv2_img.shape)
camera.py → video.py RENAMED
File without changes