Iskaj committed on
Commit
2935ca0
1 Parent(s): 39557de

add changepoint detection to app and figures

Browse files
Files changed (1) hide show
  1. app.py +64 -21
app.py CHANGED
@@ -22,6 +22,10 @@ import faiss
22
 
23
  import shutil
24
 
 
 
 
 
25
  FPS = 5
26
  MIN_DISTANCE = 4
27
  MAX_DISTANCE = 30
@@ -79,6 +83,8 @@ def compute_hashes(clip, fps=FPS):
79
  yield {"frame": 1+index*fps, "hash": hashed}
80
 
81
  def index_hashes_for_video(url, is_file = False):
 
 
82
  if not is_file:
83
  filename = download_video_from_url(url)
84
  else:
@@ -116,9 +122,8 @@ def get_video_indices(url, target, MIN_DISTANCE = 4):
116
  - MIN_DISTANCE: integer representing the minimum distance between hashes on bit-level before its considered a match
117
  """
118
  # TODO: Fix crash if no matches are found
119
- if url.endswith('dl=1'):
120
- is_file = False
121
- elif url.endswith('.mp4'):
122
  is_file = True
123
 
124
  # Url (short video)
@@ -132,6 +137,8 @@ def get_video_indices(url, target, MIN_DISTANCE = 4):
132
  return video_index, hash_vectors, target_indices
133
 
134
  def compare_videos(video_index, hash_vectors, target_indices, MIN_DISTANCE = 3): # , is_file = False):
 
 
135
  # The results are returned as a triplet of 1D arrays
136
  # lims, D, I, where result for query i is in I[lims[i]:lims[i+1]]
137
  # (indices of neighbors), D[lims[i]:lims[i+1]] (distances).
@@ -149,7 +156,9 @@ def get_decent_distance(url, target, MIN_DISTANCE, MAX_DISTANCE):
149
  nr_matches = len(D)
150
  logging.info(f"{(nr_matches/nr_source_frames) * 100.0:.1f}% of frames have a match for distance '{distance}' ({nr_matches} matches for {nr_source_frames} frames)")
151
  if nr_matches >= nr_source_frames:
152
- return distance
 
 
153
 
154
  def plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = 3):
155
  sns.set_theme()
@@ -185,16 +194,22 @@ def plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = 3):
185
  logging.basicConfig()
186
  logging.getLogger().setLevel(logging.INFO)
187
 
188
- def plot_multi_comparison(df):
189
- fig, ax_arr = plt.subplots(3, 2, figsize=(12, 6), dpi=100, sharex=True) # , ax=axes[1]
190
- # plt.scatter(x=df['TARGET_S'], y = df['SOURCE_S'], ax=ax_arr[0])
191
- # plt.scatter(x=df['TARGET_S'], y = df['SOURCE_S'], ax=ax_arr[1])
192
- sns.scatterplot(data = df, x='TARGET_S', y='SOURCE_S', ax=ax_arr[0,0])
193
- sns.lineplot(data = df, x='TARGET_S', y='SOURCE_LIP_S', ax=ax_arr[0,1])
194
- sns.scatterplot(data = df, x='TARGET_S', y='TIMESHIFT', ax=ax_arr[1,0])
195
- sns.lineplot(data = df, x='TARGET_S', y='TIMESHIFT_LIP', ax=ax_arr[1,1])
196
- sns.scatterplot(data = df, x='TARGET_S', y='OFFSET', ax=ax_arr[2,0])
197
- sns.lineplot(data = df, x='TARGET_S', y='OFFSET_LIP', ax=ax_arr[2,1])
 
 
 
 
 
 
198
  return fig
199
 
200
 
@@ -250,8 +265,27 @@ def get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False):
250
  # Add Offset col that assumes the video is played at the same speed as the other to do a "timeshift"
251
  df['OFFSET'] = df['SOURCE_S'] - df['TARGET_S'] - np.min(df['SOURCE_S'])
252
  df['OFFSET_LIP'] = df['SOURCE_LIP_S'] - df['TARGET_S'] - np.min(df['SOURCE_LIP_S'])
 
 
 
253
  return df
254
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
255
  def get_comparison(url, target, MIN_DISTANCE = 4):
256
  """ Function for Gradio to combine all helper functions"""
257
  video_index, hash_vectors, target_indices = get_video_indices(url, target, MIN_DISTANCE = MIN_DISTANCE)
@@ -259,34 +293,43 @@ def get_comparison(url, target, MIN_DISTANCE = 4):
259
  fig = plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = MIN_DISTANCE)
260
  return fig
261
 
262
- def get_auto_comparison(url, target, MIN_DISTANCE = MIN_DISTANCE):
263
  """ Function for Gradio to combine all helper functions"""
264
  distance = get_decent_distance(url, target, MIN_DISTANCE, MAX_DISTANCE)
 
 
265
  video_index, hash_vectors, target_indices = get_video_indices(url, target, MIN_DISTANCE = distance)
266
  lims, D, I, hash_vectors = compare_videos(video_index, hash_vectors, target_indices, MIN_DISTANCE = distance)
267
  # fig = plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = distance)
268
  df = get_videomatch_df(url, target, min_distance=MIN_DISTANCE, vanilla_df=False)
269
- fig = plot_multi_comparison(df)
 
270
  return fig
271
 
 
 
272
  video_urls = ["https://www.dropbox.com/s/8c89a9aba0w8gjg/Ploumen.mp4?dl=1",
273
  "https://www.dropbox.com/s/rzmicviu1fe740t/Bram%20van%20Ojik%20krijgt%20reprimande.mp4?dl=1",
274
  "https://www.dropbox.com/s/wcot34ldmb84071/Baudet%20ontmaskert%20Omtzigt_%20u%20bent%20door%20de%20mand%20gevallen%21.mp4?dl=1",
 
275
  "https://www.dropbox.com/s/4ognq8lshcujk43/Plenaire_zaal_20200923132426_Omtzigt.mp4?dl=1"]
276
 
277
  index_iface = gr.Interface(fn=lambda url: index_hashes_for_video(url).ntotal,
278
- inputs="text", outputs="text",
 
279
  examples=video_urls, cache_examples=True)
280
 
281
  compare_iface = gr.Interface(fn=get_comparison,
282
- inputs=["text", "text", gr.Slider(2, 30, 4, step=2)], outputs="plot",
 
283
  examples=[[x, video_urls[-1]] for x in video_urls[:-1]])
284
 
285
  auto_compare_iface = gr.Interface(fn=get_auto_comparison,
286
- inputs=["text", "text"], outputs="plot",
 
287
  examples=[[x, video_urls[-1]] for x in video_urls[:-1]])
288
 
289
- iface = gr.TabbedInterface([index_iface, compare_iface, auto_compare_iface], ["Index", "Compare", "AutoCompare"])
290
 
291
  if __name__ == "__main__":
292
  import matplotlib
@@ -295,5 +338,5 @@ if __name__ == "__main__":
295
  logging.basicConfig()
296
  logging.getLogger().setLevel(logging.INFO)
297
 
298
- iface.launch()
299
  #iface.launch(auth=("test", "test"), share=True, debug=True)
 
22
 
23
  import shutil
24
 
25
+ from kats.detectors.cusum_detection import CUSUMDetector
26
+ from kats.detectors.robust_stat_detection import RobustStatDetector
27
+ from kats.consts import TimeSeriesData
28
+
29
  FPS = 5
30
  MIN_DISTANCE = 4
31
  MAX_DISTANCE = 30
 
83
  yield {"frame": 1+index*fps, "hash": hashed}
84
 
85
  def index_hashes_for_video(url, is_file = False):
86
+ """ Download a video if it is a url, otherwise refer to the file. Secondly index the video
87
+ using faiss indices and return this index. """
88
  if not is_file:
89
  filename = download_video_from_url(url)
90
  else:
 
122
  - MIN_DISTANCE: integer representing the minimum distance between hashes on bit-level before its considered a match
123
  """
124
  # TODO: Fix crash if no matches are found
125
+ is_file = False
126
+ if url.endswith('.mp4'):
 
127
  is_file = True
128
 
129
  # Url (short video)
 
137
  return video_index, hash_vectors, target_indices
138
 
139
  def compare_videos(video_index, hash_vectors, target_indices, MIN_DISTANCE = 3): # , is_file = False):
140
+ """ Search for matches between the indices of the target video (long video)
141
+ and the given hash vectors of a video"""
142
  # The results are returned as a triplet of 1D arrays
143
  # lims, D, I, where result for query i is in I[lims[i]:lims[i+1]]
144
  # (indices of neighbors), D[lims[i]:lims[i+1]] (distances).
 
156
  nr_matches = len(D)
157
  logging.info(f"{(nr_matches/nr_source_frames) * 100.0:.1f}% of frames have a match for distance '{distance}' ({nr_matches} matches for {nr_source_frames} frames)")
158
  if nr_matches >= nr_source_frames:
159
+ return distance
160
+ logging.warning(f"No matches found for any distance between {MIN_DISTANCE} and {MAX_DISTANCE}")
161
+ return None
162
 
163
  def plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = 3):
164
  sns.set_theme()
 
194
  logging.basicConfig()
195
  logging.getLogger().setLevel(logging.INFO)
196
 
197
def plot_multi_comparison(df, change_points):
    """Plot the match dataframe as a 3x2 grid and mark detected change points.

    The grid shares its x-axis (the 'time' column of ``df``):
      - row 0: 'SOURCE_S' (scatter) and 'SOURCE_LIP_S' (line)
      - row 1: 'OFFSET' (scatter) and 'OFFSET_LIP' (line)
      - row 2: left panel is left empty; right panel repeats 'OFFSET_LIP'
        with one red vertical line per change point. This bottom-right
        panel is the most indicative one.

    Args:
        df: dataframe providing the 'time', 'SOURCE_S', 'SOURCE_LIP_S',
            'OFFSET' and 'OFFSET_LIP' columns.
        change_points: iterable of objects exposing ``start_time`` and
            ``confidence``; an empty iterable simply draws no markers.

    Returns:
        The matplotlib figure holding all subplots.
    """
    fig, ax_arr = plt.subplots(3, 2, figsize=(12, 6), dpi=100, sharex=True)
    sns.scatterplot(data=df, x='time', y='SOURCE_S', ax=ax_arr[0, 0])
    sns.lineplot(data=df, x='time', y='SOURCE_LIP_S', ax=ax_arr[0, 1])
    sns.scatterplot(data=df, x='time', y='OFFSET', ax=ax_arr[1, 0])
    sns.lineplot(data=df, x='time', y='OFFSET_LIP', ax=ax_arr[1, 1])

    # Draw on an explicit axis instead of pyplot's implicit "current axes":
    # that state is global, so relying on it is fragile if another figure
    # becomes current while this one is being built.
    cp_ax = ax_arr[2, 1]
    sns.lineplot(data=df, x='time', y='OFFSET_LIP', ax=cp_ax)

    # The vertical extent of every marker is the same; compute it once.
    y_low = np.min(df['OFFSET_LIP'])
    y_high = np.max(df['OFFSET_LIP'])
    for change_point in change_points:
        cp_time = change_point.start_time
        cp_ax.vlines(x=cp_time, ymin=y_low, ymax=y_high, colors='red', lw=2)
        # Random height for the confidence label so that labels of nearby
        # change points are less likely to sit on top of each other.
        label_y = np.random.uniform(low=y_low, high=y_high, size=None)
        cp_ax.text(x=cp_time, y=label_y, s=str(np.round(change_point.confidence, 2)),
                   color='r', rotation=-0.0, fontsize=14)
    cp_ax.tick_params(axis='x', labelrotation=90)
    return fig
214
 
215
 
 
265
  # Add Offset col that assumes the video is played at the same speed as the other to do a "timeshift"
266
  df['OFFSET'] = df['SOURCE_S'] - df['TARGET_S'] - np.min(df['SOURCE_S'])
267
  df['OFFSET_LIP'] = df['SOURCE_LIP_S'] - df['TARGET_S'] - np.min(df['SOURCE_LIP_S'])
268
+
269
+ # Add time column for plotting
270
+ df['time'] = pd.to_datetime(df["TARGET_S"], unit='s') # Needs a datetime as input
271
  return df
272
 
273
def get_change_points(df, smoothing_window_size=10, method='CUSUM'):
    """Detect change points in the 'OFFSET_LIP' series of ``df``.

    Args:
        df: dataframe with a 'time' column and an 'OFFSET_LIP' column.
        smoothing_window_size: forwarded to the detector's ``detector`` call.
        method: detector to use, case-insensitive. 'CUSUM' selects
            CUSUMDetector; 'ROBUSTSTAT' (or its alias 'ROBUST') selects
            RobustStatDetector.

    Returns:
        The change points as returned by the selected detector.

    Raises:
        ValueError: if ``method`` names no known detector.
    """
    method_key = method.upper()
    # Validate before doing any work: previously an unknown method left
    # `detector` unassigned and surfaced as an UnboundLocalError.
    if method_key not in ("CUSUM", "ROBUST", "ROBUSTSTAT"):
        raise ValueError(
            f"Unknown change point detection method '{method}'; "
            "expected 'CUSUM' or 'ROBUSTSTAT'"
        )

    tsd = TimeSeriesData(df.loc[:, ['time', 'OFFSET_LIP']])
    if method_key == "CUSUM":
        detector = CUSUMDetector(tsd)
    else:
        detector = RobustStatDetector(tsd)
    change_points = detector.detector(smoothing_window_size=smoothing_window_size, comparison_window=-2)

    # Print some stats
    if method_key == "CUSUM" and change_points:
        mean_offset_prechange = change_points[0].mu0
        mean_offset_postchange = change_points[0].mu1
        jump_s = mean_offset_postchange - mean_offset_prechange
        # mu0/mu1 are mean offsets, not positions in the video, so they are
        # reported as offsets rather than as the time of the jump.
        print(f"Video jumps {jump_s:.1f}s in time (mean offset {mean_offset_prechange:.1f}s -> {mean_offset_postchange:.1f}s)")
    return change_points
288
+
289
  def get_comparison(url, target, MIN_DISTANCE = 4):
290
  """ Function for Gradio to combine all helper functions"""
291
  video_index, hash_vectors, target_indices = get_video_indices(url, target, MIN_DISTANCE = MIN_DISTANCE)
 
293
  fig = plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = MIN_DISTANCE)
294
  return fig
295
 
296
def get_auto_comparison(url, target, smoothing_window_size=10, method="CUSUM"):
    """Function for Gradio to combine all helper functions.

    Searches for a workable match distance between MIN_DISTANCE and
    MAX_DISTANCE, builds the match dataframe, runs change point detection on
    it and returns the resulting multi-panel figure.

    Args:
        url: the video to compare.
        target: the video to compare against.
        smoothing_window_size: forwarded to get_change_points.
        method: change point detection method, forwarded to get_change_points.

    Returns:
        The figure produced by plot_multi_comparison.

    Raises:
        gr.Error: if get_decent_distance finds no usable distance.
    """
    distance = get_decent_distance(url, target, MIN_DISTANCE, MAX_DISTANCE)
    if distance is None:
        raise gr.Error("No matches found!")

    # The results of these two calls only fed the (currently disabled)
    # plot_comparison figure. The calls are kept so that any indexing or
    # failure behaviour they have is unchanged.
    video_index, hash_vectors, target_indices = get_video_indices(url, target, MIN_DISTANCE=distance)
    lims, D, I, hash_vectors = compare_videos(video_index, hash_vectors, target_indices, MIN_DISTANCE=distance)

    # Use the distance found above instead of the module-level default.
    # NOTE(review): get_videomatch_df is not visible from here - confirm it
    # treats min_distance as the match threshold (or its lower bound).
    df = get_videomatch_df(url, target, min_distance=distance, vanilla_df=False)
    change_points = get_change_points(df, smoothing_window_size=smoothing_window_size, method=method)
    fig = plot_multi_comparison(df, change_points)
    return fig
308
 
309
+
310
+
311
# Example video URLs. In the examples of the two compare tabs the last entry
# is used as the target and every other entry is paired with it.
video_urls = [
    "https://www.dropbox.com/s/8c89a9aba0w8gjg/Ploumen.mp4?dl=1",
    "https://www.dropbox.com/s/rzmicviu1fe740t/Bram%20van%20Ojik%20krijgt%20reprimande.mp4?dl=1",
    "https://www.dropbox.com/s/wcot34ldmb84071/Baudet%20ontmaskert%20Omtzigt_%20u%20bent%20door%20de%20mand%20gevallen%21.mp4?dl=1",
    "https://drive.google.com/uc?id=1XW0niHR1k09vPNv1cp6NvdGXe7FHJc1D&export=download",
    "https://www.dropbox.com/s/4ognq8lshcujk43/Plenaire_zaal_20200923132426_Omtzigt.mp4?dl=1",
]

# Tab: index a single video and report the number of indexed hashes.
index_iface = gr.Interface(
    fn=lambda url: index_hashes_for_video(url).ntotal,
    inputs="text",
    outputs="text",
    examples=video_urls,
    cache_examples=True,
)

# Tab: compare two videos using a hand-picked distance from the slider.
compare_iface = gr.Interface(
    fn=get_comparison,
    inputs=["text", "text", gr.Slider(2, 30, 4, step=2)],
    outputs="plot",
    examples=[[source_url, video_urls[-1]] for source_url in video_urls[:-1]],
)

# Tab: compare two videos with an automatically chosen distance and run
# change point detection with the selected window size and method.
auto_compare_iface = gr.Interface(
    fn=get_auto_comparison,
    inputs=[
        "text",
        "text",
        gr.Slider(1, 50, 10, step=1),
        gr.Dropdown(choices=["CUSUM", "Robust"], value="CUSUM"),
    ],
    outputs="plot",
    examples=[[source_url, video_urls[-1]] for source_url in video_urls[:-1]],
)

iface = gr.TabbedInterface(
    [auto_compare_iface, compare_iface, index_iface],
    ["AutoCompare", "Compare", "Index"],
)
333
 
334
  if __name__ == "__main__":
335
  import matplotlib
 
338
  logging.basicConfig()
339
  logging.getLogger().setLevel(logging.INFO)
340
 
341
+ iface.launch(inbrowser=True, debug=True)
342
  #iface.launch(auth=("test", "test"), share=True, debug=True)