Iskaj committed on
Commit
0afaddb
1 Parent(s): 6715214

add docs to videomatch.py

Files changed (1)
  1. videomatch.py +104 -41
videomatch.py CHANGED
@@ -15,14 +15,17 @@ import pandas as pd
 from videohash import compute_hashes, filepath_from_url
 from config import FPS, MIN_DISTANCE, MAX_DISTANCE, ROLLING_WINDOW_SIZE
 
-# def get_target_urls(json_file='apb2022.json'):
-#     """ Obtain target urls for the target videos of a json file containing .mp4 files """
-#     with open('apb2022.json', "r") as json_file:
-#         target_videos = json.load(json_file)
-#     return [video['mp4'] for video in target_videos]
-
 def index_hashes_for_video(url: str) -> faiss.IndexBinaryIVF:
-    """ Compute hashes of a video and index the video using faiss indices and return the index. """
+    """ Compute hashes of a video and index the video using faiss indices and return the index.
+
+    Args:
+        url (str): url to compute hashes for and index.
+
+    Returns:
+        index (IndexBinaryIVF): an abstract structure for a FAISS-based binary index of the hashes.
+
+    """
+    # If the url already had indices created, fetch those.
     filepath = filepath_from_url(url)
     if os.path.exists(f'{filepath}.index'):
         logging.info(f"Loading indexed hashes from {filepath}.index")
@@ -30,59 +33,90 @@ def index_hashes_for_video(url: str) -> faiss.IndexBinaryIVF:
         logging.info(f"Index {filepath}.index has in total {binary_index.ntotal} frames")
         return binary_index
 
+    # Create hash vectors for url by looping over hashes from the video.
     hash_vectors = np.array([x['hash'] for x in compute_hashes(url)])
     logging.info(f"Computed hashes for {hash_vectors.shape} frames.")
 
     # Initializing the quantizer.
     quantizer = faiss.IndexBinaryFlat(hash_vectors.shape[1]*8)
+
     # Initializing index.
     index = faiss.IndexBinaryIVF(quantizer, hash_vectors.shape[1]*8, min(16, hash_vectors.shape[0]))
-    index.nprobe = 1 # Number of nearest clusters to be searched per query.
-    # Training the quantizer.
+    index.nprobe = 1 # Nr of nearest clusters to be searched per query.
+
+    # Train the quantizer and write the index.
     index.train(hash_vectors)
-    #index = faiss.IndexBinaryFlat(64)
     index.add(hash_vectors)
     faiss.write_index_binary(index, f'{filepath}.index')
     logging.info(f"Indexed hashes for {index.ntotal} frames to {filepath}.index.")
+
     return index
 
 def get_video_index(url: str):
     """" Builds up a FAISS index for a video.
-    args:
-    - filepath: location of the source video
+
+    Args:
+        url (str): Location of the source video (video that is to be indexed).
+
+    Returns:
+        video_index (IndexBinaryIVF): an abstract structure for a FAISS-based binary index of the hashes.
+        hash_vectors (ndarray): vector of the indexed frames that can be searched.
+
     """
-    # Url (short video)
     video_index = index_hashes_for_video(url)
-    video_index.make_direct_map() # Make sure the index is indexable
-    hash_vectors = np.array([video_index.reconstruct(i) for i in range(video_index.ntotal)]) # Retrieve original indices
 
+    # Make sure the index is indexable.
+    video_index.make_direct_map()
+
+    # Retrieve original indices.
+    hash_vectors = np.array([video_index.reconstruct(i) for i in range(video_index.ntotal)])
     return video_index, hash_vectors
 
 def compare_videos(hash_vectors, target_index, MIN_DISTANCE = 3):
     """ The comparison between the target and the original video will be plotted based
     on the matches between the target and the original video over time. The matches are determined
     based on the minimum distance between hashes (as computed by faiss-vectors) before they're considered a match.
+
+    The results are returned as a triplet of 1D arrays:
+    lims, D, I, where result for query i is in I[lims[i]:lims[i+1]]
+    (indices of neighbors), D[lims[i]:lims[i+1]] (distances).
+    (See: https://github.com/facebookresearch/faiss/wiki/Special-operations-on-indexes)
+
+    Args:
+        hash_vectors (ndarray): vector of the indexed frames that can be searched.
+        target_index (IndexBinaryIVF): an abstract structure for a FAISS-based binary index of the hashes.
+        MIN_DISTANCE (int): minimum distance for a match.
+
+    Returns:
+        lims (ndarray): from where to where in I and D the result for query i is.
+        D (ndarray): distances of the vectors within a radius around the query point.
+        I (ndarray): indices of the neighbours.
+        hash_vectors (ndarray): vector of the indexed frames that can be searched.
+
     """
-    # The results are returned as a triplet of 1D arrays
-    # lims, D, I, where result for query i is in I[lims[i]:lims[i+1]]
-    # (indices of neighbors), D[lims[i]:lims[i+1]] (distances).
     lims, D, I = target_index.range_search(hash_vectors, MIN_DISTANCE)
     return lims, D, I, hash_vectors
 
 def get_decent_distance(video_index, hash_vectors, target_index, MIN_DISTANCE, MAX_DISTANCE):
     """ To get a decent heurstic for a base distance check every distance from MIN_DISTANCE to MAX_DISTANCE
-    until the number of matches found is equal to or higher than the number of frames in the source video
+    until the number of matches found is equal to or higher than the number of frames in the source video.
+    If the number of matches with a certain video is larger than the number of frames, we set the distance heuristic.
+    This was empirically determined to be a decent way to find the distance heuristic.
 
-    args:
-    - video_index: The index of the source video
-    - hash_vectors: The hash vectors of the target video
-    - target_index: The index of the target video
-    """
+    Args:
+        video_index (IndexBinaryIVF): The index of the source video.
+        hash_vectors (ndarray): The hash vectors of the target video.
+        target_index (IndexBinaryIVF): The index of the target video.
+        MIN_DISTANCE (int): Minimum distance between vectors to be considered a match.
+        MAX_DISTANCE (int): Maximum distance between vectors to prevent bad matches.
+
+    Returns:
+        None if no distance is found, otherwise an integer representing the heuristic distance value.
+
+    """
+    # Go over every distance with a step size of 2, since the distance increases/decreases with that step size.
     for distance in np.arange(start = MIN_DISTANCE - 2, stop = MAX_DISTANCE + 2, step = 2, dtype=int):
-        distance = int(distance)
-        # --- Previously ---
-        # video_index, hash_vectors = get_video_index(filepath)
-        # target_index, _ = get_video_index(target)
+        distance = int(distance) # Cast for safety
         _, D, _, _ = compare_videos(hash_vectors, target_index, MIN_DISTANCE = distance)
         nr_source_frames = video_index.ntotal
         nr_matches = len(D)
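Taken together, the functions documented in this hunk form the indexing and matching half of the pipeline. As a rough usage sketch (the URLs below are hypothetical placeholders, not files from this repo):

    # Hypothetical usage sketch of the functions documented above.
    from videomatch import get_video_index, compare_videos, get_decent_distance
    from config import MIN_DISTANCE, MAX_DISTANCE

    source_url = "https://example.com/source.mp4"  # placeholder
    target_url = "https://example.com/target.mp4"  # placeholder

    # Build (or load cached) binary indices and their hash vectors.
    source_index, source_vectors = get_video_index(source_url)
    target_index, _ = get_video_index(target_url)

    # Find a workable search radius, then range-search with it.
    distance = get_decent_distance(source_index, source_vectors, target_index, MIN_DISTANCE, MAX_DISTANCE)
    lims, D, I, _ = compare_videos(source_vectors, target_index, MIN_DISTANCE=distance)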
@@ -91,36 +125,64 @@ def get_decent_distance(video_index, hash_vectors, target_index, MIN_DISTANCE, MAX_DISTANCE):
         if nr_matches >= nr_source_frames:
             return distance
     logging.warning(f"No matches found for any distance between {MIN_DISTANCE} and {MAX_DISTANCE}")
+
     return None
 
 def get_change_points(df, smoothing_window_size=10, method='ROBUST', metric="ROLL_OFFSET_MODE"):
+    """ Using https://github.com/facebookresearch/Kats to analyze the data to find points where the metric
+    changes.
+
+    Args:
+        df (DataFrame): Dataframe holding the information between the matching of two videos.
+        smoothing_window_size (int): Smoothing window for the timeseries analysis. Defaults to 10.
+        method (str): Method for the timeseries analysis. Defaults to 'ROBUST'.
+        metric (str): Main reporting metric for the timeseries analysis. Defaults to "ROLL_OFFSET_MODE".
+
+    Returns:
+        change_points [TimeSeriesChangePoint]: Array of time series change point objects.
+
+    """
+    # Convert the df to how Kats wants it.
     tsd = TimeSeriesData(df.loc[:,['time', metric]])
+
+    # Depending on the method, get the change points.
     if method.upper() == "CUSUM":
         detector = CUSUMDetector(tsd)
     elif method.upper() == "ROBUST":
         detector = RobustStatDetector(tsd)
     change_points = detector.detector(smoothing_window_size=smoothing_window_size, comparison_window=-2)
 
-    # Print some stats
+    # Log some statistics.
     if method.upper() == "CUSUM" and change_points != []:
         mean_offset_prechange = change_points[0].mu0
         mean_offset_postchange = change_points[0].mu1
         jump_s = mean_offset_postchange - mean_offset_prechange
-        print(f"Video jumps {jump_s:.1f}s in time at {mean_offset_prechange:.1f} seconds")
+        logging.info(f"Video jumps {jump_s:.1f}s in time at {mean_offset_prechange:.1f} seconds")
+
     return change_points
 
-def get_videomatch_df(lims, D, I, hash_vectors, distance, min_distance=MIN_DISTANCE, window_size=ROLLING_WINDOW_SIZE, vanilla_df=False):
-    # --- Previously ---
-    # distance = get_decent_distance(url, target, MIN_DISTANCE, MAX_DISTANCE)
-    # _, hash_vectors = get_video_index(url)
-    # target_index, _ = get_video_index(target)
-    # lims, D, I, hash_vectors = compare_videos(hash_vectors, target_index, MIN_DISTANCE = distance)
+def get_videomatch_df(lims, D, I, hash_vectors, distance, window_size=ROLLING_WINDOW_SIZE, vanilla_df=False):
+    """ Get the dataframe holding all information of the comparison between two videos.
+
+    Args:
+        lims (ndarray): from where to where in I and D the result for query i is.
+        D (ndarray): distances of the vectors within a radius around the query point.
+        I (ndarray): indices of the neighbours.
+        hash_vectors (ndarray): vector of the indexed frames that can be searched.
+        distance (int): heuristic distance to use for the search for most accurate matches.
+        window_size (int): Rolling window size that is used when calculating the mode. Defaults to ROLLING_WINDOW_SIZE.
+        vanilla_df (bool): Toggle for returning the baseline dataframe. Defaults to False.
 
+    Returns:
+        df (DataFrame): Dataframe with extra information added about decision making regarding the match between videos.
+
+    """
+    # Get match locations in seconds.
     target = [(lims[i+1]-lims[i]) * [i] for i in range(hash_vectors.shape[0])]
     target_s = [i/FPS for j in target for i in j]
     source_s = [i/FPS for i in I]
 
-    # Make df
+    # Make dataframe.
     df = pd.DataFrame(zip(target_s, source_s, D, I), columns = ['TARGET_S', 'SOURCE_S', 'DISTANCE', 'INDICES'])
     if vanilla_df:
         return df
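Continuing that sketch, the range-search triplet feeds get_videomatch_df, and the resulting dataframe can be scanned for change points. get_change_points expects a 'time' column plus the chosen metric column; the default "ROLL_OFFSET_MODE" is computed outside this file, so the illustration below substitutes the 'OFFSET_LIP' column that get_videomatch_df itself creates:

    # Hypothetical continuation of the sketch above.
    df = get_videomatch_df(lims, D, I, source_vectors, distance)

    # 'OFFSET_LIP' stands in for the default "ROLL_OFFSET_MODE" metric here.
    change_points = get_change_points(df, smoothing_window_size=10, method='ROBUST', metric='OFFSET_LIP')
    for cp in change_points:
        print(cp)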
@@ -129,7 +191,7 @@ def get_videomatch_df(lims, D, I, hash_vectors, distance, min_distance=MIN_DISTANCE, window_size=ROLLING_WINDOW_SIZE, vanilla_df=False):
     df['TARGET_WEIGHT'] = 1 - df['DISTANCE']/distance # Higher value means a better match
     df['SOURCE_WEIGHTED_VALUE'] = df['SOURCE_S'] * df['TARGET_WEIGHT'] # Multiply the weight (which indicates a better match) with the value for Y and aggregate to get a less noisy estimate of Y
 
-    # Group by X so for every second/x there will be 1 value of Y in the end
+    # Group by X so for every second/x there will be 1 source value in the end
     grouped_X = df.groupby('TARGET_S').agg({'SOURCE_WEIGHTED_VALUE' : 'sum', 'TARGET_WEIGHT' : 'sum'})
     grouped_X['FINAL_SOURCE_VALUE'] = grouped_X['SOURCE_WEIGHTED_VALUE'] / grouped_X['TARGET_WEIGHT']
 
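The weighting just above computes, for every target second, a distance-weighted average of the matched source seconds. A standalone toy illustration of the same aggregation (hypothetical values, not real match data):

    import pandas as pd

    toy = pd.DataFrame({'TARGET_S': [0.1, 0.1, 0.2],
                        'SOURCE_S': [5.0, 7.0, 6.0],
                        'DISTANCE': [1, 3, 2]})
    distance = 4
    toy['TARGET_WEIGHT'] = 1 - toy['DISTANCE']/distance  # closer hashes weigh more
    toy['SOURCE_WEIGHTED_VALUE'] = toy['SOURCE_S'] * toy['TARGET_WEIGHT']
    g = toy.groupby('TARGET_S').agg({'SOURCE_WEIGHTED_VALUE': 'sum', 'TARGET_WEIGHT': 'sum'})
    g['FINAL_SOURCE_VALUE'] = g['SOURCE_WEIGHTED_VALUE'] / g['TARGET_WEIGHT']
    # TARGET_S == 0.1 -> (5.0*0.75 + 7.0*0.25) / (0.75 + 0.25) = 5.5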
@@ -138,7 +200,7 @@ def get_videomatch_df(lims, D, I, hash_vectors, distance, min_distance=MIN_DISTANCE, window_size=ROLLING_WINDOW_SIZE, vanilla_df=False):
     df = df.drop(columns=['SOURCE_WEIGHTED_VALUE', 'TARGET_WEIGHT'])
     df = df.rename({'FINAL_SOURCE_VALUE' : 'SOURCE_S'}, axis='columns')
 
-    # Add NAN to "missing" x values (base it off hash vector, not target_s)
+    # Add NAN to "missing" x values
     step_size = 1/FPS
     x_complete = np.round(np.arange(start=0.0, stop = max(df['TARGET_S'])+step_size, step = step_size), 1) # More robust
     df['TARGET_S'] = np.round(df['TARGET_S'], 1)
@@ -150,11 +212,11 @@ def get_videomatch_df(lims, D, I, hash_vectors, distance, min_distance=MIN_DISTANCE, window_size=ROLLING_WINDOW_SIZE, vanilla_df=False):
     # Interpolate between frames since there are missing values
     df['SOURCE_LIP_S'] = df['SOURCE_S'].interpolate(method='linear', limit_direction='both', axis=0)
 
-    # Add timeshift col and timeshift col with Linearly Interpolated Values
+    # Add timeshift col and timeshift col with Linearly Interpolated Values (LIP)
     df['TIMESHIFT'] = df['SOURCE_S'].shift(1) - df['SOURCE_S']
     df['TIMESHIFT_LIP'] = df['SOURCE_LIP_S'].shift(1) - df['SOURCE_LIP_S']
 
-    # Add Offset col that assumes the video is played at the same speed as the other to do a "timeshift"
+    # Add offset col that assumes the video is played at the same speed as the other to do a "timeshift"
     df['OFFSET'] = df['SOURCE_S'] - df['TARGET_S'] - np.min(df['SOURCE_S'])
     df['OFFSET_LIP'] = df['SOURCE_LIP_S'] - df['TARGET_S'] - np.min(df['SOURCE_LIP_S'])
 
@@ -163,4 +225,5 @@ def get_videomatch_df(lims, D, I, hash_vectors, distance, min_distance=MIN_DISTANCE, window_size=ROLLING_WINDOW_SIZE, vanilla_df=False):
 
     # Add time column for plotting
     df['time'] = pd.to_datetime(df["TARGET_S"], unit='s') # Needs a datetime as input
+
     return df
 
15
  from videohash import compute_hashes, filepath_from_url
16
  from config import FPS, MIN_DISTANCE, MAX_DISTANCE, ROLLING_WINDOW_SIZE
17
 
 
 
 
 
 
 
18
  def index_hashes_for_video(url: str) -> faiss.IndexBinaryIVF:
19
+ """ Compute hashes of a video and index the video using faiss indices and return the index.
20
+
21
+ Args:
22
+ url (str): url to to compute hashes for and index.
23
+
24
+ Returns:
25
+ index (IndexBinaryIVF): an abstract structure for a FAISS-based binary index of the hashes.
26
+
27
+ """
28
+ # If the url already had indices created, fetch those.
29
  filepath = filepath_from_url(url)
30
  if os.path.exists(f'{filepath}.index'):
31
  logging.info(f"Loading indexed hashes from {filepath}.index")
 
33
  logging.info(f"Index {filepath}.index has in total {binary_index.ntotal} frames")
34
  return binary_index
35
 
36
+ # Create hash vectors for url by looping over hashes from the video.
37
  hash_vectors = np.array([x['hash'] for x in compute_hashes(url)])
38
  logging.info(f"Computed hashes for {hash_vectors.shape} frames.")
39
 
40
  # Initializing the quantizer.
41
  quantizer = faiss.IndexBinaryFlat(hash_vectors.shape[1]*8)
42
+
43
  # Initializing index.
44
  index = faiss.IndexBinaryIVF(quantizer, hash_vectors.shape[1]*8, min(16, hash_vectors.shape[0]))
45
+ index.nprobe = 1 # Nr of nearest clusters to be searched per query.
46
+
47
+ # Training and write the quantizer.
48
  index.train(hash_vectors)
 
49
  index.add(hash_vectors)
50
  faiss.write_index_binary(index, f'{filepath}.index')
51
  logging.info(f"Indexed hashes for {index.ntotal} frames to {filepath}.index.")
52
+
53
  return index
54
 
55
  def get_video_index(url: str):
56
  """" Builds up a FAISS index for a video.
57
+
58
+ Args:
59
+ filepath (str): Location of the source video (video that is to be indexed)
60
+
61
+ Returns:
62
+ video_index (IndexBinaryIVF): an abstract structure for a FAISS-based binary index of the hashes.
63
+ hash_vectors (ndarray): vector of the indexed frames that can be searched
64
+
65
  """
 
66
  video_index = index_hashes_for_video(url)
 
 
67
 
68
+ # Make sure the index is indexable
69
+ video_index.make_direct_map()
70
+
71
+ # Retrieve original indices
72
+ hash_vectors = np.array([video_index.reconstruct(i) for i in range(video_index.ntotal)])
73
  return video_index, hash_vectors
74
 
75
  def compare_videos(hash_vectors, target_index, MIN_DISTANCE = 3):
76
  """ The comparison between the target and the original video will be plotted based
77
  on the matches between the target and the original video over time. The matches are determined
78
  based on the minimum distance between hashes (as computed by faiss-vectors) before they're considered a match.
79
+
80
+ The results are returned as a triplet of 1D arrays:
81
+ lims, D, I, where result for query i is in I[lims[i]:lims[i+1]]
82
+ (indices of neighbors), D[lims[i]:lims[i+1]] (distances).
83
+ (See: https://github.com/facebookresearch/faiss/wiki/Special-operations-on-indexes)
84
+
85
+ Args:
86
+ hash_vectors (ndarray): vector of the indexed frames that can be searched.
87
+ target_index (IndexBinaryIVF): an abstract structure for a FAISS-based binary index of the hashes.
88
+ MIN_DISTANCE (int): minium distance for a match
89
+
90
+ Returns:
91
+ lims (ndarray): from where to where in I and D the result for query i is
92
+ D (ndarray): distances of the vectors within a radius around the query point
93
+ I (ndarray): indices of the neighbours
94
+ hash_vectors (ndarray): vector of the indexed frames that can be searched.
95
+
96
  """
 
 
 
97
  lims, D, I = target_index.range_search(hash_vectors, MIN_DISTANCE)
98
  return lims, D, I, hash_vectors
99
 
100
  def get_decent_distance(video_index, hash_vectors, target_index, MIN_DISTANCE, MAX_DISTANCE):
101
  """ To get a decent heurstic for a base distance check every distance from MIN_DISTANCE to MAX_DISTANCE
102
+ until the number of matches found is equal to or higher than the number of frames in the source video.
103
+ If the number of matches with a certain video is larger than the amount of frames, we set the distance heuristic.
104
+ This was emperically determined to be a decent heuristic to find the distance heuristic
105
 
106
+ Args:
107
+ video_index (IndexBinaryIVF): The index of the source video
108
+ hash_vectors (ndarray): The hash vectors of the target video
109
+ target_index (IndexBinaryIVF): The index of the target video
110
+ MIN_DISTANCE (int): Minimum distance between vectors to be considered a match.
111
+ MAX_DISTANCE (int): Maximum distance between vectors to prevent bad matches.
112
+
113
+ Returns:
114
+ None if not distance is found, otherwise an integer representing the heuristic distance value.
115
+
116
+ """
117
+ # Go over every distance with a step size of 2, since the distance increases/decreases with that step size
118
  for distance in np.arange(start = MIN_DISTANCE - 2, stop = MAX_DISTANCE + 2, step = 2, dtype=int):
119
+ distance = int(distance) # Cast for safety
 
 
 
120
  _, D, _, _ = compare_videos(hash_vectors, target_index, MIN_DISTANCE = distance)
121
  nr_source_frames = video_index.ntotal
122
  nr_matches = len(D)
 
125
  if nr_matches >= nr_source_frames:
126
  return distance
127
  logging.warning(f"No matches found for any distance between {MIN_DISTANCE} and {MAX_DISTANCE}")
128
+
129
  return None
130
 
131
  def get_change_points(df, smoothing_window_size=10, method='ROBUST', metric="ROLL_OFFSET_MODE"):
132
+ """Using https://github.com/facebookresearch/Kats to analyze the data to find points where the metric
133
+ changes.
134
+
135
+ Args:
136
+ df (DataFrame): Dataframe holding the information between the matching of two videos
137
+ smoothing_window_size (int): Smoothing window for the timeseries analysis. Defaults to 10.
138
+ method (str): Method for the timeseries analyis. Defaults to 'ROBUST'.
139
+ metric (str): Main reporting metric for the timeseries analysis. Defaults to "ROLL_OFFSET_MODE".
140
+
141
+ Returns:
142
+ change_points [TimeSeriesChangePoint]: Array of time series change point objects.
143
+
144
+ """
145
+ # Convert the df to how kats wants it
146
  tsd = TimeSeriesData(df.loc[:,['time', metric]])
147
+
148
+ # Depending on the method get the change points
149
  if method.upper() == "CUSUM":
150
  detector = CUSUMDetector(tsd)
151
  elif method.upper() == "ROBUST":
152
  detector = RobustStatDetector(tsd)
153
  change_points = detector.detector(smoothing_window_size=smoothing_window_size, comparison_window=-2)
154
 
155
+ # Log some statistics
156
  if method.upper() == "CUSUM" and change_points != []:
157
  mean_offset_prechange = change_points[0].mu0
158
  mean_offset_postchange = change_points[0].mu1
159
  jump_s = mean_offset_postchange - mean_offset_prechange
160
+ logging.info(f"Video jumps {jump_s:.1f}s in time at {mean_offset_prechange:.1f} seconds")
161
+
162
  return change_points
163
 
164
+ def get_videomatch_df(lims, D, I, hash_vectors, distance, window_size=ROLLING_WINDOW_SIZE, vanilla_df=False):
165
+ """Get the dataframe holding all information of the comparison between two videos.
166
+
167
+ Args:
168
+ lims (ndarray): from where to where in I and D the result for query i is
169
+ D (ndarray): distances of the vectors within a radius around the query point
170
+ I (ndarray): indices of the neighbours
171
+ hash_vectors (ndarray): vector of the indexed frames that can be searched.
172
+ distance (int): heuristic distance to use for the search for most accurate matches.
173
+ window_size (int): Rolling window size that is used when calculating the mode. Defaults to ROLLING_WINDOW_SIZE.
174
+ vanilla_df: Toggle for returning other baseline dataframe. Defaults to False.
175
 
176
+ Returns:
177
+ df (DataFrame): Dataframe with extra information added about decision making regarding the match between videos.
178
+
179
+ """
180
+ # Get match locations in seconds
181
  target = [(lims[i+1]-lims[i]) * [i] for i in range(hash_vectors.shape[0])]
182
  target_s = [i/FPS for j in target for i in j]
183
  source_s = [i/FPS for i in I]
184
 
185
+ # Make dataframe
186
  df = pd.DataFrame(zip(target_s, source_s, D, I), columns = ['TARGET_S', 'SOURCE_S', 'DISTANCE', 'INDICES'])
187
  if vanilla_df:
188
  return df
 
191
  df['TARGET_WEIGHT'] = 1 - df['DISTANCE']/distance # Higher value means a better match
192
  df['SOURCE_WEIGHTED_VALUE'] = df['SOURCE_S'] * df['TARGET_WEIGHT'] # Multiply the weight (which indicates a better match) with the value for Y and aggregate to get a less noisy estimate of Y
193
 
194
+ # Group by X so for every second/x there will be 1 source value in the end
195
  grouped_X = df.groupby('TARGET_S').agg({'SOURCE_WEIGHTED_VALUE' : 'sum', 'TARGET_WEIGHT' : 'sum'})
196
  grouped_X['FINAL_SOURCE_VALUE'] = grouped_X['SOURCE_WEIGHTED_VALUE'] / grouped_X['TARGET_WEIGHT']
197
 
 
200
  df = df.drop(columns=['SOURCE_WEIGHTED_VALUE', 'TARGET_WEIGHT'])
201
  df = df.rename({'FINAL_SOURCE_VALUE' : 'SOURCE_S'}, axis='columns')
202
 
203
+ # Add NAN to "missing" x values
204
  step_size = 1/FPS
205
  x_complete = np.round(np.arange(start=0.0, stop = max(df['TARGET_S'])+step_size, step = step_size), 1) # More robust
206
  df['TARGET_S'] = np.round(df['TARGET_S'], 1)
 
212
  # Interpolate between frames since there are missing values
213
  df['SOURCE_LIP_S'] = df['SOURCE_S'].interpolate(method='linear', limit_direction='both', axis=0)
214
 
215
+ # Add timeshift col and timeshift col with Linearly Interpolated Values (LIP)
216
  df['TIMESHIFT'] = df['SOURCE_S'].shift(1) - df['SOURCE_S']
217
  df['TIMESHIFT_LIP'] = df['SOURCE_LIP_S'].shift(1) - df['SOURCE_LIP_S']
218
 
219
+ # Add offset col that assumes the video is played at the same speed as the other to do a "timeshift"
220
  df['OFFSET'] = df['SOURCE_S'] - df['TARGET_S'] - np.min(df['SOURCE_S'])
221
  df['OFFSET_LIP'] = df['SOURCE_LIP_S'] - df['TARGET_S'] - np.min(df['SOURCE_LIP_S'])
222
 
 
225
 
226
  # Add time column for plotting
227
  df['time'] = pd.to_datetime(df["TARGET_S"], unit='s') # Needs a datetime as input
228
+
229
  return df
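For reference, the shape arithmetic in index_hashes_for_video follows the FAISS binary-index convention: the index dimension is given in bits, each stored vector is a packed uint8 array of dimension/8 bytes, and range_search measures Hamming distance. A self-contained sketch with toy data (unrelated to any real video):

    import faiss
    import numpy as np

    # 1000 toy "hashes" of 8 bytes each -> a 64-bit binary index, as in the diff.
    toy_hashes = np.random.randint(0, 256, size=(1000, 8), dtype=np.uint8)
    d_bits = toy_hashes.shape[1] * 8  # dimension in bits, hence the *8 above

    quantizer = faiss.IndexBinaryFlat(d_bits)
    index = faiss.IndexBinaryIVF(quantizer, d_bits, min(16, toy_hashes.shape[0]))
    index.nprobe = 1          # search only the nearest cluster per query
    index.train(toy_hashes)   # IVF needs training before adding vectors
    index.add(toy_hashes)

    # Hamming range search within radius 3, mirroring compare_videos().
    lims, D, I = index.range_search(toy_hashes, 3)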