File size: 8,213 Bytes
7971a7a
 
1991773
7971a7a
 
 
 
 
 
9061a2e
 
b9cd4c4
 
7971a7a
 
9061a2e
7971a7a
6608d1c
 
 
 
 
1991773
7971a7a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0112deb
 
7971a7a
0112deb
7971a7a
 
0112deb
7971a7a
 
 
0112deb
7971a7a
0112deb
 
 
 
 
7971a7a
 
 
0112deb
 
7971a7a
9807395
7971a7a
9807395
 
 
 
 
 
 
7971a7a
 
9807395
 
 
 
7971a7a
 
6608d1c
 
7971a7a
 
 
 
 
9807395
879e657
7971a7a
 
 
 
 
 
 
 
 
 
 
 
 
b9cd4c4
9807395
 
 
 
 
 
b9cd4c4
 
 
 
 
 
 
 
 
 
9807395
b9cd4c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9061a2e
 
 
b9cd4c4
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
import os
import logging
import json
import faiss

from kats.detectors.cusum_detection import CUSUMDetector
from kats.detectors.robust_stat_detection import RobustStatDetector
from kats.consts import TimeSeriesData

from scipy import stats as st

import numpy as np
import pandas as pd

from videohash import compute_hashes, filepath_from_url
from config import FPS, MIN_DISTANCE, MAX_DISTANCE, ROLLING_WINDOW_SIZE

# def get_target_urls(json_file='apb2022.json'):
#     """ Obtain target urls for the target videos of a json file containing .mp4 files """
#     with open('apb2022.json', "r") as json_file:
#         target_videos = json.load(json_file)
#         return [video['mp4'] for video in target_videos]

def index_hashes_for_video(url: str) -> faiss.IndexBinaryIVF:
    """ Return a FAISS binary IVF index holding the frame hashes of the video at `url`.

    The index is cached on disk at '<filepath>.index', where <filepath> is whatever
    filepath_from_url(url) returns. If that file already exists it is loaded and
    returned as-is; otherwise the hashes are computed, indexed, written to that
    path and the fresh index is returned.

    args:
    - url: url of the video, forwarded to filepath_from_url and compute_hashes
    """
    filepath = filepath_from_url(url)
    index_path = f'{filepath}.index'

    # Fast path: an index was written for this video on an earlier call.
    if os.path.exists(index_path):
        logging.info(f"Loading indexed hashes from {filepath}.index")
        binary_index = faiss.read_index_binary(index_path)
        logging.info(f"Index {filepath}.index has in total {binary_index.ntotal} frames")
        return binary_index

    # One row per entry yielded by compute_hashes, taken from its 'hash' field.
    hash_vectors = np.array([entry['hash'] for entry in compute_hashes(url)])
    logging.info(f"Computed hashes for {hash_vectors.shape} frames.")

    # The dimension handed to FAISS is the row width times 8
    # (presumably one byte per column, i.e. a size in bits - TODO confirm dtype).
    hash_bits = hash_vectors.shape[1] * 8
    # Never ask for more clusters than there are vectors to train on.
    cluster_count = min(16, hash_vectors.shape[0])

    quantizer = faiss.IndexBinaryFlat(hash_bits)
    index = faiss.IndexBinaryIVF(quantizer, hash_bits, cluster_count)
    index.nprobe = 1  # Number of nearest clusters to be searched per query.

    # The quantizer has to be trained before vectors can be added.
    index.train(hash_vectors)
    index.add(hash_vectors)

    faiss.write_index_binary(index, index_path)
    logging.info(f"Indexed hashes for {index.ntotal} frames to {filepath}.index.")
    return index

def get_video_index(url: str):
    """ Build (or load) the FAISS index of a video and recover its hash vectors.

    args:
    - url: url of the video, forwarded to index_hashes_for_video

    returns:
    - video_index: the index returned by index_hashes_for_video, with a direct map enabled
    - hash_vectors: numpy array with one reconstructed vector per entry of the index
    """
    video_index = index_hashes_for_video(url)

    # A direct map is required before vectors can be reconstructed by their id.
    video_index.make_direct_map()

    # Read every stored vector back out of the index, in id order.
    frame_ids = range(video_index.ntotal)
    hash_vectors = np.array(list(map(video_index.reconstruct, frame_ids)))

    return video_index, hash_vectors

def compare_videos(hash_vectors, target_index, MIN_DISTANCE = 3):
    """ Run a range search of `hash_vectors` against `target_index`.

    args:
        - hash_vectors: the query vectors
        - target_index: the index that is searched; must provide range_search(queries, radius)
        - MIN_DISTANCE: the radius handed to range_search (despite the name it is used
          as the search radius, not as a lower bound)

    returns:
        The tuple (lims, D, I, hash_vectors). The search result is a triplet of 1D arrays
        where the result for query i is in I[lims[i]:lims[i+1]] (indices of neighbors)
        and D[lims[i]:lims[i+1]] (distances). `hash_vectors` is passed through unchanged.
    """
    limits, distances, neighbor_ids = target_index.range_search(hash_vectors, MIN_DISTANCE)
    return limits, distances, neighbor_ids, hash_vectors

def get_decent_distance(video_index, hash_vectors, target_index, MIN_DISTANCE, MAX_DISTANCE):
    """ Heuristic for a base distance: try increasing distances and return the first one
    for which the number of matches is at least the number of frames in `video_index`.

    The distances tried start at MIN_DISTANCE - 2 and grow in steps of 2 up to,
    but not including, MAX_DISTANCE + 2.

    args:
        - video_index: The index of the source video (only its `ntotal` is read)
        - hash_vectors: The hash vectors used as queries in compare_videos
        - target_index: The index that is searched by compare_videos
        - MIN_DISTANCE: Lower end of the distances to try (see above)
        - MAX_DISTANCE: Upper end of the distances to try (see above)

    returns:
        The first distance (as a python int) that yields enough matches, or None
        (after logging a warning) when none of the tried distances does.
    """
    candidates = np.arange(start=MIN_DISTANCE - 2, stop=MAX_DISTANCE + 2, step=2, dtype=int)
    for distance in map(int, candidates):
        # Only the flat array of match distances is needed; its length is the match count.
        match_distances = compare_videos(hash_vectors, target_index, MIN_DISTANCE=distance)[1]
        nr_source_frames = video_index.ntotal
        nr_matches = len(match_distances)

        if nr_matches > 0:
            logging.info(f"{(nr_matches/nr_source_frames) * 100.0:.1f}% of frames have a match for distance '{distance}' ({nr_matches} matches for {nr_source_frames} frames)")
        if nr_matches >= nr_source_frames:
            return distance

    logging.warning(f"No matches found for any distance between {MIN_DISTANCE} and {MAX_DISTANCE}")
    return None
    
def get_change_points(df, smoothing_window_size=10, method='ROBUST', metric="ROLL_OFFSET_MODE"):
    """ Detect change points in one column of a video match dataframe.

    args:
        - df: dataframe containing a 'time' column and the column named by `metric`
        - smoothing_window_size: forwarded to the detector's `detector(...)` call
        - method: 'CUSUM' or 'ROBUST' (case-insensitive); selects CUSUMDetector or RobustStatDetector
        - metric: name of the column of `df` that is analysed

    returns:
        The change points as returned by the selected detector.

    raises:
        ValueError: if `method` is neither 'CUSUM' nor 'ROBUST'.
    """
    method_name = method.upper()
    # Fail early with a clear message; previously an unknown method left `detector`
    # unassigned and surfaced as an UnboundLocalError further down.
    if method_name not in ("CUSUM", "ROBUST"):
        raise ValueError(f"Unsupported change point method '{method}', expected 'CUSUM' or 'ROBUST'")

    tsd = TimeSeriesData(df.loc[:, ['time', metric]])
    if method_name == "CUSUM":
        detector = CUSUMDetector(tsd)
    else:
        detector = RobustStatDetector(tsd)
    change_points = detector.detector(smoothing_window_size=smoothing_window_size, comparison_window=-2)

    # Print some stats about the first change point (CUSUM results only).
    if method_name == "CUSUM" and change_points:
        mean_offset_prechange = change_points[0].mu0
        mean_offset_postchange = change_points[0].mu1
        jump_s = mean_offset_postchange - mean_offset_prechange
        # NOTE(review): the value printed after "at" is the pre-change mean of the metric,
        # not the time of the change point - confirm this is the intended message.
        print(f"Video jumps {jump_s:.1f}s in time at {mean_offset_prechange:.1f} seconds")
    return change_points

def get_videomatch_df(lims, D, I, hash_vectors, distance, min_distance=MIN_DISTANCE, window_size=ROLLING_WINDOW_SIZE, vanilla_df=False):
    """ Turn raw range-search results into a dataframe with one row per time step.

    args:
        - lims, D, I: range-search output as returned by compare_videos; the matches of
          query frame i are D[lims[i]:lims[i+1]] (distances) and I[lims[i]:lims[i+1]] (indices)
        - hash_vectors: the query vectors; only the number of rows is used
        - distance: the value the match distances are divided by to obtain weights
          (presumably the radius used for the search - verify against caller)
        - min_distance: currently unused, kept so existing callers keep working
        - window_size: size of the centered rolling window used for ROLL_OFFSET_MODE
        - vanilla_df: when True, return the raw per-match dataframe with the columns
          TARGET_S, SOURCE_S, DISTANCE and INDICES and skip all further processing

    returns:
        A dataframe with the columns TARGET_S, SOURCE_S, SOURCE_LIP_S, TIMESHIFT,
        TIMESHIFT_LIP, OFFSET, OFFSET_LIP, ROLL_OFFSET_MODE and time, where TARGET_S
        is the query frame index divided by FPS and SOURCE_S the weighted average of
        the matched indices divided by FPS.

    raises:
        ValueError: if there are no matches at all and `vanilla_df` is False.
    """
    # Time of the query frame, repeated once per match of that frame so that the
    # list lines up element-wise with D and I.
    target_s = [
        frame / FPS
        for frame in range(hash_vectors.shape[0])
        for _ in range(lims[frame + 1] - lims[frame])
    ]
    source_s = [i/FPS for i in I]

    # Make df
    df = pd.DataFrame(zip(target_s, source_s, D, I), columns = ['TARGET_S', 'SOURCE_S', 'DISTANCE', 'INDICES'])
    if vanilla_df:
        return df

    # Without matches there is no time range to build; fail here with a clear message
    # instead of on max() of an empty column further down (same exception type).
    if df.empty:
        raise ValueError("Cannot build a video match dataframe: the search results contain no matches")

    # Weight values by distance of their match
    df['TARGET_WEIGHT'] = 1 - df['DISTANCE']/distance # Higher value means a better match
    df['SOURCE_WEIGHTED_VALUE'] = df['SOURCE_S'] * df['TARGET_WEIGHT'] # Multiply the weight (which indicates a better match) with the value for Y and aggregate to get a less noisy estimate of Y

    # Group by X so for every second/x there will be 1 value of Y in the end
    # (the weighted average of all matches of that query frame)
    grouped_X = df.groupby('TARGET_S').agg({'SOURCE_WEIGHTED_VALUE' : 'sum', 'TARGET_WEIGHT' : 'sum'})
    grouped_X['FINAL_SOURCE_VALUE'] = grouped_X['SOURCE_WEIGHTED_VALUE'] / grouped_X['TARGET_WEIGHT']

    # Remake the dataframe with just TARGET_S and the averaged SOURCE_S
    df = grouped_X.reset_index()
    df = df.drop(columns=['SOURCE_WEIGHTED_VALUE', 'TARGET_WEIGHT'])
    df = df.rename({'FINAL_SOURCE_VALUE' : 'SOURCE_S'}, axis='columns')

    # Add NAN to "missing" x values (base it off hash vector, not target_s)
    # NOTE(review): both sides are rounded to 1 decimal so the merge keys compare equal;
    # this assumes 1/FPS is a multiple of 0.1 - confirm against the configured FPS.
    step_size = 1/FPS
    x_complete =  np.round(np.arange(start=0.0, stop = max(df['TARGET_S'])+step_size, step = step_size), 1) # More robust
    df['TARGET_S'] = np.round(df['TARGET_S'], 1)
    df_complete = pd.DataFrame(x_complete, columns=['TARGET_S'])

    # Merge dataframes to get NAN values for every missing SOURCE_S
    df = df_complete.merge(df, on='TARGET_S', how='left')

    # Interpolate between frames since there are missing values
    df['SOURCE_LIP_S'] = df['SOURCE_S'].interpolate(method='linear', limit_direction='both', axis=0)

    # Add timeshift col and timeshift col with Linearly Interpolated Values
    # (previous row minus current row)
    df['TIMESHIFT'] = df['SOURCE_S'].shift(1) - df['SOURCE_S']
    df['TIMESHIFT_LIP'] = df['SOURCE_LIP_S'].shift(1) - df['SOURCE_LIP_S']

    # Add Offset col that assumes the video is played at the same speed as the other to do a "timeshift"
    df['OFFSET'] = df['SOURCE_S'] - df['TARGET_S'] - np.min(df['SOURCE_S'])
    df['OFFSET_LIP'] = df['SOURCE_LIP_S'] - df['TARGET_S'] - np.min(df['SOURCE_LIP_S'])

    # Add rolling window mode of the offset rounded to whole seconds
    df['ROLL_OFFSET_MODE'] = np.round(df['OFFSET_LIP'], 0).rolling(window_size, center=True, min_periods=1).apply(lambda x: st.mode(x)[0])

    # Add time column for plotting
    df['time'] = pd.to_datetime(df["TARGET_S"], unit='s') # Needs a datetime as input
    return df