Spaces:
Build error
Build error
File size: 8,025 Bytes
b9cd4c4 30566f3 b9cd4c4 2a1a736 6608d1c 1991773 879e657 1991773 9061a2e 1991773 9061a2e 1991773 879e657 1991773 879e657 1991773 2a1a736 1991773 879e657 9807395 9061a2e 1991773 2a1a736 879e657 2a1a736 9061a2e 2a1a736 9061a2e 30566f3 2a1a736 30566f3 879e657 2a1a736 30566f3 9061a2e 2a1a736 30566f3 9061a2e 30566f3 2a1a736 30566f3 9061a2e 30566f3 9061a2e 6608d1c 879e657 6608d1c 879e657 6608d1c 9807395 9061a2e 879e657 2a1a736 1991773 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 |
import time
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from scipy import stats as st
from config import FPS
def plot_comparison(lims, D, I, hash_vectors, MIN_DISTANCE = 3):
    """Scatter source-video time against matched target-video time and return the figure.

    Every match becomes one point: x is the index of the query it belongs to
    and y is the matched index from I, both divided by FPS to obtain seconds.
    Marker size and opacity decrease as the corresponding value in D grows.

    args:
    - lims: boundaries; lims[i+1] - lims[i] is how many matches query i has
    - D: one distance per match (presumably a numpy array aligned with I -- TODO confirm)
    - I: one matched index per match
    - hash_vectors: only its first dimension is read, as the number of queries
    - MIN_DISTANCE: distance scale used to derive marker size and alpha
    """
    sns.set_theme()

    # Repeat each query index once per match it received
    source_frames = []
    for query_idx in range(hash_vectors.shape[0]):
        source_frames.extend((lims[query_idx + 1] - lims[query_idx]) * [query_idx])
    source_seconds = [frame / FPS for frame in source_frames]
    target_seconds = [frame / FPS for frame in I]

    def as_clock(seconds):
        # Render a number of seconds as H:M:S for the tick labels
        return time.strftime('%H:%M:%S', time.gmtime(seconds))

    # Create figure and dataframe to plot with sns
    fig = plt.figure()
    matches = pd.DataFrame(zip(source_seconds, target_seconds), columns = ['X', 'Y'])
    marker_sizes = 2*(1-D/(MIN_DISTANCE+1))
    marker_alpha = 1-D/MIN_DISTANCE
    sns.scatterplot(data=matches, x='X', y='Y', s=marker_sizes, alpha=marker_alpha)

    # Replace the numeric x tick labels with clock-style labels
    x_locs, _ = plt.xticks()
    plt.xticks(x_locs, [as_clock(loc) for loc in x_locs])
    plt.xticks(rotation=90)
    plt.xlabel('Time in source video (H:M:S)')
    plt.xlim(0, None)

    # Replace the numeric y tick labels with clock-style labels
    y_locs, _ = plt.yticks()
    plt.yticks(y_locs, [as_clock(loc) for loc in y_locs])
    plt.ylabel('Time in target video (H:M:S)')

    # Adjust padding to fit gradio
    plt.subplots_adjust(bottom=0.25, left=0.20)
    return fig
def plot_multi_comparison(df, change_points):
    """Draw a 3x2 grid of diagnostic plots from df and return the figure.

    The columns 'SOURCE_S', 'SOURCE_LIP_S', 'OFFSET' and 'OFFSET_LIP' are
    plotted against 'time'. Each change point is then marked with a red
    vertical line and its rounded confidence; the bottom right panel is the
    most indicative one.

    args:
    - df: dataframe holding the columns named above
    - change_points: objects exposing `start_time` and `confidence`
    """
    fig, ax_arr = plt.subplots(3, 2, figsize=(12, 6), dpi=100, sharex=True)

    # (seaborn plot function, y column, grid position); the last entry is the
    # panel the change points are meant to be read against
    panels = (
        (sns.scatterplot, 'SOURCE_S', (0, 0)),
        (sns.lineplot, 'SOURCE_LIP_S', (0, 1)),
        (sns.scatterplot, 'OFFSET', (1, 0)),
        (sns.lineplot, 'OFFSET_LIP', (1, 1)),
        (sns.lineplot, 'OFFSET_LIP', (2, 1)),
    )
    for draw, column, (row, col) in panels:
        draw(data = df, x='time', y=column, ax=ax_arr[row, col])

    # The plt.* calls below act on pyplot's current axes
    # (presumably the last subplot created, i.e. bottom right -- TODO confirm)
    for change_point in change_points:
        cp_time = change_point.start_time
        lowest = np.min(df['OFFSET_LIP'])
        highest = np.max(df['OFFSET_LIP'])
        plt.vlines(x=cp_time, ymin=lowest, ymax=highest, colors='red', lw=2)
        # Random height so that confidence labels of nearby change points are less likely to overlap
        label_y = np.random.uniform(low=lowest, high=highest, size=None)
        plt.text(x=cp_time, y=label_y, s=str(np.round(change_point.confidence, 2)), color='r', rotation=-0.0, fontsize=14)

    plt.xticks(rotation=90)
    return fig
def change_points_to_segments(df, change_points):
    """ Convert change points from kats detector to segment indicators

    Returns a list of segment boundaries: time zero, then the `start_time`
    of every change point in the given order, then the 'TARGET_S' value of
    the last row of df interpreted as seconds.
    """
    # First boundary: zero seconds as a datetime64
    boundaries = [pd.to_datetime(0.0, unit='s').to_datetime64()]

    # Inner boundaries: taken from the change points as they are
    boundaries.extend(cp.start_time for cp in change_points)

    # Last boundary: the final target second found in the dataframe
    last_target_second = df.iloc[-1]['TARGET_S']
    boundaries.append(pd.to_datetime(last_target_second, unit='s').to_datetime64())
    return boundaries
def add_seconds_to_datetime64(datetime64, seconds, subtract=False):
    """Add or subtract a number of seconds to a np.datetime64 object.

    args:
    - datetime64: the point in time to shift
    - seconds: amount to shift by, in (possibly fractional) seconds
    - subtract: when True the amount is subtracted instead of added

    Returns the shifted time. The shift is applied with millisecond
    resolution; `seconds` is rounded to the nearest millisecond.
    """
    # Round instead of truncating: a value such as 2.3 has a fractional part of
    # 0.2999999999999998 in floating point, which truncation turned into 299 ms.
    total_ms = int(round(seconds * 1000))
    delta = np.timedelta64(total_ms, 'ms')
    if subtract:
        return datetime64 - delta
    return datetime64 + delta
def _segment_mode(values):
    """Return the most frequent value in `values` as a scalar, whatever SciPy returns."""
    # Depending on the SciPy release, st.mode yields length-1 arrays or plain
    # scalars; np.atleast_1d makes the [0] lookup valid for both shapes.
    return np.atleast_1d(st.mode(np.asarray(values))[0])[0]


def plot_segment_comparison(df, change_points, video_mp4 = "Placeholder.mp4", video_id="Placeholder.videoID", threshold_diff = 1.5):
    """ Based on the dataframe and detected change points do two things:
    1. Make a decision on where each segment belongs in time and return that info as a list of dicts
    2. Plot how this decision got made as an informative plot

    args:
    - df: dataframe; the columns 'time', 'SOURCE_S', 'OFFSET', 'OFFSET_LIP',
      'ROLL_OFFSET_MODE' and 'TARGET_S' are read
    - change_points: detected points in time where the average metric value changes
    - video_mp4: stored as-is in every decision under "Source Video .mp4"
    - video_id: the unique identifier for the video currently being compared
    - threshold_diff: to plot which segments are likely bad matches

    returns:
    - fig: the figure with the four stacked plots
    - segment_decisions: one dict per segment that has at least one non-NaN
      metric value; segments without such rows are skipped
    """
    fig, ax_arr = plt.subplots(4, 1, figsize=(16, 6), dpi=300, sharex=True)
    ax_arr[0].set_title(video_id)
    sns.scatterplot(data=df, x='time', y='SOURCE_S', ax=ax_arr[0], label="SOURCE_S", color='blue', alpha=1.0)

    # Plot original datapoints without linear interpolation, offset by target video time
    sns.scatterplot(data=df, x='time', y='OFFSET', ax=ax_arr[1], label="OFFSET", color='orange', alpha=1.0)

    # Plot linearly interpolated values next to metric values
    metric = 'ROLL_OFFSET_MODE'
    sns.lineplot(data=df, x='time', y='OFFSET_LIP', ax=ax_arr[2], label="OFFSET_LIP", color='orange')
    sns.scatterplot(data=df, x='time', y=metric, ax=ax_arr[2], label=metric, alpha=0.5)

    # Plot detected change points as lines which will indicate the segments.
    # The plt.* calls below act on pyplot's current axes
    # (presumably the last subplot created, i.e. ax_arr[3] -- TODO confirm).
    sns.scatterplot(data=df, x='time', y=metric, ax=ax_arr[3], label=metric, s=20)
    timestamps = change_points_to_segments(df, change_points)
    metric_min = np.min(df[metric])
    metric_max = np.max(df[metric])
    for boundary in timestamps:
        plt.vlines(x=boundary, ymin=metric_min, ymax=metric_max, colors='black', lw=2, alpha=0.5)

    # Added to every segment's offset to get the origin time back;
    # it does not depend on the segment, so it is computed once
    add_offset = np.min(df['SOURCE_S'])

    # To store "decisions" about segments
    segment_decisions = []
    for start_time, end_time in zip(timestamps[:-1], timestamps[1:]):
        # Cut out the rows strictly between the segment lines and drop NaN metric values
        segment = df[(df['time'] > start_time) & (df['time'] < end_time)]
        segment_no_nan = segment[~np.isnan(segment[metric])]
        if segment_no_nan.empty:
            # Nothing to take a mode of, so no decision can be made for this span
            continue

        # Summarise the segment by the mode of the metric (mean/median are possible alternatives)
        seg_sum_stat = _segment_mode(segment_no_nan[metric])

        # Median absolute deviation of OFFSET_LIP from that summary: small means a "straight line"
        average_diff = np.median(np.abs(segment_no_nan['OFFSET_LIP'] - seg_sum_stat))
        average_offset = np.mean(segment_no_nan['OFFSET_LIP'])

        # Written as a negated "<" so that a NaN difference still counts as noisy
        noisy = not (average_diff < threshold_diff)
        origin_start_time = add_seconds_to_datetime64(start_time, seg_sum_stat + add_offset)
        origin_end_time = add_seconds_to_datetime64(end_time, seg_sum_stat + add_offset)

        # Plot the estimated straight line: green for a confident prediction, red otherwise
        line_color = 'red' if noisy else 'green'
        plt.hlines(y=seg_sum_stat, xmin=start_time, xmax=end_time, color=line_color, lw=5, alpha=0.5)
        plt.text(x=start_time, y=seg_sum_stat, s=str(np.round(average_diff, 1)), color=line_color, rotation=-0.0, fontsize=14)

        # Decisions about segments
        segment_decisions.append({
            "Target Start Time" : pd.to_datetime(start_time).strftime('%H:%M:%S'),
            "Target End Time" : pd.to_datetime(end_time).strftime('%H:%M:%S'),
            "Source Start Time" : pd.to_datetime(origin_start_time).strftime('%H:%M:%S'),
            "Source End Time" : pd.to_datetime(origin_end_time).strftime('%H:%M:%S'),
            "Source Video ID" : video_id,
            "Source Video .mp4" : video_mp4,
            "Uncertainty" : np.round(average_diff, 3),
            "Average Offset in Seconds" : np.round(average_offset, 3),
        })

    # Return figure
    plt.xticks(rotation=90)
    return fig, segment_decisions
|