# Start to finish - DINOv2 feature extraction

## Imports

In [None]:
from transformers import AutoImageProcessor, AutoModel
from PIL import Image


import matplotlib.pyplot as plt
import numpy as np
import requests
import torch
import cv2
import os

## Initialize pre-trained image processor and model

In [None]:
# Hugging Face checkpoint shared by the image processor and the model.
_DINOV2_CHECKPOINT = 'facebook/dinov2-large'

# Run on the GPU when one is available (the original author notes the model
# takes up 2193 MiB on the device); otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# The processor prepares raw images for the model; the model is loaded and
# then moved to the selected device.
processor = AutoImageProcessor.from_pretrained(_DINOV2_CHECKPOINT)
model = AutoModel.from_pretrained(_DINOV2_CHECKPOINT)
model = model.to(device)

## DINOv2 Feature Extraction

In [None]:
from tqdm import tqdm
import gc

# Collect Python garbage first so tensors that are no longer referenced are
# actually freed, then ask PyTorch to hand its now-unused cached GPU blocks
# back to the driver. Doing it in the opposite order leaves the memory of
# just-collected tensors sitting in the cache.
gc.collect()
torch.cuda.empty_cache()

In [None]:
# Directory containing the input videos (one file per video).
path_to_videos = './dataset-tacdec/videos'

# Output layout: one .pt file per video in each of the two sub-directories.
processed_features_dir = './processed_features'
last_hidden_states_dir = os.path.join(processed_features_dir, 'last_hidden_states/')
pooler_outputs_dir = os.path.join(processed_features_dir, 'pooler_outputs/')

# Create directories if they don't exist
os.makedirs(last_hidden_states_dir, exist_ok=True)
os.makedirs(pooler_outputs_dir, exist_ok=True)

# Dictionary with filename as key, all feature extracted frames as values.
# NOTE: not populated by this cell; kept so the name stays defined.
feature_extracted_videos = {}

# Number of frames sent through the model in one forward pass.
batch_size = 32


def _extract_batch(frames, last_hidden_states, pooler_outputs):
    """Run one batch of RGB frames through the model and collect its outputs.

    Args:
        frames: List of RGB frames (as produced by ``cv2.cvtColor``).
        last_hidden_states: List that receives the batch's
            ``last_hidden_state`` tensor.
        pooler_outputs: List that receives the batch's ``pooler_output``
            tensor.

    The tensors are moved to the CPU before being stored so that the features
    of a whole video do not accumulate in GPU memory, and so that the saved
    files can be loaded on a machine without CUDA.
    """
    inputs = processor(images=frames, return_tensors="pt").to(device)

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        outputs = model(**inputs)

    for key, value in outputs.items():
        if key == 'last_hidden_state':
            last_hidden_states.append(value.cpu())
        elif key == 'pooler_output':
            pooler_outputs.append(value.cpu())
        else:
            print('Error in key, expected last_hidden_state or pooler_output, got: ', key)


# Process each video
for video_file in tqdm(os.listdir(path_to_videos)):
    full_path = os.path.join(path_to_videos, video_file)

    if not os.path.isfile(full_path):
        continue

    # Per-video accumulators, reset for each video.
    batch_last_hidden_states = []
    batch_pooler_outputs = []
    batch_frames = []

    cap = cv2.VideoCapture(full_path)
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # End of stream (or the file could not be decoded).
                break

            # cv2 comes in BGR, but transformer takes RGB
            batch_frames.append(cv2.cvtColor(frame, cv2.COLOR_BGR2RGB))

            # Flush as soon as a full batch has been gathered.
            if len(batch_frames) == batch_size:
                _extract_batch(batch_frames, batch_last_hidden_states, batch_pooler_outputs)
                batch_frames = []
    finally:
        # Always free the decoder handle, even if the model call fails.
        cap.release()

    # Process the last, partially filled batch.
    if batch_frames:
        _extract_batch(batch_frames, batch_last_hidden_states, batch_pooler_outputs)

    # torch.cat raises on an empty list, so skip files that produced nothing
    # (e.g. non-video files or videos that could not be decoded).
    if not batch_last_hidden_states or not batch_pooler_outputs:
        print(f'No features extracted from {video_file}, skipping')
        continue

    all_last_hidden_states = torch.cat(batch_last_hidden_states, dim=0)
    all_pooler_outputs = torch.cat(batch_pooler_outputs, dim=0)

    # Save the tensors with the video name as filename; splitext swaps the
    # extension whatever it is, not only a literal '.mp4'.
    pt_filename = os.path.splitext(video_file)[0] + '.pt'
    torch.save(all_last_hidden_states, os.path.join(last_hidden_states_dir, pt_filename))
    torch.save(all_pooler_outputs, os.path.join(pooler_outputs_dir, pt_filename))

print('Features extracted')

## Reload features to verify 

In [None]:
# Reload the features saved for one video. map_location='cpu' lets the check
# run on a machine without a GPU even if the tensors were saved from CUDA.
lhs_torch = torch.load('./processed_features/last_hidden_states/1738_avxeiaxxw6ocr.pt', map_location='cpu')
po_torch = torch.load('./processed_features/pooler_outputs/1738_avxeiaxxw6ocr.pt', map_location='cpu')

print('LHS Torch size: ', lhs_torch.size())
print('PO Torch size: ', po_torch.size())

# First frame of the in-memory tensor left over from the extraction cell.
# NOTE: that variable holds the last video processed there, which is not
# necessarily the video reloaded above.
for i in range(all_last_hidden_states.size(0)):
    print(f"Frame {i}:")
    print(all_last_hidden_states[i])
    print()
    break  # only the first frame is shown

# First frame of the tensor reloaded from disk (previously this loop printed
# the in-memory tensor again, so the reloaded data was never inspected).
for i in range(lhs_torch.size(0)):
    print(f"Frame {i}:")
    print(lhs_torch[i])
    print()
    break  # only the first frame is shown


# Different sorts of plots

## Histogram of video length in seconds

In [None]:
import os
import cv2
import numpy as np

path_to_videos = './dataset-tacdec/videos'

# Per-video statistics, in directory-listing order.
video_lengths = []   # duration in seconds
frame_counts = []    # number of frames

# Visit every regular file in the video directory.
for clip_name in os.listdir(path_to_videos):
    clip_path = os.path.join(path_to_videos, clip_name)

    if os.path.isfile(clip_path):
        capture = cv2.VideoCapture(clip_path)

        # Only videos that OpenCV managed to open contribute statistics.
        # Note: this relies on the container's frame-rate metadata being accurate.
        if capture.isOpened():
            frames_per_second = capture.get(cv2.CAP_PROP_FPS)
            total_frames = int(capture.get(cv2.CAP_PROP_FRAME_COUNT))

            # A non-positive frame rate yields a duration of 0.
            if frames_per_second > 0:
                seconds = total_frames / frames_per_second
            else:
                seconds = 0

            video_lengths.append(seconds)
            frame_counts.append(total_frames)

        capture.release()

# Persist both lists as .npy files for later reuse.
np.save('./video_durations', video_lengths)
np.save('./frame_counts', frame_counts)


In [None]:
import seaborn as sns

# Use seaborn's dark grid look for all following plots.
sns.set(style="darkgrid")

# One histogram per statistic: (values, bar colour, title, x-axis label).
histogram_specs = (
    (video_lengths, "blue", 'Histogram - Video Lengths', 'Length of Videos (seconds)'),
    (frame_counts, "green", 'Histogram - Number of Frames', 'Frame Count'),
)

for values, bar_color, title, x_label in histogram_specs:
    # Each histogram gets its own figure, with a KDE curve overlaid.
    plt.figure(figsize=(12, 6))
    sns.histplot(values, kde=True, color=bar_color)
    plt.title(title)
    plt.xlabel(x_label)
    plt.ylabel('Number of Videos')

# Render both figures at once.
plt.show()

## Box plots of frame counts and video lengths

In [None]:
# One box plot per statistic: (values, extra boxplot kwargs, title, x-axis label).
boxplot_specs = (
    (video_lengths, {}, 'Box Plot of Video Lengths', 'Video Length (seconds)'),
    (frame_counts, {"color": "r"}, 'Box Plot of Frame Counts', 'Frame Count'),
)

for values, style_kwargs, title, x_label in boxplot_specs:
    sns.boxplot(x=values, **style_kwargs)
    plt.title(title)
    plt.xlabel(x_label)
    # Show each plot before drawing the next one so they do not share axes.
    plt.show()


## Class distributions

In [None]:
import os
import json
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

# Directory holding one JSON label file per video.
path_to_labels = './dataset-tacdec/full_labels'

# Frame counter per known class; labels not listed here are ignored below.
class_counts = {'background': 0, 'tackle-live': 0, 'tackle-replay': 0, 'tackle-live-incomplete': 0, 'tackle-replay-incomplete': 0, 'dummy_class': 0}

# Iterate through each JSON file in the labels directory
for label_file in os.listdir(path_to_labels):
    full_path = os.path.join(path_to_labels, label_file)

    if not os.path.isfile(full_path):
        continue

    # JSON is read as UTF-8 explicitly so decoding does not depend on the
    # platform's locale encoding.
    with open(full_path, 'r', encoding='utf-8') as file:
        data = json.load(file)

    frame_sections = data['frames_sections']

    # Each section maps a frame number to that frame's annotation; the class
    # label is stored under 'radio_answer'.
    for section in frame_sections:
        for frame_data in section.values():
            class_label = frame_data['radio_answer']
            if class_label in class_counts:
                class_counts[class_label] += 1

# Convert the dictionary to a DataFrame for Seaborn
df_class_counts = pd.DataFrame(list(class_counts.items()), columns=['Class', 'Occurrences'])

# Save the DataFrame to a CSV file
df_class_counts.to_csv('class_distribution.csv', sep=',', index=False, encoding='utf-8')

# Plotting the distribution using Seaborn
plt.figure(figsize=(10, 6))
sns.barplot(x='Class', y='Occurrences', data=df_class_counts, palette='viridis', alpha=0.75)
plt.title('Distribution of Frame Classes')
plt.xlabel('Class')
plt.ylabel('Number of Occurrences')
plt.xticks(rotation=45)  # Rotate class names for better readability
plt.tight_layout()  # Adjust layout to make room for the rotated x-axis labels
plt.show()


In [None]:
import pandas as pd
import matplotlib.pyplot as plt

# Requires df_class_counts from the class-distribution cell.


def _format_share(pct):
    """Format a wedge's percentage with one decimal place."""
    return '{:.1f}%'.format(pct)


# One colour per class, taken from seaborn's 'bright' palette.
wedge_colors = sns.color_palette('bright', len(df_class_counts))

# Pie chart of how the frames are distributed across the classes.
plt.figure(figsize=(8, 8))
plt.pie(
    df_class_counts['Occurrences'],
    labels=df_class_counts['Class'],
    autopct=_format_share,
    startangle=140,
    colors=wedge_colors,
)
plt.title('Distribution of Frame Classes', fontweight='bold')
plt.show()