sradc
fix timestamp computation (was rounding fps to int...)
5a9c0da
raw
history blame
2.57 kB
import base64
import cv2
import pandas as pd
from PIL import Image
from tqdm import tqdm
from pipeline.clip_wrapper import MODEL_DIM, ClipWrapper
from pipeline.download_videos import DATA_DIR, REPO_ROOT, VIDEO_DIR
# Sampling interval, in seconds of video time, between frames passed to CLIP.
FRAME_EXTRACT_RATE_SECONDS = 5 # Extract a frame every 5 seconds
# Root directory for extracted frames; each video gets a sub-directory named
# after its video id.
IMAGES_DIR = DATA_DIR / "images"
# Parquet file that receives one row per extracted frame.
DATAFRAME_PATH = DATA_DIR / "dataset.parquet"
def process_videos() -> None:
    """Run CLIP on sampled video frames and save the results to a parquet file.

    Every ``*.mp4`` in ``VIDEO_DIR`` that does not yet have a ``complete``
    marker in its image directory is processed: each frame yielded by
    ``get_clip_vectors`` is written as ``<frame_idx>.jpg`` under
    ``IMAGES_DIR/<video_id>/`` and recorded as one row holding the video id,
    frame index, timestamp, base64-encoded JPEG bytes and the CLIP vector
    (columns ``dim_0`` .. ``dim_{MODEL_DIM - 1}``).

    Rows from this run are merged with any dataset already stored at
    ``DATAFRAME_PATH``. Videos skipped because an earlier run completed them
    therefore stay in the saved file instead of being overwritten away.
    Existing rows for a video that was re-processed in this run are replaced.
    """
    IMAGES_DIR.mkdir(exist_ok=True, parents=True)
    # Keep the extracted frames out of version control.
    (IMAGES_DIR / ".gitignore").write_text("*")
    clip_wrapper = ClipWrapper()
    columns = ["video_id", "frame_idx", "timestamp", "base64_image"] + [
        f"dim_{i}" for i in range(MODEL_DIM)
    ]
    results = []
    # Materialise the glob so tqdm knows the total number of videos.
    for video_path in tqdm(list(VIDEO_DIR.glob("*.mp4")), desc="Processing videos"):
        video_id = video_path.stem
        extracted_images_dir = IMAGES_DIR / video_id
        extracted_images_dir.mkdir(exist_ok=True, parents=True)
        complete_file = extracted_images_dir / "complete"
        if complete_file.exists():
            # Already handled by a previous run; its rows are picked up from
            # the existing parquet file when merging below.
            continue
        for clip_vector, image, timestamp_secs, frame_idx in get_clip_vectors(
            video_path, clip_wrapper
        ):
            image_path = extracted_images_dir / f"{frame_idx}.jpg"
            image.save(image_path)
            # Encode the JPEG bytes as written to disk, not the raw pixels.
            encoded_image = base64.b64encode(image_path.read_bytes()).decode()
            results.append(
                [
                    video_id,
                    frame_idx,
                    timestamp_secs,
                    encoded_image,
                    *clip_vector,
                ]
            )
        # Mark the video as done only after every frame has been extracted.
        complete_file.touch()
    df = pd.DataFrame(results, columns=columns)
    if DATAFRAME_PATH.exists():
        previous_df = pd.read_parquet(DATAFRAME_PATH)
        # Drop stale rows for videos that were processed again in this run.
        previous_df = previous_df[~previous_df["video_id"].isin(df["video_id"])]
        if df.empty:
            df = previous_df
        elif not previous_df.empty:
            df = pd.concat([previous_df, df], ignore_index=True)
    print(f"Saving data to {DATAFRAME_PATH}")
    df.to_parquet(DATAFRAME_PATH, index=False)
def get_clip_vectors(video_path, clip_wrapper):
    """Yield a CLIP vector for one frame every FRAME_EXTRACT_RATE_SECONDS.

    Args:
        video_path: Path of the video file to read with OpenCV.
        clip_wrapper: Object exposing ``images2vec(list_of_images)``; the
            result is squeezed and converted with ``.numpy()`` (presumably a
            torch tensor -- confirm against ``ClipWrapper``).

    Yields:
        Tuples ``(clip_vector, image, timestamp_secs, frame_idx)`` where
        ``image`` is the sampled frame as a PIL image and ``timestamp_secs``
        is ``frame_idx / fps``.

    Raises:
        ValueError: If the video reports frames but no positive frame rate,
            so neither the sampling stride nor timestamps can be computed.
    """
    cap = cv2.VideoCapture(str(video_path))
    try:
        num_video_frames = int(cap.get(cv2.CAP_PROP_FRAME_COUNT))
        fps = cap.get(cv2.CAP_PROP_FPS)
        if num_video_frames > 0 and fps <= 0:
            raise ValueError(f"Could not determine the frame rate of {video_path}")
        # Use the unrounded fps for the stride; clamp to 1 so very low frame
        # rates sample every frame instead of dividing by zero below.
        extract_every_n_frames = max(1, int(FRAME_EXTRACT_RATE_SECONDS * fps))
        for frame_idx in tqdm(range(num_video_frames), desc="Running CLIP on video"):
            ret, frame = cap.read()
            if not ret:
                # The container's frame count is only an estimate; stop at
                # the first frame that cannot be decoded.
                break
            if frame_idx % extract_every_n_frames != 0:
                continue
            # OpenCV decodes to BGR; reverse the channel axis to get RGB.
            image = Image.fromarray(frame[..., ::-1])
            clip_vector = clip_wrapper.images2vec([image]).squeeze().numpy()
            timestamp_secs = frame_idx / fps
            yield clip_vector, image, timestamp_secs, frame_idx
    finally:
        # Release the capture even if the consumer stops early or an error
        # is raised while iterating.
        cap.release()
# Run the pipeline only when executed as a script, not when imported.
if __name__ == "__main__":
    process_videos()