pandagpt-vicuna-v0-7b / code /pytorchvideo /tests /test_data_ava_dataset.py
mvsoom's picture
Upload folder using huggingface_hub
3133fdb
raw
history blame contribute delete
9.21 kB
# Copyright (c) Facebook, Inc. and its affiliates. All Rights Reserved.
import contextlib
import pathlib
import random
import tempfile
import unittest
import torch
from pytorchvideo.data import Ava
from pytorchvideo.data.clip_sampling import make_clip_sampler
from utils import temp_frame_video
AVA_FPS = 30
@contextlib.contextmanager
def temp_ava_dataset_2_videos():
frame_names = [f"{str(i)}.png" for i in range(90)]
# Create csv containing 2 test frame videos.
with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as frames_file:
frames_file.write("original_vido_id video_id frame_id path labels\n".encode())
# Frame video 1
with temp_frame_video(frame_names) as (frame_1_video_dir, data_1):
for i, frame_name in enumerate(frame_names):
original_video_id_1 = str(frame_1_video_dir)
video_id = "1"
frame_id = str(i)
path = pathlib.Path(frame_1_video_dir) / frame_name
label = "0"
frames_file.write(
f"{original_video_id_1} {video_id} {frame_id} {path} {label}\n".encode()
)
# Frame video 2
with temp_frame_video(frame_names, height=5, width=5) as (
frame_2_video_dir,
data_2,
):
for i, frame_name in enumerate(frame_names):
original_video_id_2 = str(frame_2_video_dir)
video_id = "2"
frame_id = str(i)
path = pathlib.Path(frame_2_video_dir) / frame_name
label = "1"
frames_file.write(
f"{original_video_id_2} {video_id} {frame_id} {path} {label}\n".encode()
)
frames_file.close()
yield frames_file.name, data_1, data_2, original_video_id_1, original_video_id_2
def get_random_bbox():
bb_list = [round(random.random(), 3) for x in range(4)]
converted_list = [str(element) for element in bb_list]
return bb_list, ",".join(converted_list)
class TestAvaDataset(unittest.TestCase):
def test_multiple_videos(self):
with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as data_file:
with temp_ava_dataset_2_videos() as (
frame_paths_file,
video_1,
video_2,
video_1_name,
video_2_name,
):
# add bounding boxes
# video 1
bb_1_a, bb_1_a_string = get_random_bbox()
action_1_a, iou_1_a = 1, 0.85
bb_1_b, bb_1_b_string = get_random_bbox()
action_1_b, iou_1_b = 2, 0.4
data_file.write(
(
f"{video_1_name},902,{bb_1_a_string},"
+ f"{str(action_1_a)},{str(iou_1_a)}\n"
).encode()
)
data_file.write(
(
f"{video_1_name},902,{bb_1_b_string},"
+ f"{str(action_1_b)},{str(iou_1_b)}\n"
).encode()
)
# video 2
bb_2_a, bb_2_a_string = get_random_bbox()
action_2_a, iou_2_a = 3, 0.95
bb_2_b, bb_2_b_string = get_random_bbox()
action_2_b, iou_2_b = 4, 0.9
data_file.write(
(
f"{video_2_name},902,{bb_2_a_string},"
+ f"{str(action_2_a)},{str(iou_2_a)}\n"
).encode()
)
data_file.write(
(
f"{video_2_name},902,{bb_2_b_string},"
+ f"{str(action_2_b)},{str(iou_2_b)}\n"
).encode()
)
data_file.close()
dataset = Ava(
frame_paths_file=frame_paths_file,
frame_labels_file=data_file.name,
clip_sampler=make_clip_sampler("random", 1.0),
)
# All videos are of the form cthw and fps is 30
# Clip is samples at time step = 2 secs in video
sample_1 = next(dataset)
self.assertTrue(sample_1["video"].equal(video_1[:, 45:75, :, :]))
self.assertTrue(
torch.tensor(sample_1["boxes"]).equal(
torch.tensor([bb_1_a, bb_1_b])
)
)
self.assertTrue(
torch.tensor(sample_1["labels"]).equal(
torch.tensor([[action_1_a], [action_1_b]])
)
)
sample_2 = next(dataset)
self.assertTrue(sample_2["video"].equal(video_2[:, 45:75, :, :]))
self.assertTrue(
torch.tensor(sample_2["boxes"]).equal(
torch.tensor([bb_2_a, bb_2_b])
)
)
self.assertTrue(
torch.tensor(sample_2["labels"]).equal(
torch.tensor([[action_2_a], [action_2_b]])
)
)
def test_multiple_videos_with_label_map(self):
with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as label_map_file:
with tempfile.NamedTemporaryFile(delete=False, suffix=".csv") as data_file:
with temp_ava_dataset_2_videos() as (
frame_paths_file,
video_1,
video_2,
video_1_name,
video_2_name,
):
# Create labelmap file
label_map = """item {
name: "bend/bow (at the waist)"
id: 1
}
item {
name: "crouch/kneel"
id: 3
}
item {
name: "dance"
id: 4
}"""
label_map_file.write(label_map.encode())
label_map_file.close()
# add bounding boxes
# video 1
bb_1_a, bb_1_a_string = get_random_bbox()
action_1_a, iou_1_a = 1, 0.85
bb_1_b, bb_1_b_string = get_random_bbox()
action_1_b, iou_1_b = 2, 0.4
data_file.write(
(
f"{video_1_name},902,{bb_1_a_string},"
+ f"{str(action_1_a)},{str(iou_1_a)}\n"
).encode()
)
data_file.write(
(
f"{video_1_name},902,{bb_1_b_string},"
+ f"{str(action_1_b)},{str(iou_1_b)}\n"
).encode()
)
# video 2
bb_2_a, bb_2_a_string = get_random_bbox()
action_2_a, iou_2_a = 3, 0.95
bb_2_b, bb_2_b_string = get_random_bbox()
action_2_b, iou_2_b = 4, 0.9
data_file.write(
(
f"{video_2_name},902,{bb_2_a_string},"
+ f"{str(action_2_a)},{str(iou_2_a)}\n"
).encode()
)
data_file.write(
(
f"{video_2_name},902,{bb_2_b_string},"
+ f"{str(action_2_b)},{str(iou_2_b)}\n"
).encode()
)
data_file.close()
dataset = Ava(
frame_paths_file=frame_paths_file,
frame_labels_file=data_file.name,
clip_sampler=make_clip_sampler("random", 1.0),
label_map_file=label_map_file.name,
)
# All videos are of the form cthw and fps is 30
# Clip is samples at time step = 2 secs in video
sample_1 = next(dataset)
self.assertTrue(sample_1["video"].equal(video_1[:, 45:75, :, :]))
self.assertTrue(
torch.tensor(sample_1["boxes"]).equal(torch.tensor([bb_1_a]))
)
self.assertTrue(
torch.tensor(sample_1["labels"]).equal(
torch.tensor([[action_1_a]])
)
)
sample_2 = next(dataset)
self.assertTrue(sample_2["video"].equal(video_2[:, 45:75, :, :]))
self.assertTrue(
torch.tensor(sample_2["boxes"]).equal(
torch.tensor([bb_2_a, bb_2_b])
)
)
self.assertTrue(
torch.tensor(sample_2["labels"]).equal(
torch.tensor([[action_2_a], [action_2_b]])
)
)