File size: 2,981 Bytes
447ff7c |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 |
import json
import cv2
import numpy as np
import os
from torch.utils.data import Dataset
from PIL import Image
import cv2
from .data_utils import *
from .base import BaseDataset
from pycocotools import mask as mask_utils
class UVOValDataset(BaseDataset):
    """Dataset that draws random (reference, target) frame pairs from video annotations.

    Each annotation in ``video_json['annotations']`` is read for a
    ``video_id`` and a list of per-frame ``segmentations``.  ``image_json``
    maps a stringified video id to a list of frame paths relative to
    ``image_dir``.
    """

    def __init__(self, image_dir, video_json, image_json):
        """Load both annotation files and set the fixed configuration.

        Args:
            image_dir: Root directory that frame paths are joined onto.
            video_json: Path to a JSON file with an ``'annotations'`` list.
            image_json: Path to a JSON file mapping video id (as a string)
                to that video's list of frame paths.
        """
        with open(video_json, 'r') as video_file:
            video_annotations = json.load(video_file)
        with open(image_json, 'r') as image_file:
            video_dict = json.load(image_file)

        self.image_root = image_dir
        self.data = video_annotations['annotations']
        self.video_dict = video_dict
        # NOTE(review): size / clip_size / dynamic are not read in this class;
        # presumably consumed by BaseDataset helpers -- confirm there.
        self.size = (512, 512)
        self.clip_size = (224, 224)
        self.dynamic = 1

    def __len__(self):
        """Return a fixed nominal length, independent of the annotation count."""
        return 8000

    def __getitem__(self, idx):
        """Return one randomly drawn sample.

        ``idx`` is ignored: an annotation index is drawn at random, and
        drawing is repeated until ``get_sample`` succeeds.

        Raises:
            RuntimeError: If there are no annotations to sample from.
        """
        num_annotations = len(self.data)
        if num_annotations == 0:
            # Without this guard the retry loop below could never terminate.
            raise RuntimeError('UVOValDataset has no annotations to sample from')

        while True:
            # randint's upper bound is exclusive, so pass the full length to
            # make every annotation (including the last one) reachable.
            idx = np.random.randint(0, num_annotations)
            try:
                return self.get_sample(idx)
            except Exception:
                # Best-effort loading: skip samples that fail and redraw.
                # KeyboardInterrupt / SystemExit are deliberately not caught
                # so the loop can still be interrupted.
                continue

    def check_region_size(self, image, yyxx, ratio, mode='max'):
        """Check a box's size against a fraction of the image size.

        Args:
            image: Array whose first two dimensions are height and width.
            yyxx: Box as ``(y1, y2, x1, x2)``.
            ratio: Fraction of the image height/width used as the threshold.
            mode: ``'max'`` fails the box when it is larger than the
                threshold in both height and width; ``'min'`` fails it when
                it is smaller in both.  Any other value always passes.

        Returns:
            ``True`` if the box passes the check, otherwise ``False``.
        """
        limit_h = image.shape[0] * ratio
        limit_w = image.shape[1] * ratio
        y1, y2, x1, x2 = yyxx
        box_h, box_w = y2 - y1, x2 - x1

        if mode == 'max':
            return not (box_h > limit_h and box_w > limit_w)
        if mode == 'min':
            return not (box_h < limit_h and box_w < limit_w)
        return True

    @staticmethod
    def _read_rgb(image_path):
        """Read an image from disk and return it in RGB channel order."""
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None rather than
            # raising; fail here with the offending path instead.
            raise FileNotFoundError('Could not read image: ' + image_path)
        return cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

    def get_sample(self, idx):
        """Build one training item from annotation ``idx``.

        Picks a reference frame and a later target frame from the
        annotation's video, loads both images and their masks, and passes
        them to ``process_pairs``.

        Returns:
            The mapping returned by ``process_pairs`` with an added
            ``'time_steps'`` entry from ``sample_timestep``.
        """
        ins_anno = self.data[idx]
        video_id = str(ins_anno['video_id'])
        frames = self.video_dict[video_id]
        # assumes masks[i] is the annotation for frames[i] -- TODO confirm
        masks = ins_anno['segmentations']

        # Sampling frames: the target is at least `min_interval` frames
        # after the reference and never past the last frame.
        num_frames = len(frames)
        min_interval = num_frames // 5
        start_frame_index = np.random.randint(low=0, high=num_frames - min_interval)
        end_frame_index = start_frame_index + np.random.randint(
            min_interval, num_frames - start_frame_index
        )
        end_frame_index = min(end_frame_index, num_frames - 1)

        # Get image path
        ref_image_path = os.path.join(self.image_root, frames[start_frame_index])
        tar_image_path = os.path.join(self.image_root, frames[end_frame_index])

        # Read Image and Mask
        ref_image = self._read_rgb(ref_image_path)
        tar_image = self._read_rgb(tar_image_path)
        ref_mask = mask_utils.decode(masks[start_frame_index])
        tar_mask = mask_utils.decode(masks[end_frame_index])

        # process_pairs / sample_timestep are not defined in this class;
        # presumably inherited from BaseDataset.
        item_with_collage = self.process_pairs(ref_image, ref_mask, tar_image, tar_mask)
        item_with_collage['time_steps'] = self.sample_timestep()
        return item_with_collage
|