# Convert RefCOCO / RefCOCO+ / RefCOCOg referring-expression annotations into
# TSV files for finetuning (one row per referring sentence), plus one combined
# shuffled training file across all three datasets.
from refer.refer import REFER
import numpy as np
from PIL import Image
import random
import os
from tqdm import tqdm
import pickle
from poly_utils import is_clockwise, revert_direction, check_length, reorder_points, \
    approximate_polygons, interpolate_polygons, image_to_base64, polygons_to_string

# Maximum number of polygon coordinate values kept after approximation.
max_length = 400

data_root = './refer/data'
datasets = ['refcoco', 'refcoco+', 'refcocog']
image_dir = './datasets/images/mscoco/train2014'

# Image ids that appear in any val/test split; rows for these images are kept
# out of the combined cross-dataset training file to avoid train/eval leakage.
with open("data/val_test_files.p", "rb") as f:
    val_test_files = pickle.load(f)

combined_train_data = []
for dataset in datasets:
    # Each dataset uses its canonical split scheme / annotation source.
    if dataset == 'refcoco':
        splits = ['train', 'val', 'testA', 'testB']
        splitBy = 'unc'
    elif dataset == 'refcoco+':
        splits = ['train', 'val', 'testA', 'testB']
        splitBy = 'unc'
    elif dataset == 'refcocog':
        splits = ['train', 'val']
        splitBy = 'umd'

    save_dir = f'datasets/finetune/{dataset}'
    os.makedirs(save_dir, exist_ok=True)

    # The REFER index depends only on (dataset, splitBy), so build it once per
    # dataset instead of once per split.
    refer = REFER(data_root, dataset, splitBy)

    for split in splits:
        file_name = os.path.join(save_dir, f"{dataset}_{split}.tsv")
        print("creating ", file_name)
        ref_ids = refer.getRefIds(split=split)
        with open(file_name, 'w') as writer:
            for this_ref_id in tqdm(ref_ids):
                this_img_id = refer.getImgIds(this_ref_id)
                this_img = refer.Imgs[this_img_id[0]]
                fn = this_img['file_name']
                # COCO file names end in "..._<imageid>.jpg"; extract the id
                # used for the val/test exclusion check below.
                img_id = fn.split(".")[0].split("_")[-1]

                # Load the image and encode it as a base64 string.
                img = Image.open(os.path.join(image_dir, fn)).convert("RGB")
                img_base64 = image_to_base64(img, format='jpeg')

                # Load the segmentation mask and encode it as a binary
                # palette ("P" mode) PNG.
                ref = refer.loadRefs(this_ref_id)
                ref_mask = np.array(refer.getMask(ref[0])['mask'])
                annot = (ref_mask == 1).astype(np.uint8)  # 1 inside the region, 0 elsewhere
                annot_img = Image.fromarray(annot, mode="P")
                annot_base64 = image_to_base64(annot_img, format='png')

                # Normalize every polygon: clockwise orientation, first vertex
                # closest to the image origin.
                polygons = refer.getPolygon(ref[0])['polygon']
                polygons_processed = []
                for polygon in polygons:
                    if not is_clockwise(polygon):
                        polygon = revert_direction(polygon)
                    polygon = reorder_points(polygon)
                    polygons_processed.append(polygon)
                # Order polygons by their first vertex's distance to the origin.
                polygons = sorted(polygons_processed,
                                  key=lambda p: (p[0] ** 2 + p[1] ** 2, p[0], p[1]))
                polygons_interpolated = interpolate_polygons(polygons)
                polygons = approximate_polygons(polygons, 5, max_length)
                pts_string = polygons_to_string(polygons)
                pts_string_interpolated = polygons_to_string(polygons_interpolated)

                # Bounding box: convert x,y,w,h to "x1,y1,x2,y2".
                x, y, w, h = refer.getRefBox(this_ref_id)
                box_string = f'{x},{y},{x + w},{y + h}'

                # Emit one TSV row per referring sentence of this reference.
                ref_sent = refer.Refs[this_ref_id]
                for i, sent in enumerate(ref_sent['sentences']):
                    uniq_id = f"{this_ref_id}_{i}"
                    instance = '\t'.join(
                        [uniq_id, str(this_img_id[0]), sent['sent'], box_string,
                         pts_string, img_base64, annot_base64,
                         pts_string_interpolated]) + '\n'
                    writer.write(instance)
                    # Only training rows whose image is absent from every
                    # val/test split go into the combined file.
                    if split == 'train' and img_id not in val_test_files:
                        combined_train_data.append(instance)

# Shuffle and write the combined (refcoco + refcoco+ + refcocog) training file.
random.shuffle(combined_train_data)
file_name = os.path.join("datasets/finetune/refcoco+g_train_shuffled.tsv")
print("creating ", file_name)
with open(file_name, 'w') as writer:
    writer.writelines(combined_train_data)