# PolyFormer/data/create_finetuning_data.py
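#
# Builds TSV finetuning data for PolyFormer from RefCOCO, RefCOCO+ and
# RefCOCOg: one file per dataset/split, plus a combined, shuffled training
# file with val/test images filtered out. Each row pairs one referring
# sentence with the image, bounding box, mask, and polygon encodings.
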
from refer.refer import REFER
import numpy as np
from PIL import Image
import random
import os
from tqdm import tqdm
import pickle
from poly_utils import is_clockwise, revert_direction, check_length, reorder_points, \
approximate_polygons, interpolate_polygons, image_to_base64, polygons_to_string
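
# cap on the total number of polygon coordinates kept per instance after
# simplification (see approximate_polygons below)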
max_length = 400
data_root = './refer/data'
datasets = ['refcoco', 'refcoco+', 'refcocog']
image_dir = './datasets/images/mscoco/train2014'
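# COCO image ids that appear in any val/test split; used below to keep the
# combined training file free of evaluation images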
with open("data/val_test_files.p", "rb") as f:
    val_test_files = pickle.load(f)
combined_train_data = []

for dataset in datasets:
    # refcoco and refcoco+ ship with the UNC splits; refcocog uses the UMD split
    if dataset == 'refcoco':
        splits = ['train', 'val', 'testA', 'testB']
        splitBy = 'unc'
    elif dataset == 'refcoco+':
        splits = ['train', 'val', 'testA', 'testB']
        splitBy = 'unc'
    elif dataset == 'refcocog':
        splits = ['train', 'val']
        splitBy = 'umd'

    # the REFER loader depends only on (dataset, splitBy), so build it once
    # per dataset instead of once per split
    refer = REFER(data_root, dataset, splitBy)

    save_dir = f'datasets/finetune/{dataset}'
    os.makedirs(save_dir, exist_ok=True)

    for split in splits:
        # polygon-length statistics, reported once the split is written
        num_pts = []
        max_num_pts = 0

        file_name = os.path.join(save_dir, f"{dataset}_{split}.tsv")
        print("creating", file_name)
        writer = open(file_name, 'w')
        ref_ids = refer.getRefIds(split=split)

        for this_ref_id in tqdm(ref_ids):
            this_img_id = refer.getImgIds(this_ref_id)
            this_img = refer.Imgs[this_img_id[0]]
            fn = this_img['file_name']
            # the trailing digits of the COCO file name are the image id
            img_id = fn.split(".")[0].split("_")[-1]

            # load the image and encode it as a base64 JPEG string
            img = Image.open(os.path.join(image_dir, fn)).convert("RGB")
            img_base64 = image_to_base64(img, format='jpeg')

            # load the referent's segmentation mask and encode it as a
            # base64 PNG (palette mode, binary values)
            ref = refer.loadRefs(this_ref_id)
            ref_mask = np.array(refer.getMask(ref[0])['mask'])
            annot = np.zeros(ref_mask.shape)
            annot[ref_mask == 1] = 1
            annot_img = Image.fromarray(annot.astype(np.uint8), mode="P")
            annot_base64 = image_to_base64(annot_img, format='png')

            polygons = refer.getPolygon(ref[0])['polygon']
            polygons_processed = []
            for polygon in polygons:
                # make the polygon clockwise
                if not is_clockwise(polygon):
                    polygon = revert_direction(polygon)
                # reorder the polygon so that the first vertex is the one
                # closest to the image origin
                polygon = reorder_points(polygon)
                polygons_processed.append(polygon)
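
            # sort multiple polygons by their first vertex's distance to the
            # image origin, then derive two encodings: a densely interpolated
            # polygon and a simplified one (the second argument of
            # approximate_polygons is presumably a simplification tolerance;
            # max_length caps the number of coordinates)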
            polygons = sorted(polygons_processed, key=lambda x: (x[0] ** 2 + x[1] ** 2, x[0], x[1]))
            polygons_interpolated = interpolate_polygons(polygons)
            polygons = approximate_polygons(polygons, 5, max_length)
            pts_string = polygons_to_string(polygons)
            pts_string_interpolated = polygons_to_string(polygons_interpolated)

            # load the bounding box and convert it from x,y,w,h to x1,y1,x2,y2
            box = refer.getRefBox(this_ref_id)
            x, y, w, h = box
            box_string = f'{x},{y},{x + w},{y + h}'

            max_num_pts = max(max_num_pts, check_length(polygons))
            num_pts.append(check_length(polygons))

            # load the referring expressions; a ref can have several sentences
            ref_sent = refer.Refs[this_ref_id]
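            # one TSV row per sentence; the columns are: uniq_id, image_id,
            # sentence, box, simplified polygon, image (base64 jpeg),
            # mask (base64 png), interpolated polygon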
            for i, sent in enumerate(ref_sent['sentences']):
                uniq_id = f"{this_ref_id}_{i}"
                instance = '\t'.join(
                    [uniq_id, str(this_img_id[0]), sent['sent'], box_string, pts_string, img_base64,
                     annot_base64, pts_string_interpolated]) + '\n'
                writer.write(instance)
                # keep the combined training pool free of val/test images
                if split == 'train' and img_id not in val_test_files:
                    combined_train_data.append(instance)
        writer.close()
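        # report the polygon-length statistics gathered above
        print(f"{split}: max {max_num_pts} coordinates, mean {np.mean(num_pts):.1f}")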

# merge the train splits of all three datasets into a single shuffled file
random.shuffle(combined_train_data)
file_name = "datasets/finetune/refcoco+g_train_shuffled.tsv"
print("creating", file_name)
with open(file_name, 'w') as writer:
    writer.writelines(combined_train_data)
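
# A minimal sketch of how one emitted row can be read back (hypothetical
# downstream reader, not part of this script):
#
#     with open("datasets/finetune/refcoco/refcoco_train.tsv") as f:
#         for line in f:
#             (uniq_id, image_id, sentence, box, pts,
#              img_b64, mask_b64, pts_interp) = line.rstrip("\n").split("\t")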