# Select a PANGEA test subset: for a chosen set of VerbNet actions (and action
# pairs), collect capped lists of sample indices from the B123 test pickle and
# copy the corresponding images into /home/lihong/workspace/pangea/pangea.
import os
import pickle
import shutil

import numpy as np
# Map dataset names as they appear in the pickle records to the on-disk image
# directory under /data/xiaoqian/Images/.
# NOTE(review): 'ActvityNet_hico_style_batch1' looks misspelled, but it is a
# data key that must match the pickle contents — do not "fix" it here.
mapping_dataset_directory = {
    'ActvityNet_hico_style_batch1': 'ActivityNet_hico_batch1',
    'charadesEgo_hico_style': 'charadesego_frame',
    'HAG_hico_style_new': 'hag_frame',
    'HACS_hico_style': 'hacs_frame',
    'kinetics_hico_style': 'kinetics_dataset/k700-2020/train',
}
train_pkl = "/home/lihong/chenyuanjie/Sandwich/Data/B123_train_KIN-FULL_with_node.pkl"
split_test_path = "/home/lihong/chenyuanjie/Sandwich/Data/B123_test_KIN-FULL_with_node.pkl"
# Each item is indexed below as: [0]=dataset name, [1]=relative image path,
# [2]=original label, [3]=list of VerbNet node labels.
with open(split_test_path, 'rb') as f:
    data = pickle.load(f)
# Disabled earlier pass: build a ~10%-per-action test split from `data` and
# overwrite split_test_path with it.
# split_test_pkl = []
# action_num_dict = {}
# for data_idx, data_item in enumerate(data):
#     if data_item[0] in mapping_dataset_directory.keys():
#         dataset = mapping_dataset_directory[data_item[0]]
#     else:
#         dataset = data_item[0]
#     orig_label = data_item[2]
#     node_labels = data_item[3]
#     for nod_lab in node_labels:
#         if nod_lab in action_num_dict.keys():
#             action_num_dict[nod_lab] += 1
#         else:
#             action_num_dict[nod_lab] = 1
#     if data_idx % 1000 == 0:
#         print(len(data), data_idx)
# current_action_num_dict = {}
# dataset_list = []
# for data_idx, data_item in enumerate(data):
#     if data_item[0] in mapping_dataset_directory.keys():
#         dataset = mapping_dataset_directory[data_item[0]]
#     else:
#         dataset = data_item[0]
#     image_path = '/data/xiaoqian/Images/' + dataset + '/' + data_item[1]
#     if not os.path.isfile(image_path):
#         if not dataset in dataset_list:
#             dataset_list.append(dataset)
#         continue
#     orig_label = data_item[2]
#     node_labels = data_item[3]
#     flag = False
#     for nod_lab in node_labels:
#         if nod_lab in current_action_num_dict.keys():
#             if current_action_num_dict[nod_lab] < action_num_dict[nod_lab] * 0.1 and not flag:
#                 split_test_pkl.append(data_item)
#                 flag = True
#             current_action_num_dict[nod_lab] += 1
#         else:
#             split_test_pkl.append(data_item)
#             flag = True
#             current_action_num_dict[nod_lab] = 1
#     if data_idx % 1000 == 0:
#         print(len(data), data_idx)
# print(action_num_dict, current_action_num_dict)
# with open(split_test_path, 'wb') as f:
#     pickle.dump(split_test_pkl, f)
# exit()
## mapping node to idx
# Load the node-index selection and the 898-node VerbNet topology.
# (with-blocks instead of bare open() inside pickle.load, so the file handles
# are closed deterministically.)
with open("/home/lihong/chenyuanjie/Sandwich/Data/mapping_node_index.pkl", "rb") as f:
    mapping_node_index = pickle.load(f)
with open("/home/lihong/chenyuanjie/Sandwich/Data/verbnet_topology_898.pkl", "rb") as f:
    verbnet_topology = pickle.load(f)
Father2Son, objects = verbnet_topology["Father2Son"], verbnet_topology["objects"]
objects = np.array(objects)
# Restrict to the 290 nodes picked out by mapping_node_index, then build a
# name -> index lookup over that restricted list.
objects_290 = objects[mapping_node_index]
object_to_idx = {obj: idx for idx, obj in enumerate(objects_290)}
# filtered_objects = [obj.split("-")[0] for obj in objects_290]
# Verb stems used only for the negative-sample substring filter below.
selected_list = ["hit", "push", "run", "dress", "drive", "cook", "throw", "build", "shake", "cut"]
# Full VerbNet node names of the ten selected actions (same order as the stems).
true_selected_list = ["hit-18.1", "push-12", "run-51.3.2", "dress-41.1.1-1-1", "drive-11.5", "cooking-45.3", "throw-17.1-1", "build-26.1", "shake-22.3-2", "cut-21.1-1"]
true_selected_list_id = [object_to_idx[node] for node in true_selected_list]
# Action pairs of interest, encoded as "node1_node2".
true_selected_paired_list = ['run-51.3.2_hit-18.1', 'drive-11.5_dress-41.1.1-1-1', 'cooking-45.3_build-26.1', 'shake-22.3-2_cut-21.1-1']  # ,'throw-17.1-1_push-12'
true_label = {}
# Co-occurrence counts recorded from a previous run of the (now unreachable)
# counting pass at the bottom of this script:
# {'hit-18.1cut-21.1-1': 86808, 'hit-18.1drive-11.5': 14935, 'hit-18.1run-51.3.2': 34237}
# {'run-51.3.2run-51.3.2': 341324, 'run-51.3.2hit-18.1': 34237, 'run-51.3.2cut-21.1-1': 20389}
# {'dress-41.1.1-1-1dress-41.1.1-1-1': 470063, 'dress-41.1.1-1-1run-51.3.2': 63862, 'dress-41.1.1-1-1cut-21.1-1': 47727, 'dress-41.1.1-1-1drive-11.5': 24965, 'dress-41.1.1-1-1hit-18.1': 23118, 'dress-41.1.1-1-1push-12': 11982, 'dress-41.1.1-1-1cooking-45.3': 469, 'dress-41.1.1-1-1build-26.1': 306}
# {'drive-11.5drive-11.5': 238175, 'drive-11.5build-26.1': 15223, 'drive-11.5hit-18.1': 14935, 'drive-11.5cut-21.1-1': 30031, 'drive-11.5dress-41.1.1-1-1': 24965}
# {'cooking-45.3cooking-45.3': 68577, 'cooking-45.3build-26.1': 37668, 'cooking-45.3cut-21.1-1': 15072, 'cooking-45.3dress-41.1.1-1-1': 469}
# {'throw-17.1-1throw-17.1-1': 394887, 'throw-17.1-1hit-18.1': 92553, 'throw-17.1-1drive-11.5': 30348, 'throw-17.1-1dress-41.1.1-1-1': 97911, 'throw-17.1-1run-51.3.2': 30097, 'throw-17.1-1push-12': 20854, 'throw-17.1-1cut-21.1-1': 20714}
# {'build-26.1cooking-45.3': 37668, 'build-26.1build-26.1': 95743, 'build-26.1drive-11.5': 15223, 'build-26.1cut-21.1-1': 23454, 'build-26.1shake-22.3-2': 23015, 'build-26.1dress-41.1.1-1-1': 306}
# {'shake-22.3-2build-26.1': 23015, 'shake-22.3-2shake-22.3-2': 23015, 'shake-22.3-2cut-21.1-1': 13005}
# {'cut-21.1-1cut-21.1-1': 553752, 'cut-21.1-1hit-18.1': 86808, 'cut-21.1-1drive-11.5': 30031, 'cut-21.1-1dress-41.1.1-1-1': 47727, 'cut-21.1-1build-26.1': 23454, 'cut-21.1-1cooking-45.3': 15072, 'cut-21.1-1run-51.3.2': 20389, 'cut-21.1-1throw-17.1-1': 20714, 'cut-21.1-1shake-22.3-2': 13005}
selected_pkl = {}         # action name -> capped (2000) list of data indices
selected_paired_pkl = {}  # pair name -> {inner key -> capped (2000) list of data indices}
pangea_pkl = {}           # final payload dumped to pangea_test.pkl
negative_pkl = []         # capped (3000) indices containing none of the selected verbs
dataset_list = []         # datasets observed with missing image files
num_images = 0            # count of images copied to the pangea directory
def _add_capped(bucket, key, idx, cap=2000):
    """Append idx to bucket[key] unless that list already holds `cap` entries.

    Creates the list on first use. Returns True iff idx was appended —
    exactly the condition under which the original code set save_flag.
    """
    entries = bucket.setdefault(key, [])
    if len(entries) < cap:
        entries.append(idx)
        return True
    return False


# Main selection pass: bucket each sample by selected action / action pair,
# collect negatives, and copy every kept image into the pangea directory.
for data_idx, data_item in enumerate(data):
    save_flag = False
    if data_item[0] in mapping_dataset_directory.keys():
        dataset = mapping_dataset_directory[data_item[0]]
    else:
        dataset = data_item[0]
    image_path = '/data/xiaoqian/Images/' + dataset + '/' + data_item[1]
    if not os.path.isfile(image_path):
        # Remember which datasets have missing files, then skip the sample.
        if not dataset in dataset_list:
            dataset_list.append(dataset)
        continue
        # NOTE(review): the two lines below were unreachable dead debug code
        # (they sat after the `continue`); kept here for reference.
        # print(data_item)
        # exit()
    orig_label = data_item[2]
    node_labels = data_item[3]
    node_labels_id = [object_to_idx[node] for node in node_labels]
    # Which of the ten selected actions appear in this sample's node labels?
    co_objects = list(set(node_labels_id).intersection(set(true_selected_list_id)))
    if len(co_objects) > 0:
        # Per-action buckets (up to 2000 samples each).
        for sel_action in true_selected_list:
            if object_to_idx[sel_action] in co_objects:
                if _add_capped(selected_pkl, sel_action, data_idx):
                    save_flag = True
        # Per-pair buckets: keyed by the pair name when both actions are
        # present, otherwise by whichever single action is present.
        for sel_pair in true_selected_paired_list:
            sel_obj1, sel_obj2 = sel_pair.split("_")
            has1 = object_to_idx[sel_obj1] in co_objects
            has2 = object_to_idx[sel_obj2] in co_objects
            if not (has1 or has2):
                continue
            if has1 and has2:
                inner_key = sel_pair
            elif has1:
                inner_key = sel_obj1
            else:
                inner_key = sel_obj2
            sub_bucket = selected_paired_pkl.setdefault(sel_pair, {})
            if _add_capped(sub_bucket, inner_key, data_idx):
                save_flag = True
    else:
        # Negative sample: none of the ten selected actions present, and no
        # node label even contains one of the verb stems (substring check).
        if len(negative_pkl) < 3000:
            has_selected_stem = any(
                stem in nod_lab for stem in selected_list for nod_lab in node_labels
            )
            if not has_selected_stem:
                save_flag = True
                negative_pkl.append(data_idx)
    if save_flag:
        # Mirror the image into the pangea directory, creating subdirs as needed.
        num_images += 1
        dest_path = os.path.join("/home/lihong/workspace/pangea/pangea", dataset, data_item[1])
        os.makedirs(os.path.dirname(dest_path), exist_ok=True)
        shutil.copy(image_path, dest_path)
    if data_idx % 1000 == 0:
        print(len(data), data_idx)
# Report how many samples landed in each per-action bucket, each per-pair
# bucket, and the negative pool, then dump everything to pangea_test.pkl.
for name, indices in selected_pkl.items():
    print(f"selected {name} affordance has {len(indices)} objects")
for name, sub_bucket in selected_paired_pkl.items():
    for sub_name, indices in sub_bucket.items():
        print(f"selected {name} paired actions {sub_name} has {len(indices)} objects")
print("negative_pkl has {} objects".format(len(negative_pkl)))
print("num_images has {} objects".format(num_images))
pangea_pkl["selected_pkl"] = selected_pkl
pangea_pkl["selected_paired_pkl"] = selected_paired_pkl
pangea_pkl["negative_pkl"] = negative_pkl
with open(os.path.join("/home/lihong/workspace/pangea", "pangea_test.pkl"), "wb") as fp:
    pickle.dump(pangea_pkl, fp)
print(dataset_list)  # datasets whose image files were missing on disk
exit()
# NOTE(review): unreachable — the exit() above terminates the script before
# this pass runs. Kept for reference: it counts how often each selected action
# co-occurs with cut-21.1-1 (true_selected_list[9]) in the node labels; the
# results are the hard-coded count comments near the top of the script.
selected_images = {}
save_data = []  # NOTE(review): never used in the visible code
for data_idx, data_item in enumerate(data):
    if data_item[0] in mapping_dataset_directory.keys():
        dataset = mapping_dataset_directory[data_item[0]]
    else:
        dataset = data_item[0]
    image_path = '/data/xiaoqian/Images/' + dataset + '/' + data_item[1]
    if not os.path.isfile(image_path):
        if not dataset in dataset_list:
            dataset_list.append(dataset)
        continue
    orig_label = data_item[2]
    node_labels = data_item[3]
    if true_selected_list[9] in node_labels:
        for other in true_selected_list:
            if other in node_labels:
                # Key is the concatenation "<anchor><other>", e.g. "cut-21.1-1hit-18.1".
                key = true_selected_list[9] + other
                selected_images[key] = selected_images.get(key, 0) + 1
    if data_idx % 1000 == 0:
        print(data_idx)
print(selected_images)