LLM_Inception / pangea.py
lihong2303
update
ba1b871
import os
import pickle
import numpy as np
import shutil
# Translates the dataset identifier stored in each record (record[0]) into the
# image sub-directory name used on disk. Identifiers that are not listed here
# are used unchanged as the directory name.
mapping_dataset_directory = {
    'ActvityNet_hico_style_batch1': 'ActivityNet_hico_batch1',
    'charadesEgo_hico_style': 'charadesego_frame',
    'HAG_hico_style_new': 'hag_frame',
    'HACS_hico_style': 'hacs_frame',
    'kinetics_hico_style': 'kinetics_dataset/k700-2020/train',
}

# Location of the train split (not read by the code below) and of the test
# split that the rest of the script processes.
train_pkl = "/home/lihong/chenyuanjie/Sandwich/Data/B123_train_KIN-FULL_with_node.pkl"
split_test_path = "/home/lihong/chenyuanjie/Sandwich/Data/B123_test_KIN-FULL_with_node.pkl"

# `data` is indexed below as: [0] dataset id, [1] relative image path,
# [2] original label, [3] list of node labels.
with open(split_test_path, 'rb') as split_file:
    data = pickle.load(split_file)
# split_test_pkl = []
# action_num_dict = {}
# for data_idx, data_item in enumerate(data):
# if data_item[0] in mapping_dataset_directory.keys():
# dataset = mapping_dataset_directory[data_item[0]]
# else:
# dataset = data_item[0]
# orig_label = data_item[2]
# node_labels = data_item[3]
# for nod_lab in node_labels:
# if nod_lab in action_num_dict.keys():
# action_num_dict[nod_lab] += 1
# else:
# action_num_dict[nod_lab] = 1
# if data_idx %1000 == 0:
# print(len(data),data_idx)
# current_action_num_dict = {}
# dataset_list = []
# for data_idx, data_item in enumerate(data):
# if data_item[0] in mapping_dataset_directory.keys():
# dataset = mapping_dataset_directory[data_item[0]]
# else:
# dataset = data_item[0]
# image_path = '/data/xiaoqian/Images/' + dataset + '/' + data_item[1]
# if not os.path.isfile(image_path):
# if not dataset in dataset_list:
# dataset_list.append(dataset)
# continue
# orig_label = data_item[2]
# node_labels = data_item[3]
# flag = False
# for nod_lab in node_labels:
# if nod_lab in current_action_num_dict.keys():
# if current_action_num_dict[nod_lab] < action_num_dict[nod_lab] * 0.1 and not flag:
# split_test_pkl.append(data_item)
# flag = True
# current_action_num_dict[nod_lab] += 1
# else:
# split_test_pkl.append(data_item)
# flag = True
# current_action_num_dict[nod_lab] = 1
# if data_idx % 1000 == 0:
# print(len(data),data_idx)
# print(action_num_dict, current_action_num_dict)
# with open(split_test_path, 'wb') as f:
# pickle.dump(split_test_pkl, f)
# exit()
## mapping node to idx
# Load the node-index selection and the VerbNet topology. Context managers are
# used so the file handles are closed deterministically instead of being left
# to the garbage collector.
with open("/home/lihong/chenyuanjie/Sandwich/Data/mapping_node_index.pkl", "rb") as index_file:
    mapping_node_index = pickle.load(index_file)
with open("/home/lihong/chenyuanjie/Sandwich/Data/verbnet_topology_898.pkl", "rb") as topology_file:
    verbnet_topology = pickle.load(topology_file)

Father2Son, objects = verbnet_topology["Father2Son"], verbnet_topology["objects"]
# Converted to an array so it can be fancy-indexed by `mapping_node_index`.
objects = np.array(objects)
# Subset of node names picked out by `mapping_node_index`
# (presumably 290 of the 898 nodes, judging by the names -- TODO confirm).
objects_290 = objects[mapping_node_index]
# Node name -> position inside `objects_290`.
object_to_idx = {obj: idx for idx, obj in enumerate(objects_290)}
# filtered_objects = [obj.split("-")[0] for obj in objects_290]

# Verb stems used for substring matching against node labels when picking
# negative samples.
selected_list = ["hit", "push", "run", "dress", "drive", "cook", "throw", "build", "shake", "cut"]
# Full node names of the selected actions; each must be a key of
# `object_to_idx`, otherwise the lookup below raises KeyError.
true_selected_list = ["hit-18.1", "push-12", "run-51.3.2", "dress-41.1.1-1-1", "drive-11.5", "cooking-45.3", "throw-17.1-1", "build-26.1", "shake-22.3-2", "cut-21.1-1"]
true_selected_list_id = [object_to_idx[node] for node in true_selected_list]
# Action pairs written as "<node1>_<node2>"; the two halves are recovered with
# split("_") further down.
true_selected_paired_list = ['run-51.3.2_hit-18.1', 'drive-11.5_dress-41.1.1-1-1', 'cooking-45.3_build-26.1', 'shake-22.3-2_cut-21.1-1']  # ,'throw-17.1-1_push-12'
true_label = {}

# Recorded label co-occurrence counts. Each key is two node names concatenated
# (first label + co-occurring label). They presumably come from the counting
# loop at the end of this file, run with different first labels -- TODO confirm.
# {'hit-18.1cut-21.1-1': 86808, 'hit-18.1drive-11.5': 14935, 'hit-18.1run-51.3.2': 34237}
# {'run-51.3.2run-51.3.2': 341324, 'run-51.3.2hit-18.1': 34237, 'run-51.3.2cut-21.1-1': 20389}
# {'dress-41.1.1-1-1dress-41.1.1-1-1': 470063, 'dress-41.1.1-1-1run-51.3.2': 63862, 'dress-41.1.1-1-1cut-21.1-1': 47727, 'dress-41.1.1-1-1drive-11.5': 24965, 'dress-41.1.1-1-1hit-18.1': 23118, 'dress-41.1.1-1-1push-12': 11982, 'dress-41.1.1-1-1cooking-45.3': 469, 'dress-41.1.1-1-1build-26.1': 306}
# {'drive-11.5drive-11.5': 238175, 'drive-11.5build-26.1': 15223, 'drive-11.5hit-18.1': 14935, 'drive-11.5cut-21.1-1': 30031, 'drive-11.5dress-41.1.1-1-1': 24965}
# {'cooking-45.3cooking-45.3': 68577, 'cooking-45.3build-26.1': 37668, 'cooking-45.3cut-21.1-1': 15072, 'cooking-45.3dress-41.1.1-1-1': 469}
# {'throw-17.1-1throw-17.1-1': 394887, 'throw-17.1-1hit-18.1': 92553, 'throw-17.1-1drive-11.5': 30348, 'throw-17.1-1dress-41.1.1-1-1': 97911, 'throw-17.1-1run-51.3.2': 30097, 'throw-17.1-1push-12': 20854, 'throw-17.1-1cut-21.1-1': 20714}
# {'build-26.1cooking-45.3': 37668, 'build-26.1build-26.1': 95743, 'build-26.1drive-11.5': 15223, 'build-26.1cut-21.1-1': 23454, 'build-26.1shake-22.3-2': 23015, 'build-26.1dress-41.1.1-1-1': 306}
# {'shake-22.3-2build-26.1': 23015, 'shake-22.3-2shake-22.3-2': 23015, 'shake-22.3-2cut-21.1-1': 13005}
# {'cut-21.1-1cut-21.1-1': 553752, 'cut-21.1-1hit-18.1': 86808, 'cut-21.1-1drive-11.5': 30031, 'cut-21.1-1dress-41.1.1-1-1': 47727, 'cut-21.1-1build-26.1': 23454, 'cut-21.1-1cooking-45.3': 15072, 'cut-21.1-1run-51.3.2': 20389, 'cut-21.1-1throw-17.1-1': 20714, 'cut-21.1-1shake-22.3-2': 13005}

# Result containers filled by the selection loop:
#   selected_pkl         node name -> list of record indices
#   selected_paired_pkl  pair name -> {pair or single node name -> list of record indices}
#   pangea_pkl           bundle of the three result sets that is pickled at the end
selected_pkl, selected_paired_pkl, pangea_pkl = {}, {}, {}
#   negative_pkl         record indices kept as negative samples
#   dataset_list         dataset directories for which at least one image file was missing
negative_pkl, dataset_list = [], []
#   num_images           number of records whose image was copied
num_images = 0
# Maximum number of record indices stored per label / per pair sub-bucket.
MAX_PER_LABEL = 2000
# Maximum number of negative samples collected.
MAX_NEGATIVES = 3000
# Root directory the selected images are copied under (dataset sub-directory
# and relative image path are appended to it).
PANGEA_IMAGE_ROOT = "/home/lihong/workspace/pangea/pangea"


def _append_capped(buckets, key, value, cap):
    """Append ``value`` to ``buckets[key]`` while that list holds fewer than ``cap`` items.

    The list is created on first use. Returns True when the value was stored
    and False when the list was already full.
    """
    entries = buckets.setdefault(key, [])
    if len(entries) >= cap:
        return False
    entries.append(value)
    return True


# Set form of the selected node ids, built once for the intersection below.
selected_ids = set(true_selected_list_id)

# Walk every record, sort its index into the per-label, per-pair and negative
# buckets, and copy the image of every record that landed in some bucket.
for data_idx, data_item in enumerate(data):
    save_flag = False
    dataset = mapping_dataset_directory.get(data_item[0], data_item[0])
    image_path = '/data/xiaoqian/Images/' + dataset + '/' + data_item[1]
    if not os.path.isfile(image_path):
        # Remember which dataset directories have missing images, then skip.
        if dataset not in dataset_list:
            dataset_list.append(dataset)
        continue

    node_labels = data_item[3]
    # Ids of the selected actions that this record is labelled with.
    # Raises KeyError if a node label is not a key of `object_to_idx`.
    co_objects = {object_to_idx[node] for node in node_labels} & selected_ids

    if co_objects:
        # Single-action buckets: one list of record indices per selected node.
        for sel_label in true_selected_list:
            if object_to_idx[sel_label] in co_objects:
                if _append_capped(selected_pkl, sel_label, data_idx, MAX_PER_LABEL):
                    save_flag = True

        # Paired buckets: under each pair, file the record under the pair name
        # when both actions are present, otherwise under the one that is.
        for sel_pair in true_selected_paired_list:
            sel_obj1, sel_obj2 = sel_pair.split("_")
            has_obj1 = object_to_idx[sel_obj1] in co_objects
            has_obj2 = object_to_idx[sel_obj2] in co_objects
            if has_obj1 and has_obj2:
                sub_key = sel_pair
            elif has_obj1:
                sub_key = sel_obj1
            elif has_obj2:
                sub_key = sel_obj2
            else:
                continue
            pair_buckets = selected_paired_pkl.setdefault(sel_pair, {})
            if _append_capped(pair_buckets, sub_key, data_idx, MAX_PER_LABEL):
                save_flag = True
    elif len(negative_pkl) < MAX_NEGATIVES:
        # A record qualifies as a negative only if none of its node labels
        # contains any selected verb stem as a substring.
        mentions_selected = any(
            stem in label for stem in selected_list for label in node_labels
        )
        if not mentions_selected:
            save_flag = True
            negative_pkl.append(data_idx)

    if save_flag:
        num_images += 1
        target_path = os.path.join(PANGEA_IMAGE_ROOT, dataset, data_item[1])
        # exist_ok avoids the separate existence check before creating.
        os.makedirs(os.path.dirname(target_path), exist_ok=True)
        shutil.copy(image_path, target_path)

    # Progress report every 1000 records.
    if data_idx % 1000 == 0:
        print(len(data), data_idx)
# Report how many record indices ended up in each bucket.
for name, indices in selected_pkl.items():
    print(f"selected {name} affordance has {len(indices)} objects")
for name, sub_buckets in selected_paired_pkl.items():
    for sub_name, indices in sub_buckets.items():
        print(f"selected {name} paired actions {sub_name} has {len(indices)} objects")
print("negative_pkl has {} objects".format(len(negative_pkl)))
print("num_images has {} objects".format(num_images))

# Bundle the three result sets and write them out as a single pickle.
pangea_pkl.update(
    selected_pkl=selected_pkl,
    selected_paired_pkl=selected_paired_pkl,
    negative_pkl=negative_pkl,
)
output_path = os.path.join("/home/lihong/workspace/pangea", "pangea_test.pkl")
with open(output_path, "wb") as fp:
    pickle.dump(pangea_pkl, fp)

# Dataset directories that had at least one missing image file.
print(dataset_list)
# The script stops here; everything after this call is not executed.
exit()
# Co-occurrence count: for every record labelled with the anchor action, count
# each selected action that appears alongside it (the anchor itself included).
# NOTE: the script calls exit() before reaching this point, so this section
# only runs if that call is removed.
selected_images = {}
save_data = []
# The anchor is the last entry of `true_selected_list` (index 9).
anchor_label = true_selected_list[9]
for data_idx, data_item in enumerate(data):
    dataset = mapping_dataset_directory.get(data_item[0], data_item[0])
    image_path = '/data/xiaoqian/Images/' + dataset + '/' + data_item[1]
    if not os.path.isfile(image_path):
        # Track dataset directories with missing images and skip the record.
        if dataset not in dataset_list:
            dataset_list.append(dataset)
        continue
    orig_label = data_item[2]
    node_labels = data_item[3]
    if anchor_label in node_labels:
        for candidate in true_selected_list:
            if candidate in node_labels:
                # Key is the two node names concatenated without a separator.
                pair_key = anchor_label + candidate
                selected_images[pair_key] = selected_images.get(pair_key, 0) + 1
    if data_idx % 1000 == 0:
        print(data_idx)
print(selected_images)