In [51]:
import os
import h5py
import json
import numpy as np
import tqdm
import itertools
import copy
from collections import defaultdict

from StructDiffuser.tokenizer import Tokenizer

In [13]:
class SemanticArrangementDataset:
    """Index of rearrangement scene files and a reader for their goal structure parameters.

    The constructor gathers scene file paths from per-split index files; ``get_raw_data``
    opens one scene file and returns its structure parameters as ``(value, type)`` pairs.
    """

    def __init__(self, data_roots, index_roots, splits, tokenizer):
        """Collect the scene files listed in the index files of every split.

        Args:
            data_roots: list of data directories.
            index_roots: list of index sub-directory names, paired one-to-one with
                ``data_roots`` via ``zip``.
            splits: split names used to build the index file name
                ``"<split>_arrangement_indices_file_all.txt"``.
            tokenizer: stored on the instance as ``self.tokenizer``.
        """
        self.data_roots = data_roots
        print("data dirs:", self.data_roots)

        self.tokenizer = tokenizer

        arrangement_steps = []
        for split in splits:
            for data_root, index_root in zip(data_roots, index_roots):
                arrangement_indices_file = os.path.join(data_root, index_root, "{}_arrangement_indices_file_all.txt".format(split))
                if not os.path.exists(arrangement_indices_file):
                    print("{} does not exist".format(arrangement_indices_file))
                    continue
                with open(arrangement_indices_file, "r") as fh:
                    # NOTE(review): eval() executes whatever the first line of the index file
                    # contains. Only use index files you generated yourself; if the line is
                    # always a plain literal, ast.literal_eval would be the safer choice.
                    entries = eval(fh.readline().strip())
                # each entry is indexed as (path relative to data_root, step index)
                arrangement_steps.extend((os.path.join(data_root, f[0]), f[1]) for f in entries)

        # only keep one dummy step for each rearrangement
        self.arrangement_data = [filename for filename, step_t in arrangement_steps if step_t == 0]
        print("{} valid sequences".format(len(self.arrangement_data)))

    def __len__(self):
        """Return the number of kept scene files."""
        return len(self.arrangement_data)

    def get_raw_data(self, idx):
        """Read the structure parameters of scene ``idx``.

        Args:
            idx: position in ``self.arrangement_data``.

        Returns:
            A list of ``(value, type)`` tuples in the order shape, rotation, position_x,
            position_y, followed by radius for circles (the stored radius) and lines
            (half of the stored length). Other shape types have only the first four.
        """
        filename = self.arrangement_data[idx]
        # Close the file as soon as the JSON payload is read; this method is called once per
        # scene, so an unclosed handle per call adds up quickly.
        with h5py.File(filename, 'r') as h5:
            goal_specification = json.loads(str(np.array(h5["goal_specification"])))

        ###################################
        # preparing sentence

        # structure parameters shared by every shape type
        structure_parameters = goal_specification["shape"]
        shape_type = structure_parameters["type"]
        struct_spec = [
            (shape_type, "shape"),
            (structure_parameters["rotation"][2], "rotation"),
            (structure_parameters["position"][0], "position_x"),
            (structure_parameters["position"][1], "position_y"),
        ]

        # circles and lines carry a fifth, size-like parameter
        if shape_type == "circle":
            struct_spec.append((structure_parameters["radius"], "radius"))
        elif shape_type == "line":
            struct_spec.append((structure_parameters["length"] / 2.0, "radius"))

        return struct_spec

In [14]:
tokenizer = Tokenizer("/home/weiyu/data_drive/data_new_objects/type_vocabs_coarse.json")

# (structure type, index folder) for every dataset that should be loaded
shape_index_pairs = [("circle", "index_10k"), ("line", "index_10k"), ("tower", "index_10k"), ("dinner", "index_10k")]

# one data directory and one index folder name per structure type, in matching order
data_roots = [
    "/home/weiyu/data_drive/data_new_objects/examples_{}_new_objects/result".format(shape_name)
    for shape_name, _ in shape_index_pairs
]
index_roots = [index_name for _, index_name in shape_index_pairs]

dataset = SemanticArrangementDataset(
    data_roots=data_roots,
    index_roots=index_roots,
    splits=["train", "valid", "test"],
    tokenizer=tokenizer,
)


Build one vacab for everything...
The vocab has 124 tokens: {'PAD': 0, 'CLS': 1, 'class:MASK': 2, 'class:Basket': 3, 'class:BeerBottle': 4, 'class:Book': 5, 'class:Bottle': 6, 'class:Bowl': 7, 'class:Calculator': 8, 'class:Candle': 9, 'class:CellPhone': 10, 'class:ComputerMouse': 11, 'class:Controller': 12, 'class:Cup': 13, 'class:Donut': 14, 'class:Fork': 15, 'class:Hammer': 16, 'class:Knife': 17, 'class:Marker': 18, 'class:MilkCarton': 19, 'class:Mug': 20, 'class:Pan': 21, 'class:Pen': 22, 'class:PillBottle': 23, 'class:Plate': 24, 'class:PowerStrip': 25, 'class:Scissors': 26, 'class:SoapBottle': 27, 'class:SodaCan': 28, 'class:Spoon': 29, 'class:Stapler': 30, 'class:Teapot': 31, 'class:VideoGameController': 32, 'class:WineBottle': 33, 'class:CanOpener': 34, 'class:Fruit': 35, 'scene:MASK': 36, 'scene:dinner': 37, 'size:MASK': 38, 'size:L': 39, 'size:M': 40, 'size:S': 41, 'color:MASK': 42, 'color:blue': 43, 'color:cyan': 44, 'color:green': 45, 'color:magenta': 46, 'color:red': 47, '

In [4]:
# Shuffle all scene indices once; the first ten are previewed here.
idxs = np.random.permutation(len(dataset))
for sample_idx in idxs[:10]:
    print("\n")

    # full specification and its tokenizer rendering
    struct_spec = dataset.get_raw_data(sample_idx)
    print(struct_spec)
    print(tokenizer.convert_structure_params_to_natural_language(struct_spec))

    # keep a random, non-empty subset of the parameters in their original order
    num_params = len(struct_spec)
    shuffled_positions = np.random.permutation(num_params)
    kept_positions = sorted(shuffled_positions[:np.random.randint(1, num_params + 1)])
    incomplete_struct_spec = [struct_spec[pos] for pos in kept_positions]

    print(incomplete_struct_spec)
    print(tokenizer.convert_structure_params_to_natural_language(incomplete_struct_spec))

    print(tokenizer.convert_structure_params_to_type_value_tuple(incomplete_struct_spec))



{'place_at_once': 'False', 'position': [0.4530459674902468, 0.2866384076623889, 0.011194709806729462], 'rotation': [5.101818936729106e-05, 1.362746309147995e-06, 2.145504341444197], 'type': 'tower'}
[('tower', 'shape'), (2.145504341444197, 'rotation'), (0.4530459674902468, 'position_x'), (0.2866384076623889, 'position_y')]
tower in the middle left of the table facing west
[('tower', 'shape'), (2.145504341444197, 'rotation'), (0.4530459674902468, 'position_x'), (0.2866384076623889, 'position_y')]
tower in the middle left of the table facing west
(('rotation', 'west'), ('shape', 'tower'), ('x', 'middle'), ('y', 'left'))


{'length': 0.15789473684210525, 'length_increment': 0.05, 'max_length': 1.0, 'min_length': 0.0, 'place_at_once': 'True', 'position': [0.5744088910421017, 0.0, 0.0], 'rotation': [0.0, -0.0, 0.0], 'type': 'dinner', 'uniform_space': 'False'}
[('dinner', 'shape'), (0.0, 'rotation'), (0.5744088910421017, 'position_x'), (0.0, 'position_y')]
dinner in the middle center of th

In [49]:
# Collect the distinct type/value tuples produced by every non-empty subset of every
# scene's structure parameters.
unique_type_value_tuples = set()
for sample_idx in tqdm.tqdm(idxs):
    struct_spec = dataset.get_raw_data(sample_idx)

    # all subsets of size 1..len(struct_spec), smallest sizes first
    incomplete_struct_specs = itertools.chain.from_iterable(
        itertools.combinations(struct_spec, subset_size)
        for subset_size in range(1, len(struct_spec) + 1)
    )

    unique_type_value_tuples.update(
        tokenizer.convert_structure_params_to_type_value_tuple(incomplete_struct_spec)
        for incomplete_struct_spec in incomplete_struct_specs
    )

print(len(unique_type_value_tuples))

100%|██████████| 40000/40000 [00:23<00:00, 1699.94it/s]

669





In [None]:
# Draft sentence patterns with [bracketed] placeholders (size, shape, objects, x, y, rotation).
# This list is not read by any other cell visible in this notebook; the templates that are
# actually instantiated are loaded from sentence_template.txt in a later cell.
# NOTE(review): the {curly-brace} groups in the first entry presumably mark optional phrases,
# and "Could you ..." / "Please ..." look like unfinished stubs - confirm before reuse.
sentence_template = [
 "Put the objects {in a [size][shape]} on the {[x][y] of} the table {facing [rotation]}.",
 "Build a [size][shape] of the [objects] on the [x][y] of the table facing [rotation].",
 "Put the [objects] on the [x][y] of the table and make a [shape] facing [rotation].",
 "Rearrange the [objects] into a [shape], and put the structure on the [x][y] of the table facing [rotation].",
 "Could you ...",
 "Please ...",
 "Pick up the objects, put them into a [size][shape], place the [shape] on the [x][y] of table, make sure the [shape] is facing [rotation]."]



Enumerate all possible combinations of types

In [31]:
import itertools

# placeholder types that may appear in a template sentence
types = ["size", "shape", "x", "y", "rotation"]

# every non-empty combination of types, each stored as an alphabetically sorted tuple;
# combinations are generated by increasing size
type_combs = [
    tuple(sorted(combo))
    for combo_size in range(1, len(types) + 1)
    for combo in itertools.combinations(types, combo_size)
]

print(len(type_combs))
print(type_combs)

31
[('size',), ('shape',), ('x',), ('y',), ('rotation',), ('shape', 'size'), ('size', 'x'), ('size', 'y'), ('rotation', 'size'), ('shape', 'x'), ('shape', 'y'), ('rotation', 'shape'), ('x', 'y'), ('rotation', 'x'), ('rotation', 'y'), ('shape', 'size', 'x'), ('shape', 'size', 'y'), ('rotation', 'shape', 'size'), ('size', 'x', 'y'), ('rotation', 'size', 'x'), ('rotation', 'size', 'y'), ('shape', 'x', 'y'), ('rotation', 'shape', 'x'), ('rotation', 'shape', 'y'), ('rotation', 'x', 'y'), ('shape', 'size', 'x', 'y'), ('rotation', 'shape', 'size', 'x'), ('rotation', 'shape', 'size', 'y'), ('rotation', 'size', 'x', 'y'), ('rotation', 'shape', 'x', 'y'), ('rotation', 'shape', 'size', 'x', 'y')]


In [46]:
sentence_template_file = "/home/weiyu/Research/intern/StructDiffuser/src/StructDiffuser/language/sentence_template.txt"

import re

# one (initially empty) list of template lines per placeholder combination
type_comb_to_templates = {type_comb: [] for type_comb in type_combs}

# Matches one "[placeholder]". Written as a raw string: in a normal string literal "\[" and
# "\]" are invalid escape sequences, which newer Python versions warn about.
placeholder_pattern = re.compile(r'\[[^\]]*\]')

with open(sentence_template_file, "r") as fh:
    for line in fh:
        line = line.strip()
        # skip blank lines and "#" comment lines
        if not line or line[0] == "#":
            continue

        # the sorted set of placeholder names (brackets stripped) identifies the combination
        type_list = placeholder_pattern.findall(line)
        type_comb = tuple(sorted(set(t[1:-1] for t in type_list)))
        print(line, type_comb)

        if type_comb not in type_comb_to_templates:
            # still a KeyError, but one that says which template line is at fault
            raise KeyError("template {!r} uses an unknown placeholder combination {}".format(line, type_comb))
        type_comb_to_templates[type_comb].append(line)

build a [size] shape from the objects ('size',)
put the objects in to a [size] shape ('size',)
place the objects as a [size] shape ('size',)
make a [size] shape from the objects ('size',)
rearrange the objects into a [size] structure ('size',)
build a [shape] ('shape',)
make a [shape] ('shape',)
put the objects into a [shape] ('shape',)
place the objects as a [shape] ('shape',)
pick up the objects, and place them as a [shape] ('shape',)
place the objects on the [x] of the table ('x',)
put the objects on [x] ('x',)
make a structure from the objects and place it on [x] ('x',)
on the [x] of the table, place the objects ('x',)
move the objects to the [x] ('x',)
place the objects on the [y] of the table ('y',)
put the objects on [y] ('y',)
make a structure from the objects and place it on [y] ('y',)
on the [y] of the table, place the objects ('y',)
move the objects to the [y] ('y',)
build a structure facing [rotation] ('rotation',)
make a structure from the objects and make sure it is point

In [47]:
# Report every placeholder combination whose template count differs from five.
for type_comb, templates in type_comb_to_templates.items():
    if len(templates) == 5:
        continue
    print("{} does not have 5 templates".format(type_comb))

In [58]:
# Fill every template of the matching placeholder combination with the concrete values of
# each type/value tuple.
type_value_tuple_to_template_sentences = defaultdict(set)
for type_value_tuple in tqdm.tqdm(list(unique_type_value_tuples)):
    type_comb = tuple(sorted(tv[0] for tv in type_value_tuple))

    # strings are immutable, so the stored templates can be iterated directly without a copy
    for template_sentence in type_comb_to_templates[type_comb]:
        for t, v in type_value_tuple:
            template_sentence = template_sentence.replace("[{}]".format(t), v)

        # add only the fully substituted sentence
        type_value_tuple_to_template_sentences[type_value_tuple].add(template_sentence)

# convert to list; sorted so the order does not depend on string hash randomization and the
# saved artifact is the same on every run
for type_value_tuple, sentences in type_value_tuple_to_template_sentences.items():
    type_value_tuple_to_template_sentences[type_value_tuple] = sorted(sentences)

100%|██████████| 669/669 [00:00<00:00, 60546.98it/s]


In [73]:
# Pool the sentences of all type/value tuples and drop duplicates.
unique_template_sentences = {
    template_sentence
    for sentences in type_value_tuple_to_template_sentences.values()
    for template_sentence in sentences
}

unique_template_sentences = list(unique_template_sentences)
print("{} unique template sentences".format(len(unique_template_sentences)))

3345 unique template sentences


In [72]:
# Load the 'all-MiniLM-L6-v2' SentenceTransformer model; a later cell uses it to embed the
# generated template sentences.
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('all-MiniLM-L6-v2')

In [76]:
# Encode all unique template sentences in a single model.encode() call (it takes the list of
# strings directly). The printed shape is (number of sentences, embedding size).


embeddings = model.encode(unique_template_sentences)
print(embeddings.shape)

(3345, 384)


In [80]:
# Lookup table: sentence text -> its embedding row (both sequences share the same order).
template_sentence_to_embedding = dict(zip(unique_template_sentences, embeddings))

In [82]:
import pickle

# Bundle both lookup tables and write them to a single pickle file.
template_sentence_data = {
    "template_sentence_to_embedding": template_sentence_to_embedding,
    "type_value_tuple_to_template_sentences": type_value_tuple_to_template_sentences,
}

template_sentence_data_file = "/home/weiyu/Research/intern/StructDiffuser/src/StructDiffuser/language/template_sentence_data.pkl"
with open(template_sentence_data_file, "wb") as fh:
    pickle.dump(template_sentence_data, fh)