{ "cells": [ { "cell_type": "code", "execution_count": 51, "outputs": [], "source": [ "import os\n", "import h5py\n", "import json\n", "import numpy as np\n", "import tqdm\n", "import itertools\n", "import copy\n", "from collections import defaultdict\n", "\n", "from StructDiffuser.tokenizer import Tokenizer" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": true, "pycharm": { "name": "#%%\n" } }, "outputs": [], "source": [ "class SemanticArrangementDataset:\n", "\n", " def __init__(self, data_roots, index_roots, splits, tokenizer):\n", "\n", " self.data_roots = data_roots\n", " print(\"data dirs:\", self.data_roots)\n", "\n", " self.tokenizer = tokenizer\n", "\n", " self.arrangement_data = []\n", " arrangement_steps = []\n", " for split in splits:\n", " for data_root, index_root in zip(data_roots, index_roots):\n", " arrangement_indices_file = os.path.join(data_root, index_root, \"{}_arrangement_indices_file_all.txt\".format(split))\n", " if os.path.exists(arrangement_indices_file):\n", " with open(arrangement_indices_file, \"r\") as fh:\n", " arrangement_steps.extend([(os.path.join(data_root, f[0]), f[1]) for f in eval(fh.readline().strip())])\n", " else:\n", " print(\"{} does not exist\".format(arrangement_indices_file))\n", "\n", " # only keep one dummy step for each rearrangement\n", " for filename, step_t in arrangement_steps:\n", " if step_t == 0:\n", " self.arrangement_data.append(filename)\n", " print(\"{} valid sequences\".format(len(self.arrangement_data)))\n", "\n", " def __len__(self):\n", " return len(self.arrangement_data)\n", "\n", " def get_raw_data(self, idx):\n", "\n", " filename = self.arrangement_data[idx]\n", " h5 = h5py.File(filename, 'r')\n", " goal_specification = json.loads(str(np.array(h5[\"goal_specification\"])))\n", "\n", " ###################################\n", " # preparing sentence\n", " struct_spec = []\n", "\n", " # structure parameters\n", " # 5 parameters\n", " structure_parameters = goal_specification[\"shape\"]\n", " if structure_parameters[\"type\"] == \"circle\" or structure_parameters[\"type\"] == \"line\":\n", " struct_spec.append((structure_parameters[\"type\"], \"shape\"))\n", " struct_spec.append((structure_parameters[\"rotation\"][2], \"rotation\"))\n", " struct_spec.append((structure_parameters[\"position\"][0], \"position_x\"))\n", " struct_spec.append((structure_parameters[\"position\"][1], \"position_y\"))\n", " if structure_parameters[\"type\"] == \"circle\":\n", " struct_spec.append((structure_parameters[\"radius\"], \"radius\"))\n", " elif structure_parameters[\"type\"] == \"line\":\n", " struct_spec.append((structure_parameters[\"length\"] / 2.0, \"radius\"))\n", " else:\n", " struct_spec.append((structure_parameters[\"type\"], \"shape\"))\n", " struct_spec.append((structure_parameters[\"rotation\"][2], \"rotation\"))\n", " struct_spec.append((structure_parameters[\"position\"][0], \"position_x\"))\n", " struct_spec.append((structure_parameters[\"position\"][1], \"position_y\"))\n", "\n", " return struct_spec" ] }, { "cell_type": "markdown", "source": [], "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } } }, { "cell_type": "code", "execution_count": 14, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "Build one vacab for everything...\n", "The vocab has 124 tokens: {'PAD': 0, 'CLS': 1, 'class:MASK': 2, 'class:Basket': 3, 'class:BeerBottle': 4, 'class:Book': 5, 'class:Bottle': 6, 'class:Bowl': 7, 'class:Calculator': 8, 'class:Candle': 9, 'class:CellPhone': 10, 'class:ComputerMouse': 11, 'class:Controller': 12, 'class:Cup': 13, 'class:Donut': 14, 'class:Fork': 15, 'class:Hammer': 16, 'class:Knife': 17, 'class:Marker': 18, 'class:MilkCarton': 19, 'class:Mug': 20, 'class:Pan': 21, 'class:Pen': 22, 'class:PillBottle': 23, 'class:Plate': 24, 'class:PowerStrip': 25, 'class:Scissors': 26, 'class:SoapBottle': 27, 'class:SodaCan': 28, 'class:Spoon': 29, 'class:Stapler': 30, 'class:Teapot': 31, 'class:VideoGameController': 32, 'class:WineBottle': 33, 'class:CanOpener': 34, 'class:Fruit': 35, 'scene:MASK': 36, 'scene:dinner': 37, 'size:MASK': 38, 'size:L': 39, 'size:M': 40, 'size:S': 41, 'color:MASK': 42, 'color:blue': 43, 'color:cyan': 44, 'color:green': 45, 'color:magenta': 46, 'color:red': 47, 'color:yellow': 48, 'material:MASK': 49, 'material:glass': 50, 'material:metal': 51, 'material:plastic': 52, 'radius:MASK': 53, 'radius:less': 54, 'radius:greater': 55, 'radius:equal': 56, 'radius:0': 57, 'radius:1': 58, 'radius:2': 59, 'position_x:MASK': 60, 'position_x:less': 61, 'position_x:greater': 62, 'position_x:equal': 63, 'position_x:0': 64, 'position_x:1': 65, 'position_x:2': 66, 'position_y:MASK': 67, 'position_y:less': 68, 'position_y:greater': 69, 'position_y:equal': 70, 'position_y:0': 71, 'position_y:1': 72, 'position_y:2': 73, 'rotation:MASK': 74, 'rotation:less': 75, 'rotation:greater': 76, 'rotation:equal': 77, 'rotation:0': 78, 'rotation:1': 79, 'rotation:2': 80, 'rotation:3': 81, 'height:MASK': 82, 'height:less': 83, 'height:greater': 84, 'height:equal': 85, 'height:0': 86, 'height:1': 87, 'height:2': 88, 'height:3': 89, 'height:4': 90, 'height:5': 91, 'height:6': 92, 'height:7': 93, 'height:8': 94, 'height:9': 95, 'volumn:MASK': 96, 'volumn:less': 97, 'volumn:greater': 98, 'volumn:equal': 99, 'volumn:0': 100, 'volumn:1': 101, 'volumn:2': 102, 'volumn:3': 103, 'volumn:4': 104, 'volumn:5': 105, 'volumn:6': 106, 'volumn:7': 107, 'volumn:8': 108, 'volumn:9': 109, 'uniform_angle:MASK': 110, 'uniform_angle:False': 111, 'uniform_angle:True': 112, 'face_center:MASK': 113, 'face_center:False': 114, 'face_center:True': 115, 'angle_ratio:MASK': 116, 'angle_ratio:0.5': 117, 'angle_ratio:1.0': 118, 'shape:MASK': 119, 'shape:circle': 120, 'shape:line': 121, 'shape:tower': 122, 'shape:dinner': 123}\n", "\n", "Build vocabs for object position\n", "The obj_x vocab has 202 tokens: {'PAD': 0, 'MASK': 1, '0': 2, '1': 3, '2': 4, '3': 5, '4': 6, '5': 7, '6': 8, '7': 9, '8': 10, '9': 11, '10': 12, '11': 13, '12': 14, '13': 15, '14': 16, '15': 17, '16': 18, '17': 19, '18': 20, '19': 21, '20': 22, '21': 23, '22': 24, '23': 25, '24': 26, '25': 27, '26': 28, '27': 29, '28': 30, '29': 31, '30': 32, '31': 33, '32': 34, '33': 35, '34': 36, '35': 37, '36': 38, '37': 39, '38': 40, '39': 41, '40': 42, '41': 43, '42': 44, '43': 45, '44': 46, '45': 47, '46': 48, '47': 49, '48': 50, '49': 51, '50': 52, '51': 53, '52': 54, '53': 55, '54': 56, '55': 57, '56': 58, '57': 59, '58': 60, '59': 61, '60': 62, '61': 63, '62': 64, '63': 65, '64': 66, '65': 67, '66': 68, '67': 69, '68': 70, '69': 71, '70': 72, '71': 73, '72': 74, '73': 75, '74': 76, '75': 77, '76': 78, '77': 79, '78': 80, '79': 81, '80': 82, '81': 83, '82': 84, '83': 85, '84': 86, '85': 87, '86': 88, '87': 89, '88': 90, '89': 91, '90': 92, '91': 93, '92': 94, '93': 95, '94': 96, '95': 97, '96': 98, '97': 99, '98': 100, '99': 101, '100': 102, '101': 103, '102': 104, '103': 105, '104': 106, '105': 107, '106': 108, '107': 109, '108': 110, '109': 111, '110': 112, '111': 113, '112': 114, '113': 115, '114': 116, '115': 117, '116': 118, '117': 119, '118': 120, '119': 121, '120': 122, '121': 123, '122': 124, '123': 125, '124': 126, '125': 127, '126': 128, '127': 129, '128': 130, '129': 131, '130': 132, '131': 133, '132': 134, '133': 135, '134': 136, '135': 137, '136': 138, '137': 139, '138': 140, '139': 141, '140': 142, '141': 143, '142': 144, '143': 145, '144': 146, '145': 147, '146': 148, '147': 149, '148': 150, '149': 151, '150': 152, '151': 153, '152': 154, '153': 155, '154': 156, '155': 157, '156': 158, '157': 159, '158': 160, '159': 161, '160': 162, '161': 163, '162': 164, '163': 165, '164': 166, '165': 167, '166': 168, '167': 169, '168': 170, '169': 171, '170': 172, '171': 173, '172': 174, '173': 175, '174': 176, '175': 177, '176': 178, '177': 179, '178': 180, '179': 181, '180': 182, '181': 183, '182': 184, '183': 185, '184': 186, '185': 187, '186': 188, '187': 189, '188': 190, '189': 191, '190': 192, '191': 193, '192': 194, '193': 195, '194': 196, '195': 197, '196': 198, '197': 199, '198': 200, '199': 201}\n", "The obj_y vocab has 202 tokens: {'PAD': 0, 'MASK': 1, '0': 2, '1': 3, '2': 4, '3': 5, '4': 6, '5': 7, '6': 8, '7': 9, '8': 10, '9': 11, '10': 12, '11': 13, '12': 14, '13': 15, '14': 16, '15': 17, '16': 18, '17': 19, '18': 20, '19': 21, '20': 22, '21': 23, '22': 24, '23': 25, '24': 26, '25': 27, '26': 28, '27': 29, '28': 30, '29': 31, '30': 32, '31': 33, '32': 34, '33': 35, '34': 36, '35': 37, '36': 38, '37': 39, '38': 40, '39': 41, '40': 42, '41': 43, '42': 44, '43': 45, '44': 46, '45': 47, '46': 48, '47': 49, '48': 50, '49': 51, '50': 52, '51': 53, '52': 54, '53': 55, '54': 56, '55': 57, '56': 58, '57': 59, '58': 60, '59': 61, '60': 62, '61': 63, '62': 64, '63': 65, '64': 66, '65': 67, '66': 68, '67': 69, '68': 70, '69': 71, '70': 72, '71': 73, '72': 74, '73': 75, '74': 76, '75': 77, '76': 78, '77': 79, '78': 80, '79': 81, '80': 82, '81': 83, '82': 84, '83': 85, '84': 86, '85': 87, '86': 88, '87': 89, '88': 90, '89': 91, '90': 92, '91': 93, '92': 94, '93': 95, '94': 96, '95': 97, '96': 98, '97': 99, '98': 100, '99': 101, '100': 102, '101': 103, '102': 104, '103': 105, '104': 106, '105': 107, '106': 108, '107': 109, '108': 110, '109': 111, '110': 112, '111': 113, '112': 114, '113': 115, '114': 116, '115': 117, '116': 118, '117': 119, '118': 120, '119': 121, '120': 122, '121': 123, '122': 124, '123': 125, '124': 126, '125': 127, '126': 128, '127': 129, '128': 130, '129': 131, '130': 132, '131': 133, '132': 134, '133': 135, '134': 136, '135': 137, '136': 138, '137': 139, '138': 140, '139': 141, '140': 142, '141': 143, '142': 144, '143': 145, '144': 146, '145': 147, '146': 148, '147': 149, '148': 150, '149': 151, '150': 152, '151': 153, '152': 154, '153': 155, '154': 156, '155': 157, '156': 158, '157': 159, '158': 160, '159': 161, '160': 162, '161': 163, '162': 164, '163': 165, '164': 166, '165': 167, '166': 168, '167': 169, '168': 170, '169': 171, '170': 172, '171': 173, '172': 174, '173': 175, '174': 176, '175': 177, '176': 178, '177': 179, '178': 180, '179': 181, '180': 182, '181': 183, '182': 184, '183': 185, '184': 186, '185': 187, '186': 188, '187': 189, '188': 190, '189': 191, '190': 192, '191': 193, '192': 194, '193': 195, '194': 196, '195': 197, '196': 198, '197': 199, '198': 200, '199': 201}\n", "The obj_z vocab has 202 tokens: {'PAD': 0, 'MASK': 1, '0': 2, '1': 3, '2': 4, '3': 5, '4': 6, '5': 7, '6': 8, '7': 9, '8': 10, '9': 11, '10': 12, '11': 13, '12': 14, '13': 15, '14': 16, '15': 17, '16': 18, '17': 19, '18': 20, '19': 21, '20': 22, '21': 23, '22': 24, '23': 25, '24': 26, '25': 27, '26': 28, '27': 29, '28': 30, '29': 31, '30': 32, '31': 33, '32': 34, '33': 35, '34': 36, '35': 37, '36': 38, '37': 39, '38': 40, '39': 41, '40': 42, '41': 43, '42': 44, '43': 45, '44': 46, '45': 47, '46': 48, '47': 49, '48': 50, '49': 51, '50': 52, '51': 53, '52': 54, '53': 55, '54': 56, '55': 57, '56': 58, '57': 59, '58': 60, '59': 61, '60': 62, '61': 63, '62': 64, '63': 65, '64': 66, '65': 67, '66': 68, '67': 69, '68': 70, '69': 71, '70': 72, '71': 73, '72': 74, '73': 75, '74': 76, '75': 77, '76': 78, '77': 79, '78': 80, '79': 81, '80': 82, '81': 83, '82': 84, '83': 85, '84': 86, '85': 87, '86': 88, '87': 89, '88': 90, '89': 91, '90': 92, '91': 93, '92': 94, '93': 95, '94': 96, '95': 97, '96': 98, '97': 99, '98': 100, '99': 101, '100': 102, '101': 103, '102': 104, '103': 105, '104': 106, '105': 107, '106': 108, '107': 109, '108': 110, '109': 111, '110': 112, '111': 113, '112': 114, '113': 115, '114': 116, '115': 117, '116': 118, '117': 119, '118': 120, '119': 121, '120': 122, '121': 123, '122': 124, '123': 125, '124': 126, '125': 127, '126': 128, '127': 129, '128': 130, '129': 131, '130': 132, '131': 133, '132': 134, '133': 135, '134': 136, '135': 137, '136': 138, '137': 139, '138': 140, '139': 141, '140': 142, '141': 143, '142': 144, '143': 145, '144': 146, '145': 147, '146': 148, '147': 149, '148': 150, '149': 151, '150': 152, '151': 153, '152': 154, '153': 155, '154': 156, '155': 157, '156': 158, '157': 159, '158': 160, '159': 161, '160': 162, '161': 163, '162': 164, '163': 165, '164': 166, '165': 167, '166': 168, '167': 169, '168': 170, '169': 171, '170': 172, '171': 173, '172': 174, '173': 175, '174': 176, '175': 177, '176': 178, '177': 179, '178': 180, '179': 181, '180': 182, '181': 183, '182': 184, '183': 185, '184': 186, '185': 187, '186': 188, '187': 189, '188': 190, '189': 191, '190': 192, '191': 193, '192': 194, '193': 195, '194': 196, '195': 197, '196': 198, '197': 199, '198': 200, '199': 201}\n", "The obj_rr vocab has 362 tokens: {'PAD': 0, 'MASK': 1, '0': 2, '1': 3, '2': 4, '3': 5, '4': 6, '5': 7, '6': 8, '7': 9, '8': 10, '9': 11, '10': 12, '11': 13, '12': 14, '13': 15, '14': 16, '15': 17, '16': 18, '17': 19, '18': 20, '19': 21, '20': 22, '21': 23, '22': 24, '23': 25, '24': 26, '25': 27, '26': 28, '27': 29, '28': 30, '29': 31, '30': 32, '31': 33, '32': 34, '33': 35, '34': 36, '35': 37, '36': 38, '37': 39, '38': 40, '39': 41, '40': 42, '41': 43, '42': 44, '43': 45, '44': 46, '45': 47, '46': 48, '47': 49, '48': 50, '49': 51, '50': 52, '51': 53, '52': 54, '53': 55, '54': 56, '55': 57, '56': 58, '57': 59, '58': 60, '59': 61, '60': 62, '61': 63, '62': 64, '63': 65, '64': 66, '65': 67, '66': 68, '67': 69, '68': 70, '69': 71, '70': 72, '71': 73, '72': 74, '73': 75, '74': 76, '75': 77, '76': 78, '77': 79, '78': 80, '79': 81, '80': 82, '81': 83, '82': 84, '83': 85, '84': 86, '85': 87, '86': 88, '87': 89, '88': 90, '89': 91, '90': 92, '91': 93, '92': 94, '93': 95, '94': 96, '95': 97, '96': 98, '97': 99, '98': 100, '99': 101, '100': 102, '101': 103, '102': 104, '103': 105, '104': 106, '105': 107, '106': 108, '107': 109, '108': 110, '109': 111, '110': 112, '111': 113, '112': 114, '113': 115, '114': 116, '115': 117, '116': 118, '117': 119, '118': 120, '119': 121, '120': 122, '121': 123, '122': 124, '123': 125, '124': 126, '125': 127, '126': 128, '127': 129, '128': 130, '129': 131, '130': 132, '131': 133, '132': 134, '133': 135, '134': 136, '135': 137, '136': 138, '137': 139, '138': 140, '139': 141, '140': 142, '141': 143, '142': 144, '143': 145, '144': 146, '145': 147, '146': 148, '147': 149, '148': 150, '149': 151, '150': 152, '151': 153, '152': 154, '153': 155, '154': 156, '155': 157, '156': 158, '157': 159, '158': 160, '159': 161, '160': 162, '161': 163, '162': 164, '163': 165, '164': 166, '165': 167, '166': 168, '167': 169, '168': 170, '169': 171, '170': 172, '171': 173, '172': 174, '173': 175, '174': 176, '175': 177, '176': 178, '177': 179, '178': 180, '179': 181, '180': 182, '181': 183, '182': 184, '183': 185, '184': 186, '185': 187, '186': 188, '187': 189, '188': 190, '189': 191, '190': 192, '191': 193, '192': 194, '193': 195, '194': 196, '195': 197, '196': 198, '197': 199, '198': 200, '199': 201, '200': 202, '201': 203, '202': 204, '203': 205, '204': 206, '205': 207, '206': 208, '207': 209, '208': 210, '209': 211, '210': 212, '211': 213, '212': 214, '213': 215, '214': 216, '215': 217, '216': 218, '217': 219, '218': 220, '219': 221, '220': 222, '221': 223, '222': 224, '223': 225, '224': 226, '225': 227, '226': 228, '227': 229, '228': 230, '229': 231, '230': 232, '231': 233, '232': 234, '233': 235, '234': 236, '235': 237, '236': 238, '237': 239, '238': 240, '239': 241, '240': 242, '241': 243, '242': 244, '243': 245, '244': 246, '245': 247, '246': 248, '247': 249, '248': 250, '249': 251, '250': 252, '251': 253, '252': 254, '253': 255, '254': 256, '255': 257, '256': 258, '257': 259, '258': 260, '259': 261, '260': 262, '261': 263, '262': 264, '263': 265, '264': 266, '265': 267, '266': 268, '267': 269, '268': 270, '269': 271, '270': 272, '271': 273, '272': 274, '273': 275, '274': 276, '275': 277, '276': 278, '277': 279, '278': 280, '279': 281, '280': 282, '281': 283, '282': 284, '283': 285, '284': 286, '285': 287, '286': 288, '287': 289, '288': 290, '289': 291, '290': 292, '291': 293, '292': 294, '293': 295, '294': 296, '295': 297, '296': 298, '297': 299, '298': 300, '299': 301, '300': 302, '301': 303, '302': 304, '303': 305, '304': 306, '305': 307, '306': 308, '307': 309, '308': 310, '309': 311, '310': 312, '311': 313, '312': 314, '313': 315, '314': 316, '315': 317, '316': 318, '317': 319, '318': 320, '319': 321, '320': 322, '321': 323, '322': 324, '323': 325, '324': 326, '325': 327, '326': 328, '327': 329, '328': 330, '329': 331, '330': 332, '331': 333, '332': 334, '333': 335, '334': 336, '335': 337, '336': 338, '337': 339, '338': 340, '339': 341, '340': 342, '341': 343, '342': 344, '343': 345, '344': 346, '345': 347, '346': 348, '347': 349, '348': 350, '349': 351, '350': 352, '351': 353, '352': 354, '353': 355, '354': 356, '355': 357, '356': 358, '357': 359, '358': 360, '359': 361}\n", "The obj_rp vocab has 362 tokens: {'PAD': 0, 'MASK': 1, '0': 2, '1': 3, '2': 4, '3': 5, '4': 6, '5': 7, '6': 8, '7': 9, '8': 10, '9': 11, '10': 12, '11': 13, '12': 14, '13': 15, '14': 16, '15': 17, '16': 18, '17': 19, '18': 20, '19': 21, '20': 22, '21': 23, '22': 24, '23': 25, '24': 26, '25': 27, '26': 28, '27': 29, '28': 30, '29': 31, '30': 32, '31': 33, '32': 34, '33': 35, '34': 36, '35': 37, '36': 38, '37': 39, '38': 40, '39': 41, '40': 42, '41': 43, '42': 44, '43': 45, '44': 46, '45': 47, '46': 48, '47': 49, '48': 50, '49': 51, '50': 52, '51': 53, '52': 54, '53': 55, '54': 56, '55': 57, '56': 58, '57': 59, '58': 60, '59': 61, '60': 62, '61': 63, '62': 64, '63': 65, '64': 66, '65': 67, '66': 68, '67': 69, '68': 70, '69': 71, '70': 72, '71': 73, '72': 74, '73': 75, '74': 76, '75': 77, '76': 78, '77': 79, '78': 80, '79': 81, '80': 82, '81': 83, '82': 84, '83': 85, '84': 86, '85': 87, '86': 88, '87': 89, '88': 90, '89': 91, '90': 92, '91': 93, '92': 94, '93': 95, '94': 96, '95': 97, '96': 98, '97': 99, '98': 100, '99': 101, '100': 102, '101': 103, '102': 104, '103': 105, '104': 106, '105': 107, '106': 108, '107': 109, '108': 110, '109': 111, '110': 112, '111': 113, '112': 114, '113': 115, '114': 116, '115': 117, '116': 118, '117': 119, '118': 120, '119': 121, '120': 122, '121': 123, '122': 124, '123': 125, '124': 126, '125': 127, '126': 128, '127': 129, '128': 130, '129': 131, '130': 132, '131': 133, '132': 134, '133': 135, '134': 136, '135': 137, '136': 138, '137': 139, '138': 140, '139': 141, '140': 142, '141': 143, '142': 144, '143': 145, '144': 146, '145': 147, '146': 148, '147': 149, '148': 150, '149': 151, '150': 152, '151': 153, '152': 154, '153': 155, '154': 156, '155': 157, '156': 158, '157': 159, '158': 160, '159': 161, '160': 162, '161': 163, '162': 164, '163': 165, '164': 166, '165': 167, '166': 168, '167': 169, '168': 170, '169': 171, '170': 172, '171': 173, '172': 174, '173': 175, '174': 176, '175': 177, '176': 178, '177': 179, '178': 180, '179': 181, '180': 182, '181': 183, '182': 184, '183': 185, '184': 186, '185': 187, '186': 188, '187': 189, '188': 190, '189': 191, '190': 192, '191': 193, '192': 194, '193': 195, '194': 196, '195': 197, '196': 198, '197': 199, '198': 200, '199': 201, '200': 202, '201': 203, '202': 204, '203': 205, '204': 206, '205': 207, '206': 208, '207': 209, '208': 210, '209': 211, '210': 212, '211': 213, '212': 214, '213': 215, '214': 216, '215': 217, '216': 218, '217': 219, '218': 220, '219': 221, '220': 222, '221': 223, '222': 224, '223': 225, '224': 226, '225': 227, '226': 228, '227': 229, '228': 230, '229': 231, '230': 232, '231': 233, '232': 234, '233': 235, '234': 236, '235': 237, '236': 238, '237': 239, '238': 240, '239': 241, '240': 242, '241': 243, '242': 244, '243': 245, '244': 246, '245': 247, '246': 248, '247': 249, '248': 250, '249': 251, '250': 252, '251': 253, '252': 254, '253': 255, '254': 256, '255': 257, '256': 258, '257': 259, '258': 260, '259': 261, '260': 262, '261': 263, '262': 264, '263': 265, '264': 266, '265': 267, '266': 268, '267': 269, '268': 270, '269': 271, '270': 272, '271': 273, '272': 274, '273': 275, '274': 276, '275': 277, '276': 278, '277': 279, '278': 280, '279': 281, '280': 282, '281': 283, '282': 284, '283': 285, '284': 286, '285': 287, '286': 288, '287': 289, '288': 290, '289': 291, '290': 292, '291': 293, '292': 294, '293': 295, '294': 296, '295': 297, '296': 298, '297': 299, '298': 300, '299': 301, '300': 302, '301': 303, '302': 304, '303': 305, '304': 306, '305': 307, '306': 308, '307': 309, '308': 310, '309': 311, '310': 312, '311': 313, '312': 314, '313': 315, '314': 316, '315': 317, '316': 318, '317': 319, '318': 320, '319': 321, '320': 322, '321': 323, '322': 324, '323': 325, '324': 326, '325': 327, '326': 328, '327': 329, '328': 330, '329': 331, '330': 332, '331': 333, '332': 334, '333': 335, '334': 336, '335': 337, '336': 338, '337': 339, '338': 340, '339': 341, '340': 342, '341': 343, '342': 344, '343': 345, '344': 346, '345': 347, '346': 348, '347': 349, '348': 350, '349': 351, '350': 352, '351': 353, '352': 354, '353': 355, '354': 356, '355': 357, '356': 358, '357': 359, '358': 360, '359': 361}\n", "The obj_ry vocab has 362 tokens: {'PAD': 0, 'MASK': 1, '0': 2, '1': 3, '2': 4, '3': 5, '4': 6, '5': 7, '6': 8, '7': 9, '8': 10, '9': 11, '10': 12, '11': 13, '12': 14, '13': 15, '14': 16, '15': 17, '16': 18, '17': 19, '18': 20, '19': 21, '20': 22, '21': 23, '22': 24, '23': 25, '24': 26, '25': 27, '26': 28, '27': 29, '28': 30, '29': 31, '30': 32, '31': 33, '32': 34, '33': 35, '34': 36, '35': 37, '36': 38, '37': 39, '38': 40, '39': 41, '40': 42, '41': 43, '42': 44, '43': 45, '44': 46, '45': 47, '46': 48, '47': 49, '48': 50, '49': 51, '50': 52, '51': 53, '52': 54, '53': 55, '54': 56, '55': 57, '56': 58, '57': 59, '58': 60, '59': 61, '60': 62, '61': 63, '62': 64, '63': 65, '64': 66, '65': 67, '66': 68, '67': 69, '68': 70, '69': 71, '70': 72, '71': 73, '72': 74, '73': 75, '74': 76, '75': 77, '76': 78, '77': 79, '78': 80, '79': 81, '80': 82, '81': 83, '82': 84, '83': 85, '84': 86, '85': 87, '86': 88, '87': 89, '88': 90, '89': 91, '90': 92, '91': 93, '92': 94, '93': 95, '94': 96, '95': 97, '96': 98, '97': 99, '98': 100, '99': 101, '100': 102, '101': 103, '102': 104, '103': 105, '104': 106, '105': 107, '106': 108, '107': 109, '108': 110, '109': 111, '110': 112, '111': 113, '112': 114, '113': 115, '114': 116, '115': 117, '116': 118, '117': 119, '118': 120, '119': 121, '120': 122, '121': 123, '122': 124, '123': 125, '124': 126, '125': 127, '126': 128, '127': 129, '128': 130, '129': 131, '130': 132, '131': 133, '132': 134, '133': 135, '134': 136, '135': 137, '136': 138, '137': 139, '138': 140, '139': 141, '140': 142, '141': 143, '142': 144, '143': 145, '144': 146, '145': 147, '146': 148, '147': 149, '148': 150, '149': 151, '150': 152, '151': 153, '152': 154, '153': 155, '154': 156, '155': 157, '156': 158, '157': 159, '158': 160, '159': 161, '160': 162, '161': 163, '162': 164, '163': 165, '164': 166, '165': 167, '166': 168, '167': 169, '168': 170, '169': 171, '170': 172, '171': 173, '172': 174, '173': 175, '174': 176, '175': 177, '176': 178, '177': 179, '178': 180, '179': 181, '180': 182, '181': 183, '182': 184, '183': 185, '184': 186, '185': 187, '186': 188, '187': 189, '188': 190, '189': 191, '190': 192, '191': 193, '192': 194, '193': 195, '194': 196, '195': 197, '196': 198, '197': 199, '198': 200, '199': 201, '200': 202, '201': 203, '202': 204, '203': 205, '204': 206, '205': 207, '206': 208, '207': 209, '208': 210, '209': 211, '210': 212, '211': 213, '212': 214, '213': 215, '214': 216, '215': 217, '216': 218, '217': 219, '218': 220, '219': 221, '220': 222, '221': 223, '222': 224, '223': 225, '224': 226, '225': 227, '226': 228, '227': 229, '228': 230, '229': 231, '230': 232, '231': 233, '232': 234, '233': 235, '234': 236, '235': 237, '236': 238, '237': 239, '238': 240, '239': 241, '240': 242, '241': 243, '242': 244, '243': 245, '244': 246, '245': 247, '246': 248, '247': 249, '248': 250, '249': 251, '250': 252, '251': 253, '252': 254, '253': 255, '254': 256, '255': 257, '256': 258, '257': 259, '258': 260, '259': 261, '260': 262, '261': 263, '262': 264, '263': 265, '264': 266, '265': 267, '266': 268, '267': 269, '268': 270, '269': 271, '270': 272, '271': 273, '272': 274, '273': 275, '274': 276, '275': 277, '276': 278, '277': 279, '278': 280, '279': 281, '280': 282, '281': 283, '282': 284, '283': 285, '284': 286, '285': 287, '286': 288, '287': 289, '288': 290, '289': 291, '290': 292, '291': 293, '292': 294, '293': 295, '294': 296, '295': 297, '296': 298, '297': 299, '298': 300, '299': 301, '300': 302, '301': 303, '302': 304, '303': 305, '304': 306, '305': 307, '306': 308, '307': 309, '308': 310, '309': 311, '310': 312, '311': 313, '312': 314, '313': 315, '314': 316, '315': 317, '316': 318, '317': 319, '318': 320, '319': 321, '320': 322, '321': 323, '322': 324, '323': 325, '324': 326, '325': 327, '326': 328, '327': 329, '328': 330, '329': 331, '330': 332, '331': 333, '332': 334, '333': 335, '334': 336, '335': 337, '336': 338, '337': 339, '338': 340, '339': 341, '340': 342, '341': 343, '342': 344, '343': 345, '344': 346, '345': 347, '346': 348, '347': 349, '348': 350, '349': 351, '350': 352, '351': 353, '352': 354, '353': 355, '354': 356, '355': 357, '356': 358, '357': 359, '358': 360, '359': 361}\n", "The struct_x vocab has 202 tokens: {'PAD': 0, 'MASK': 1, '0': 2, '1': 3, '2': 4, '3': 5, '4': 6, '5': 7, '6': 8, '7': 9, '8': 10, '9': 11, '10': 12, '11': 13, '12': 14, '13': 15, '14': 16, '15': 17, '16': 18, '17': 19, '18': 20, '19': 21, '20': 22, '21': 23, '22': 24, '23': 25, '24': 26, '25': 27, '26': 28, '27': 29, '28': 30, '29': 31, '30': 32, '31': 33, '32': 34, '33': 35, '34': 36, '35': 37, '36': 38, '37': 39, '38': 40, '39': 41, '40': 42, '41': 43, '42': 44, '43': 45, '44': 46, '45': 47, '46': 48, '47': 49, '48': 50, '49': 51, '50': 52, '51': 53, '52': 54, '53': 55, '54': 56, '55': 57, '56': 58, '57': 59, '58': 60, '59': 61, '60': 62, '61': 63, '62': 64, '63': 65, '64': 66, '65': 67, '66': 68, '67': 69, '68': 70, '69': 71, '70': 72, '71': 73, '72': 74, '73': 75, '74': 76, '75': 77, '76': 78, '77': 79, '78': 80, '79': 81, '80': 82, '81': 83, '82': 84, '83': 85, '84': 86, '85': 87, '86': 88, '87': 89, '88': 90, '89': 91, '90': 92, '91': 93, '92': 94, '93': 95, '94': 96, '95': 97, '96': 98, '97': 99, '98': 100, '99': 101, '100': 102, '101': 103, '102': 104, '103': 105, '104': 106, '105': 107, '106': 108, '107': 109, '108': 110, '109': 111, '110': 112, '111': 113, '112': 114, '113': 115, '114': 116, '115': 117, '116': 118, '117': 119, '118': 120, '119': 121, '120': 122, '121': 123, '122': 124, '123': 125, '124': 126, '125': 127, '126': 128, '127': 129, '128': 130, '129': 131, '130': 132, '131': 133, '132': 134, '133': 135, '134': 136, '135': 137, '136': 138, '137': 139, '138': 140, '139': 141, '140': 142, '141': 143, '142': 144, '143': 145, '144': 146, '145': 147, '146': 148, '147': 149, '148': 150, '149': 151, '150': 152, '151': 153, '152': 154, '153': 155, '154': 156, '155': 157, '156': 158, '157': 159, '158': 160, '159': 161, '160': 162, '161': 163, '162': 164, '163': 165, '164': 166, '165': 167, '166': 168, '167': 169, '168': 170, '169': 171, '170': 172, '171': 173, '172': 174, '173': 175, '174': 176, '175': 177, '176': 178, '177': 179, '178': 180, '179': 181, '180': 182, '181': 183, '182': 184, '183': 185, '184': 186, '185': 187, '186': 188, '187': 189, '188': 190, '189': 191, '190': 192, '191': 193, '192': 194, '193': 195, '194': 196, '195': 197, '196': 198, '197': 199, '198': 200, '199': 201}\n", "The struct_y vocab has 202 tokens: {'PAD': 0, 'MASK': 1, '0': 2, '1': 3, '2': 4, '3': 5, '4': 6, '5': 7, '6': 8, '7': 9, '8': 10, '9': 11, '10': 12, '11': 13, '12': 14, '13': 15, '14': 16, '15': 17, '16': 18, '17': 19, '18': 20, '19': 21, '20': 22, '21': 23, '22': 24, '23': 25, '24': 26, '25': 27, '26': 28, '27': 29, '28': 30, '29': 31, '30': 32, '31': 33, '32': 34, '33': 35, '34': 36, '35': 37, '36': 38, '37': 39, '38': 40, '39': 41, '40': 42, '41': 43, '42': 44, '43': 45, '44': 46, '45': 47, '46': 48, '47': 49, '48': 50, '49': 51, '50': 52, '51': 53, '52': 54, '53': 55, '54': 56, '55': 57, '56': 58, '57': 59, '58': 60, '59': 61, '60': 62, '61': 63, '62': 64, '63': 65, '64': 66, '65': 67, '66': 68, '67': 69, '68': 70, '69': 71, '70': 72, '71': 73, '72': 74, '73': 75, '74': 76, '75': 77, '76': 78, '77': 79, '78': 80, '79': 81, '80': 82, '81': 83, '82': 84, '83': 85, '84': 86, '85': 87, '86': 88, '87': 89, '88': 90, '89': 91, '90': 92, '91': 93, '92': 94, '93': 95, '94': 96, '95': 97, '96': 98, '97': 99, '98': 100, '99': 101, '100': 102, '101': 103, '102': 104, '103': 105, '104': 106, '105': 107, '106': 108, '107': 109, '108': 110, '109': 111, '110': 112, '111': 113, '112': 114, '113': 115, '114': 116, '115': 117, '116': 118, '117': 119, '118': 120, '119': 121, '120': 122, '121': 123, '122': 124, '123': 125, '124': 126, '125': 127, '126': 128, '127': 129, '128': 130, '129': 131, '130': 132, '131': 133, '132': 134, '133': 135, '134': 136, '135': 137, '136': 138, '137': 139, '138': 140, '139': 141, '140': 142, '141': 143, '142': 144, '143': 145, '144': 146, '145': 147, '146': 148, '147': 149, '148': 150, '149': 151, '150': 152, '151': 153, '152': 154, '153': 155, '154': 156, '155': 157, '156': 158, '157': 159, '158': 160, '159': 161, '160': 162, '161': 163, '162': 164, '163': 165, '164': 166, '165': 167, '166': 168, '167': 169, '168': 170, '169': 171, '170': 172, '171': 173, '172': 174, '173': 175, '174': 176, '175': 177, '176': 178, '177': 179, '178': 180, '179': 181, '180': 182, '181': 183, '182': 184, '183': 185, '184': 186, '185': 187, '186': 188, '187': 189, '188': 190, '189': 191, '190': 192, '191': 193, '192': 194, '193': 195, '194': 196, '195': 197, '196': 198, '197': 199, '198': 200, '199': 201}\n", "The struct_z vocab has 202 tokens: {'PAD': 0, 'MASK': 1, '0': 2, '1': 3, '2': 4, '3': 5, '4': 6, '5': 7, '6': 8, '7': 9, '8': 10, '9': 11, '10': 12, '11': 13, '12': 14, '13': 15, '14': 16, '15': 17, '16': 18, '17': 19, '18': 20, '19': 21, '20': 22, '21': 23, '22': 24, '23': 25, '24': 26, '25': 27, '26': 28, '27': 29, '28': 30, '29': 31, '30': 32, '31': 33, '32': 34, '33': 35, '34': 36, '35': 37, '36': 38, '37': 39, '38': 40, '39': 41, '40': 42, '41': 43, '42': 44, '43': 45, '44': 46, '45': 47, '46': 48, '47': 49, '48': 50, '49': 51, '50': 52, '51': 53, '52': 54, '53': 55, '54': 56, '55': 57, '56': 58, '57': 59, '58': 60, '59': 61, '60': 62, '61': 63, '62': 64, '63': 65, '64': 66, '65': 67, '66': 68, '67': 69, '68': 70, '69': 71, '70': 72, '71': 73, '72': 74, '73': 75, '74': 76, '75': 77, '76': 78, '77': 79, '78': 80, '79': 81, '80': 82, '81': 83, '82': 84, '83': 85, '84': 86, '85': 87, '86': 88, '87': 89, '88': 90, '89': 91, '90': 92, '91': 93, '92': 94, '93': 95, '94': 96, '95': 97, '96': 98, '97': 99, '98': 100, '99': 101, '100': 102, '101': 103, '102': 104, '103': 105, '104': 106, '105': 107, '106': 108, '107': 109, '108': 110, '109': 111, '110': 112, '111': 113, '112': 114, '113': 115, '114': 116, '115': 117, '116': 118, '117': 119, '118': 120, '119': 121, '120': 122, '121': 123, '122': 124, '123': 125, '124': 126, '125': 127, '126': 128, '127': 129, '128': 130, '129': 131, '130': 132, '131': 133, '132': 134, '133': 135, '134': 136, '135': 137, '136': 138, '137': 139, '138': 140, '139': 141, '140': 142, '141': 143, '142': 144, '143': 145, '144': 146, '145': 147, '146': 148, '147': 149, '148': 150, '149': 151, '150': 152, '151': 153, '152': 154, '153': 155, '154': 156, '155': 157, '156': 158, '157': 159, '158': 160, '159': 161, '160': 162, '161': 163, '162': 164, '163': 165, '164': 166, '165': 167, '166': 168, '167': 169, '168': 170, '169': 171, '170': 172, '171': 173, '172': 174, '173': 175, '174': 176, '175': 177, '176': 178, '177': 179, '178': 180, '179': 181, '180': 182, '181': 183, '182': 184, '183': 185, '184': 186, '185': 187, '186': 188, '187': 189, '188': 190, '189': 191, '190': 192, '191': 193, '192': 194, '193': 195, '194': 196, '195': 197, '196': 198, '197': 199, '198': 200, '199': 201}\n", "The struct_rr vocab has 362 tokens: {'PAD': 0, 'MASK': 1, '0': 2, '1': 3, '2': 4, '3': 5, '4': 6, '5': 7, '6': 8, '7': 9, '8': 10, '9': 11, '10': 12, '11': 13, '12': 14, '13': 15, '14': 16, '15': 17, '16': 18, '17': 19, '18': 20, '19': 21, '20': 22, '21': 23, '22': 24, '23': 25, '24': 26, '25': 27, '26': 28, '27': 29, '28': 30, '29': 31, '30': 32, '31': 33, '32': 34, '33': 35, '34': 36, '35': 37, '36': 38, '37': 39, '38': 40, '39': 41, '40': 42, '41': 43, '42': 44, '43': 45, '44': 46, '45': 47, '46': 48, '47': 49, '48': 50, '49': 51, '50': 52, '51': 53, '52': 54, '53': 55, '54': 56, '55': 57, '56': 58, '57': 59, '58': 60, '59': 61, '60': 62, '61': 63, '62': 64, '63': 65, '64': 66, '65': 67, '66': 68, '67': 69, '68': 70, '69': 71, '70': 72, '71': 73, '72': 74, '73': 75, '74': 76, '75': 77, '76': 78, '77': 79, '78': 80, '79': 81, '80': 82, '81': 83, '82': 84, '83': 85, '84': 86, '85': 87, '86': 88, '87': 89, '88': 90, '89': 91, '90': 92, '91': 93, '92': 94, '93': 95, '94': 96, '95': 97, '96': 98, '97': 99, '98': 100, '99': 101, '100': 102, '101': 103, '102': 104, '103': 105, '104': 106, '105': 107, '106': 108, '107': 109, '108': 110, '109': 111, '110': 112, '111': 113, '112': 114, '113': 115, '114': 116, '115': 117, '116': 118, '117': 119, '118': 120, '119': 121, '120': 122, '121': 123, '122': 124, '123': 125, '124': 126, '125': 127, '126': 128, '127': 129, '128': 130, '129': 131, '130': 132, '131': 133, '132': 134, '133': 135, '134': 136, '135': 137, '136': 138, '137': 139, '138': 140, '139': 141, '140': 142, '141': 143, '142': 144, '143': 145, '144': 146, '145': 147, '146': 148, '147': 149, '148': 150, '149': 151, '150': 152, '151': 153, '152': 154, '153': 155, '154': 156, '155': 157, '156': 158, '157': 159, '158': 160, '159': 161, '160': 162, '161': 163, '162': 164, '163': 165, '164': 166, '165': 167, '166': 168, '167': 169, '168': 170, '169': 171, '170': 172, '171': 173, '172': 174, '173': 175, '174': 176, '175': 177, '176': 178, '177': 179, '178': 180, '179': 181, '180': 182, '181': 183, '182': 184, '183': 185, '184': 186, '185': 187, '186': 188, '187': 189, '188': 190, '189': 191, '190': 192, '191': 193, '192': 194, '193': 195, '194': 196, '195': 197, '196': 198, '197': 199, '198': 200, '199': 201, '200': 202, '201': 203, '202': 204, '203': 205, '204': 206, '205': 207, '206': 208, '207': 209, '208': 210, '209': 211, '210': 212, '211': 213, '212': 214, '213': 215, '214': 216, '215': 217, '216': 218, '217': 219, '218': 220, '219': 221, '220': 222, '221': 223, '222': 224, '223': 225, '224': 226, '225': 227, '226': 228, '227': 229, '228': 230, '229': 231, '230': 232, '231': 233, '232': 234, '233': 235, '234': 236, '235': 237, '236': 238, '237': 239, '238': 240, '239': 241, '240': 242, '241': 243, '242': 244, '243': 245, '244': 246, '245': 247, '246': 248, '247': 249, '248': 250, '249': 251, '250': 252, '251': 253, '252': 254, '253': 255, '254': 256, '255': 257, '256': 258, '257': 259, '258': 260, '259': 261, '260': 262, '261': 263, '262': 264, '263': 265, '264': 266, '265': 267, '266': 268, '267': 269, '268': 270, '269': 271, '270': 272, '271': 273, '272': 274, '273': 275, '274': 276, '275': 277, '276': 278, '277': 279, '278': 280, '279': 281, '280': 282, '281': 283, '282': 284, '283': 285, '284': 286, '285': 287, '286': 288, '287': 289, '288': 290, '289': 291, '290': 292, '291': 293, '292': 294, '293': 295, '294': 296, '295': 297, '296': 298, '297': 299, '298': 300, '299': 301, '300': 302, '301': 303, '302': 304, '303': 305, '304': 306, '305': 307, '306': 308, '307': 309, '308': 310, '309': 311, '310': 312, '311': 313, '312': 314, '313': 315, '314': 316, '315': 317, '316': 318, '317': 319, '318': 320, '319': 321, '320': 322, '321': 323, '322': 324, '323': 325, '324': 326, '325': 327, '326': 328, '327': 329, '328': 330, '329': 331, '330': 332, '331': 333, '332': 334, '333': 335, '334': 336, '335': 337, '336': 338, '337': 339, '338': 340, '339': 341, '340': 342, '341': 343, '342': 344, '343': 345, '344': 346, '345': 347, '346': 348, '347': 349, '348': 350, '349': 351, '350': 352, '351': 353, '352': 354, '353': 355, '354': 356, '355': 357, '356': 358, '357': 359, '358': 360, '359': 361}\n", "The struct_rp vocab has 362 tokens: {'PAD': 0, 'MASK': 1, '0': 2, '1': 3, '2': 4, '3': 5, '4': 6, '5': 7, '6': 8, '7': 9, '8': 10, '9': 11, '10': 12, '11': 13, '12': 14, '13': 15, '14': 16, '15': 17, '16': 18, '17': 19, '18': 20, '19': 21, '20': 22, '21': 23, '22': 24, '23': 25, '24': 26, '25': 27, '26': 28, '27': 29, '28': 30, '29': 31, '30': 32, '31': 33, '32': 34, '33': 35, '34': 36, '35': 37, '36': 38, '37': 39, '38': 40, '39': 41, '40': 42, '41': 43, '42': 44, '43': 45, '44': 46, '45': 47, '46': 48, '47': 49, '48': 50, '49': 51, '50': 52, '51': 53, '52': 54, '53': 55, '54': 56, '55': 57, '56': 58, '57': 59, '58': 60, '59': 61, '60': 62, '61': 63, '62': 64, '63': 65, '64': 66, '65': 67, '66': 68, '67': 69, '68': 70, '69': 71, '70': 72, '71': 73, '72': 74, '73': 75, '74': 76, '75': 77, '76': 78, '77': 79, '78': 80, '79': 81, '80': 82, '81': 83, '82': 84, '83': 85, '84': 86, '85': 87, '86': 88, '87': 89, '88': 90, '89': 91, '90': 92, '91': 93, '92': 94, '93': 95, '94': 96, '95': 97, '96': 98, '97': 99, '98': 100, '99': 101, '100': 102, '101': 103, '102': 104, '103': 105, '104': 106, '105': 107, '106': 108, '107': 109, '108': 110, '109': 111, '110': 112, '111': 113, '112': 114, '113': 115, '114': 116, '115': 117, '116': 118, '117': 119, '118': 120, '119': 121, '120': 122, '121': 123, '122': 124, '123': 125, '124': 126, '125': 127, '126': 128, '127': 129, '128': 130, '129': 131, '130': 132, '131': 133, '132': 134, '133': 135, '134': 136, '135': 137, '136': 138, '137': 139, '138': 140, '139': 141, '140': 142, '141': 143, '142': 144, '143': 145, '144': 146, '145': 147, '146': 148, '147': 149, '148': 150, '149': 151, '150': 152, '151': 153, '152': 154, '153': 155, '154': 156, '155': 157, '156': 158, '157': 159, '158': 160, '159': 161, '160': 162, '161': 163, '162': 164, '163': 165, '164': 166, '165': 167, '166': 168, '167': 169, '168': 170, '169': 171, '170': 172, '171': 173, '172': 174, '173': 175, '174': 176, '175': 177, '176': 178, '177': 179, '178': 180, '179': 181, '180': 182, '181': 183, '182': 184, '183': 185, '184': 186, '185': 187, '186': 188, '187': 189, '188': 190, '189': 191, '190': 192, '191': 193, '192': 194, '193': 195, '194': 196, '195': 197, '196': 198, '197': 199, '198': 200, '199': 201, '200': 202, '201': 203, '202': 204, '203': 205, '204': 206, '205': 207, '206': 208, '207': 209, '208': 210, '209': 211, '210': 212, '211': 213, '212': 214, '213': 215, '214': 216, '215': 217, '216': 218, '217': 219, '218': 220, '219': 221, '220': 222, '221': 223, '222': 224, '223': 225, '224': 226, '225': 227, '226': 228, '227': 229, '228': 230, '229': 231, '230': 232, '231': 233, '232': 234, '233': 235, '234': 236, '235': 237, '236': 238, '237': 239, '238': 240, '239': 241, '240': 242, '241': 243, '242': 244, '243': 245, '244': 246, '245': 247, '246': 248, '247': 249, '248': 250, '249': 251, '250': 252, '251': 253, '252': 254, '253': 255, '254': 256, '255': 257, '256': 258, '257': 259, '258': 260, '259': 261, '260': 262, '261': 263, '262': 264, '263': 265, '264': 266, '265': 267, '266': 268, '267': 269, '268': 270, '269': 271, '270': 272, '271': 273, '272': 274, '273': 275, '274': 276, '275': 277, '276': 278, '277': 279, '278': 280, '279': 281, '280': 282, '281': 283, '282': 284, '283': 285, '284': 286, '285': 287, '286': 288, '287': 289, '288': 290, '289': 291, '290': 292, '291': 293, '292': 294, '293': 295, '294': 296, '295': 297, '296': 298, '297': 299, '298': 300, '299': 301, '300': 302, '301': 303, '302': 304, '303': 305, '304': 306, '305': 307, '306': 308, '307': 309, '308': 310, '309': 311, '310': 312, '311': 313, '312': 314, '313': 315, '314': 316, '315': 317, '316': 318, '317': 319, '318': 320, '319': 321, '320': 322, '321': 323, '322': 324, '323': 325, '324': 326, '325': 327, '326': 328, '327': 329, '328': 330, '329': 331, '330': 332, '331': 333, '332': 334, '333': 335, '334': 336, '335': 337, '336': 338, '337': 339, '338': 340, '339': 341, '340': 342, '341': 343, '342': 344, '343': 345, '344': 346, '345': 347, '346': 348, '347': 349, '348': 350, '349': 351, '350': 352, '351': 353, '352': 354, '353': 355, '354': 356, '355': 357, '356': 358, '357': 359, '358': 360, '359': 361}\n", "The struct_ry vocab has 362 tokens: {'PAD': 0, 'MASK': 1, '0': 2, '1': 3, '2': 4, '3': 5, '4': 6, '5': 7, '6': 8, '7': 9, '8': 10, '9': 11, '10': 12, '11': 13, '12': 14, '13': 15, '14': 16, '15': 17, '16': 18, '17': 19, '18': 20, '19': 21, '20': 22, '21': 23, '22': 24, '23': 25, '24': 26, '25': 27, '26': 28, '27': 29, '28': 30, '29': 31, '30': 32, '31': 33, '32': 34, '33': 35, '34': 36, '35': 37, '36': 38, '37': 39, '38': 40, '39': 41, '40': 42, '41': 43, '42': 44, '43': 45, '44': 46, '45': 47, '46': 48, '47': 49, '48': 50, '49': 51, '50': 52, '51': 53, '52': 54, '53': 55, '54': 56, '55': 57, '56': 58, '57': 59, '58': 60, '59': 61, '60': 62, '61': 63, '62': 64, '63': 65, '64': 66, '65': 67, '66': 68, '67': 69, '68': 70, '69': 71, '70': 72, '71': 73, '72': 74, '73': 75, '74': 76, '75': 77, '76': 78, '77': 79, '78': 80, '79': 81, '80': 82, '81': 83, '82': 84, '83': 85, '84': 86, '85': 87, '86': 88, '87': 89, '88': 90, '89': 91, '90': 92, '91': 93, '92': 94, '93': 95, '94': 96, '95': 97, '96': 98, '97': 99, '98': 100, '99': 101, '100': 102, '101': 103, '102': 104, '103': 105, '104': 106, '105': 107, '106': 108, '107': 109, '108': 110, '109': 111, '110': 112, '111': 113, '112': 114, '113': 115, '114': 116, '115': 117, '116': 118, '117': 119, '118': 120, '119': 121, '120': 122, '121': 123, '122': 124, '123': 125, '124': 126, '125': 127, '126': 128, '127': 129, '128': 130, '129': 131, '130': 132, '131': 133, '132': 134, '133': 135, '134': 136, '135': 137, '136': 138, '137': 139, '138': 140, '139': 141, '140': 142, '141': 143, '142': 144, '143': 145, '144': 146, '145': 147, '146': 148, '147': 149, '148': 150, '149': 151, '150': 152, '151': 153, '152': 154, '153': 155, '154': 156, '155': 157, '156': 158, '157': 159, '158': 160, '159': 161, '160': 162, '161': 163, '162': 164, '163': 165, '164': 166, '165': 167, '166': 168, '167': 169, '168': 170, '169': 171, '170': 172, '171': 173, '172': 174, '173': 175, '174': 176, '175': 177, '176': 178, '177': 179, '178': 180, '179': 181, '180': 182, '181': 183, '182': 184, '183': 185, '184': 186, '185': 187, '186': 188, '187': 189, '188': 190, '189': 191, '190': 192, '191': 193, '192': 194, '193': 195, '194': 196, '195': 197, '196': 198, '197': 199, '198': 200, '199': 201, '200': 202, '201': 203, '202': 204, '203': 205, '204': 206, '205': 207, '206': 208, '207': 209, '208': 210, '209': 211, '210': 212, '211': 213, '212': 214, '213': 215, '214': 216, '215': 217, '216': 218, '217': 219, '218': 220, '219': 221, '220': 222, '221': 223, '222': 224, '223': 225, '224': 226, '225': 227, '226': 228, '227': 229, '228': 230, '229': 231, '230': 232, '231': 233, '232': 234, '233': 235, '234': 236, '235': 237, '236': 238, '237': 239, '238': 240, '239': 241, '240': 242, '241': 243, '242': 244, '243': 245, '244': 246, '245': 247, '246': 248, '247': 249, '248': 250, '249': 251, '250': 252, '251': 253, '252': 254, '253': 255, '254': 256, '255': 257, '256': 258, '257': 259, '258': 260, '259': 261, '260': 262, '261': 263, '262': 264, '263': 265, '264': 266, '265': 267, '266': 268, '267': 269, '268': 270, '269': 271, '270': 272, '271': 273, '272': 274, '273': 275, '274': 276, '275': 277, '276': 278, '277': 279, '278': 280, '279': 281, '280': 282, '281': 283, '282': 284, '283': 285, '284': 286, '285': 287, '286': 288, '287': 289, '288': 290, '289': 291, '290': 292, '291': 293, '292': 294, '293': 295, '294': 296, '295': 297, '296': 298, '297': 299, '298': 300, '299': 301, '300': 302, '301': 303, '302': 304, '303': 305, '304': 306, '305': 307, '306': 308, '307': 309, '308': 310, '309': 311, '310': 312, '311': 313, '312': 314, '313': 315, '314': 316, '315': 317, '316': 318, '317': 319, '318': 320, '319': 321, '320': 322, '321': 323, '322': 324, '323': 325, '324': 326, '325': 327, '326': 328, '327': 329, '328': 330, '329': 331, '330': 332, '331': 333, '332': 334, '333': 335, '334': 336, '335': 337, '336': 338, '337': 339, '338': 340, '339': 341, '340': 342, '341': 343, '342': 344, '343': 345, '344': 346, '345': 347, '346': 348, '347': 349, '348': 350, '349': 351, '350': 352, '351': 353, '352': 354, '353': 355, '354': 356, '355': 357, '356': 358, '357': 359, '358': 360, '359': 361}\n", "data dirs: ['/home/weiyu/data_drive/data_new_objects/examples_circle_new_objects/result', '/home/weiyu/data_drive/data_new_objects/examples_line_new_objects/result', '/home/weiyu/data_drive/data_new_objects/examples_tower_new_objects/result', '/home/weiyu/data_drive/data_new_objects/examples_dinner_new_objects/result']\n", "40000 valid sequences\n" ] } ], "source": [ "tokenizer = Tokenizer(\"/home/weiyu/data_drive/data_new_objects/type_vocabs_coarse.json\")\n", "\n", "data_roots = []\n", "index_roots = []\n", "for shape, index in [(\"circle\", \"index_10k\"), (\"line\", \"index_10k\"), (\"tower\", \"index_10k\"), (\"dinner\", \"index_10k\")]:\n", " data_roots.append(\"/home/weiyu/data_drive/data_new_objects/examples_{}_new_objects/result\".format(shape))\n", " index_roots.append(index)\n", "\n", "dataset = SemanticArrangementDataset(data_roots=data_roots, index_roots=index_roots, splits=[\"train\", \"valid\", \"test\"], tokenizer=tokenizer)" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 4, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "\n", "{'place_at_once': 'False', 'position': [0.4530459674902468, 0.2866384076623889, 0.011194709806729462], 'rotation': [5.101818936729106e-05, 1.362746309147995e-06, 2.145504341444197], 'type': 'tower'}\n", "[('tower', 'shape'), (2.145504341444197, 'rotation'), (0.4530459674902468, 'position_x'), (0.2866384076623889, 'position_y')]\n", "tower in the middle left of the table facing west\n", "[('tower', 'shape'), (2.145504341444197, 'rotation'), (0.4530459674902468, 'position_x'), (0.2866384076623889, 'position_y')]\n", "tower in the middle left of the table facing west\n", "(('rotation', 'west'), ('shape', 'tower'), ('x', 'middle'), ('y', 'left'))\n", "\n", "\n", "{'length': 0.15789473684210525, 'length_increment': 0.05, 'max_length': 1.0, 'min_length': 0.0, 'place_at_once': 'True', 'position': [0.5744088910421017, 0.0, 0.0], 'rotation': [0.0, -0.0, 0.0], 'type': 'dinner', 'uniform_space': 'False'}\n", "[('dinner', 'shape'), (0.0, 'rotation'), (0.5744088910421017, 'position_x'), (0.0, 'position_y')]\n", "dinner in the middle center of the table facing south\n", "[('dinner', 'shape'), (0.0, 'rotation'), (0.5744088910421017, 'position_x'), (0.0, 'position_y')]\n", "dinner in the middle center of the table facing south\n", "(('rotation', 'south'), ('shape', 'dinner'), ('x', 'middle'), ('y', 'center'))\n", "\n", "\n", "{'place_at_once': 'False', 'position': [0.5300184865230677, -0.11749143967722209, 0.043775766459831195], 'rotation': [8.311828443210225e-05, 2.8403995850279114e-05, -1.9831750137833084], 'type': 'tower'}\n", "[('tower', 'shape'), (-1.9831750137833084, 'rotation'), (0.5300184865230677, 'position_x'), (-0.11749143967722209, 'position_y')]\n", "tower in the middle center of the table facing north\n", "[('tower', 'shape')]\n", "tower\n", "(('shape', 'tower'),)\n", "\n", "\n", "{'length': 0.3157894736842105, 'length_increment': 0.05, 'max_length': 1.0, 'min_length': 0.0, 'place_at_once': 'True', 'position': [0.6482385523146229, 0.0, 0.0], 'rotation': [0.0, -0.0, 0.0], 'type': 'dinner', 'uniform_space': 'False'}\n", "[('dinner', 'shape'), (0.0, 'rotation'), (0.6482385523146229, 'position_x'), (0.0, 'position_y')]\n", "dinner in the top center of the table facing south\n", "[('dinner', 'shape')]\n", "dinner\n", "(('shape', 'dinner'),)\n", "\n", "\n", "{'angle_ratio': 1.0, 'face_center': 'True', 'max_radius': 0.5, 'min_radius': 0.050687861718942046, 'place_at_once': 'True', 'position': [0.2998438437491998, -0.03599718247376027, 0.0], 'radius': 0.0966402394976866, 'radius_increment': 0.005, 'rotation': [0.0, -0.0, 2.053106459668934], 'type': 'circle', 'uniform_angle': 'True'}\n", "[('circle', 'shape'), (2.053106459668934, 'rotation'), (0.2998438437491998, 'position_x'), (-0.03599718247376027, 'position_y'), (0.0966402394976866, 'radius')]\n", "small circle in the middle center of the table facing west\n", "[('circle', 'shape'), (2.053106459668934, 'rotation'), (0.2998438437491998, 'position_x'), (-0.03599718247376027, 'position_y'), (0.0966402394976866, 'radius')]\n", "small circle in the middle center of the table facing west\n", "(('rotation', 'west'), ('shape', 'circle'), ('size', 'small'), ('x', 'middle'), ('y', 'center'))\n", "\n", "\n", "{'length': 0.4245597103515504, 'length_increment': 0.005, 'max_length': 1.0, 'min_length': 0.21760311495166934, 'place_at_once': 'True', 'position': [0.6672547106460816, 0.0, 0.0], 'rotation': [0.0, -0.0, 0.0], 'type': 'line', 'uniform_space': 'True'}\n", "[('line', 'shape'), (0.0, 'rotation'), (0.6672547106460816, 'position_x'), (0.0, 'position_y'), (0.2122798551757752, 'radius')]\n", "medium line in the top center of the table facing south\n", "[('line', 'shape'), (0.0, 'rotation'), (0.6672547106460816, 'position_x'), (0.2122798551757752, 'radius')]\n", "medium line in the top facing south\n", "(('rotation', 'south'), ('shape', 'line'), ('size', 'medium'), ('x', 'top'))\n", "\n", "\n", "{'place_at_once': 'False', 'position': [0.6555576184899171, 0.22241488561049588, 0.006522659915853506], 'rotation': [-0.000139418832574769, -7.243860660016997e-05, 2.2437880740062814], 'type': 'tower'}\n", "[('tower', 'shape'), (2.2437880740062814, 'rotation'), (0.6555576184899171, 'position_x'), (0.22241488561049588, 'position_y')]\n", "tower in the top left of the table facing west\n", "[(2.2437880740062814, 'rotation'), (0.6555576184899171, 'position_x')]\n", "in the top facing west\n", "(('rotation', 'west'), ('x', 'top'))\n", "\n", "\n", "{'length': 0.4925060249864075, 'length_increment': 0.005, 'max_length': 1.0, 'min_length': 0.4925060249864075, 'place_at_once': 'True', 'position': [0.7754676784901477, 0.0, 0.0], 'rotation': [0.0, -0.0, 0.0], 'type': 'line', 'uniform_space': 'False'}\n", "[('line', 'shape'), (0.0, 'rotation'), (0.7754676784901477, 'position_x'), (0.0, 'position_y'), (0.24625301249320375, 'radius')]\n", "medium line in the top center of the table facing south\n", "[(0.0, 'rotation'), (0.7754676784901477, 'position_x')]\n", "in the top facing south\n", "(('rotation', 'south'), ('x', 'top'))\n", "\n", "\n", "{'angle_ratio': 1.0, 'face_center': 'True', 'max_radius': 0.5, 'min_radius': 0.2260219063147572, 'place_at_once': 'True', 'position': [0.6256453430245876, 0.1131426073908803, 0.0], 'radius': 0.2260219063147572, 'radius_increment': 0.005, 'rotation': [0.0, -0.0, 1.6063513593439724], 'type': 'circle', 'uniform_angle': 'True'}\n", "[('circle', 'shape'), (1.6063513593439724, 'rotation'), (0.6256453430245876, 'position_x'), (0.1131426073908803, 'position_y'), (0.2260219063147572, 'radius')]\n", "medium circle in the middle center of the table facing west\n", "[(1.6063513593439724, 'rotation'), (0.6256453430245876, 'position_x')]\n", "in the middle facing west\n", "(('rotation', 'west'), ('x', 'middle'))\n", "\n", "\n", "{'angle_ratio': 1.0, 'face_center': 'True', 'max_radius': 0.5, 'min_radius': 0.14976631196286583, 'place_at_once': 'True', 'position': [0.5157008668336853, 0.11005531020590054, 0.0], 'radius': 0.15991801306539147, 'radius_increment': 0.005, 'rotation': [0.0, -0.0, -2.2145659262893918], 'type': 'circle', 'uniform_angle': 'True'}\n", "[('circle', 'shape'), (-2.2145659262893918, 'rotation'), (0.5157008668336853, 'position_x'), (0.11005531020590054, 'position_y'), (0.15991801306539147, 'radius')]\n", "small circle in the middle center of the table facing north\n", "[('circle', 'shape'), (0.5157008668336853, 'position_x'), (0.15991801306539147, 'radius')]\n", "small circle in the middle\n", "(('shape', 'circle'), ('size', 'small'), ('x', 'middle'))\n" ] } ], "source": [ "idxs = np.random.permutation(len(dataset))\n", "for i in idxs[:10]:\n", " print(\"\\n\")\n", " struct_spec = dataset.get_raw_data(i)\n", " print(struct_spec)\n", " struct_word_spec = tokenizer.convert_structure_params_to_natural_language(struct_spec)\n", " print(struct_word_spec)\n", "\n", " token_idxs = np.random.permutation(len(struct_spec))\n", " token_idxs = token_idxs[:np.random.randint(1, len(struct_spec) + 1)]\n", " token_idxs = sorted(token_idxs)\n", " incomplete_struct_spec = [struct_spec[ti] for ti in token_idxs]\n", "\n", " print(incomplete_struct_spec)\n", " print(tokenizer.convert_structure_params_to_natural_language(incomplete_struct_spec))\n", "\n", " type_value_tuple = tokenizer.convert_structure_params_to_type_value_tuple(incomplete_struct_spec)\n", " print(type_value_tuple)" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 49, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 40000/40000 [00:23<00:00, 1699.94it/s]" ] }, { "name": "stdout", "output_type": "stream", "text": [ "669\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "unique_type_value_tuples = set()\n", "for i in tqdm.tqdm(idxs):\n", " struct_spec = dataset.get_raw_data(i)\n", "\n", " incomplete_struct_specs = []\n", " for L in range(1, len(struct_spec) + 1):\n", " for subset in itertools.combinations(struct_spec, L):\n", " incomplete_struct_specs.append(subset)\n", "\n", " # print(incomplete_struct_specs)\n", "\n", " type_value_tuples = []\n", " for incomplete_struct_spec in incomplete_struct_specs:\n", " type_value_tuples.append(tokenizer.convert_structure_params_to_type_value_tuple(incomplete_struct_spec))\n", "\n", " unique_type_value_tuples.update(type_value_tuples)\n", "\n", "print(len(unique_type_value_tuples))" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": null, "outputs": [], "source": [ "sentence_template = [\n", " \"Put the objects {in a [size][shape]} on the {[x][y] of} the table {facing [rotation]}.\",\n", " \"Build a [size][shape] of the [objects] on the [x][y] of the table facing [rotation].\",\n", " \"Put the [objects] on the [x][y] of the table and make a [shape] facing [rotation].\",\n", " \"Rearrange the [objects] into a [shape], and put the structure on the [x][y] of the table facing [rotation].\",\n", " \"Could you ...\",\n", " \"Please ...\",\n", " \"Pick up the objects, put them into a [size][shape], place the [shape] on the [x][y] of table, make sure the [shape] is facing [rotation].\"]\n", "\n" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "markdown", "source": [ "Enumerate all possible combinations of types" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%% md\n" } } }, { "cell_type": "code", "execution_count": 31, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "31\n", "[('size',), ('shape',), ('x',), ('y',), ('rotation',), ('shape', 'size'), ('size', 'x'), ('size', 'y'), ('rotation', 'size'), ('shape', 'x'), ('shape', 'y'), ('rotation', 'shape'), ('x', 'y'), ('rotation', 'x'), ('rotation', 'y'), ('shape', 'size', 'x'), ('shape', 'size', 'y'), ('rotation', 'shape', 'size'), ('size', 'x', 'y'), ('rotation', 'size', 'x'), ('rotation', 'size', 'y'), ('shape', 'x', 'y'), ('rotation', 'shape', 'x'), ('rotation', 'shape', 'y'), ('rotation', 'x', 'y'), ('shape', 'size', 'x', 'y'), ('rotation', 'shape', 'size', 'x'), ('rotation', 'shape', 'size', 'y'), ('rotation', 'size', 'x', 'y'), ('rotation', 'shape', 'x', 'y'), ('rotation', 'shape', 'size', 'x', 'y')]\n" ] } ], "source": [ "import itertools\n", "types = [\"size\", \"shape\", \"x\", \"y\", \"rotation\"]\n", "\n", "type_combs = []\n", "for L in range(1, len(types) + 1):\n", " for subset in itertools.combinations(types, L):\n", " type_combs.append(tuple(sorted(subset)))\n", "\n", "print(len(type_combs))\n", "print(type_combs)" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 46, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "build a [size] shape from the objects ('size',)\n", "put the objects in to a [size] shape ('size',)\n", "place the objects as a [size] shape ('size',)\n", "make a [size] shape from the objects ('size',)\n", "rearrange the objects into a [size] structure ('size',)\n", "build a [shape] ('shape',)\n", "make a [shape] ('shape',)\n", "put the objects into a [shape] ('shape',)\n", "place the objects as a [shape] ('shape',)\n", "pick up the objects, and place them as a [shape] ('shape',)\n", "place the objects on the [x] of the table ('x',)\n", "put the objects on [x] ('x',)\n", "make a structure from the objects and place it on [x] ('x',)\n", "on the [x] of the table, place the objects ('x',)\n", "move the objects to the [x] ('x',)\n", "place the objects on the [y] of the table ('y',)\n", "put the objects on [y] ('y',)\n", "make a structure from the objects and place it on [y] ('y',)\n", "on the [y] of the table, place the objects ('y',)\n", "move the objects to the [y] ('y',)\n", "build a structure facing [rotation] ('rotation',)\n", "make a structure from the objects and make sure it is pointing [rotation] ('rotation',)\n", "put the objects in a structure that faces [rotation] ('rotation',)\n", "rotate the object structure so that it points [rotation] ('rotation',)\n", "[rotation] is the direction the structure built from the objects should be facing ('rotation',)\n", "build a [size] [shape] ('shape', 'size')\n", "make a [size] [shape] ('shape', 'size')\n", "put the objects into a [size] [shape] ('shape', 'size')\n", "place the objects as a [size] [shape] ('shape', 'size')\n", "pick up the objects, and place them as a [size] [shape] ('shape', 'size')\n", "build a [size] shape from the objects on the [x] of the table ('size', 'x')\n", "put the objects in to a [size] shape and place it on [x] ('size', 'x')\n", "on the [x] of the table, place the objects as a [size] shape ('size', 'x')\n", "make a [size] shape from the objects and move it to [x] ('size', 'x')\n", "rearrange the objects into a [size] structure on [x] ('size', 'x')\n", "build a [size] shape from the objects on the [y] of the table ('size', 'y')\n", "put the objects in to a [size] shape and place it on [y] ('size', 'y')\n", "on the [y] of the table, place the objects as a [size] shape ('size', 'y')\n", "make a [size] shape from the objects and move it to [y] ('size', 'y')\n", "rearrange the objects into a [size] structure on [y] ('size', 'y')\n", "build a [size] shape from the objects facing [rotation] ('rotation', 'size')\n", "put the objects in to a [size] shape and place it so that it faces [rotation] ('rotation', 'size')\n", "place the objects as a [size] shape and [rotation] is the direction the shape built from the objects should be facing ('rotation', 'size')\n", "make a [size] structure from the objects and rotate the object structure so that it points [rotation] ('rotation', 'size')\n", "rearrange the objects into a [size] structure that points to [rotation] ('rotation', 'size')\n", "build a [shape] from the objects on the [x] of the table ('shape', 'x')\n", "put the objects in to a [shape] and place it on [x] ('shape', 'x')\n", "on the [x] of the table, place the objects as a [shape] ('shape', 'x')\n", "make a [shape] from the objects and move it to [x] ('shape', 'x')\n", "rearrange the objects into a [shape] on [x] ('shape', 'x')\n", "build a [shape] from the objects on the [y] of the table ('shape', 'y')\n", "put the objects in to a [shape] and place it on [y] ('shape', 'y')\n", "on the [y] of the table, place the objects as a [shape] ('shape', 'y')\n", "make a [shape] from the objects and move it to [y] ('shape', 'y')\n", "rearrange the objects into a [shape] on [y] ('shape', 'y')\n", "build a [shape] from the objects facing [rotation] ('rotation', 'shape')\n", "put the objects in to a [shape] and place it so that it faces [rotation] ('rotation', 'shape')\n", "place the objects as a [shape] and [rotation] is the direction the shape built from the objects should be facing ('rotation', 'shape')\n", "make a [shape] from the objects and rotate the shape so that it points [rotation] ('rotation', 'shape')\n", "rearrange the objects into a [shape] that points to [rotation] ('rotation', 'shape')\n", "place the objects on the [x] and [y] of the table ('x', 'y')\n", "put the objects on [x] [y] of the table ('x', 'y')\n", "make a structure from the objects and place it on [x] [y] ('x', 'y')\n", "on the [x] [y] of the table, place the objects ('x', 'y')\n", "move the objects to the [x] [y] ('x', 'y')\n", "build a structure on the [x] of the table facing [rotation] ('rotation', 'x')\n", "make a structure from the objects and make sure it is pointing [rotation] and on [x] ('rotation', 'x')\n", "rearrange the objects in a structure that faces [rotation] and place it on [x] ('rotation', 'x')\n", "move and rotate the object structure so that it is on [x] and points [rotation] ('rotation', 'x')\n", "[rotation] is the direction the structure built from the objects should be facing, [x] is the location ('rotation', 'x')\n", "build a structure on the [y] of the table facing [rotation] ('rotation', 'y')\n", "make a structure from the objects and make sure it is pointing [rotation] and on [y] ('rotation', 'y')\n", "rearrange the objects in a structure that faces [rotation] and place it on [y] ('rotation', 'y')\n", "move and rotate the object structure so that it is on [y] and points [rotation] ('rotation', 'y')\n", "[rotation] is the direction the structure built from the objects should be facing, [y] is the location ('rotation', 'y')\n", "build a [size] [shape] from the objects on the [x] of the table ('shape', 'size', 'x')\n", "put the objects in to a [size] [shape] and place it on [x] ('shape', 'size', 'x')\n", "on the [x] of the table, place the objects as a [shape], make the shape [size] ('shape', 'size', 'x')\n", "make a [size] [shape] from the objects and move it to [x] ('shape', 'size', 'x')\n", "rearrange the objects into a [size] [shape] on [x] ('shape', 'size', 'x')\n", "build a [size] [shape] from the objects on the [y] of the table ('shape', 'size', 'y')\n", "put the objects in to a [size] [shape] and place it on [y] ('shape', 'size', 'y')\n", "on the [y] of the table, place the objects as a [shape], make the shape [size] ('shape', 'size', 'y')\n", "make a [size] [shape] from the objects and move it to [y] ('shape', 'size', 'y')\n", "rearrange the objects into a [size] [shape] on [y] ('shape', 'size', 'y')\n", "build a [size] [shape] from the objects facing [rotation] ('rotation', 'shape', 'size')\n", "put the objects in to a [size] [shape] and place it so that it faces [rotation] ('rotation', 'shape', 'size')\n", "place the objects as a [size] [shape] and [rotation] is the direction the shape built from the objects should be facing ('rotation', 'shape', 'size')\n", "make a [size] [shape] from the objects and rotate the shape so that it points [rotation] ('rotation', 'shape', 'size')\n", "rearrange the objects into a [size] [shape] that points to [rotation] ('rotation', 'shape', 'size')\n", "build a [size] shape from the objects on the [x] [y] of the table ('size', 'x', 'y')\n", "put the objects in to a [size] shape and place it on [x] and [y] ('size', 'x', 'y')\n", "on the [x] [y] of the table, place the objects as a [size] shape ('size', 'x', 'y')\n", "make a [size] shape from the objects and move it to [x] [y] ('size', 'x', 'y')\n", "rearrange the objects into a [size] structure on [x] and on [y] ('size', 'x', 'y')\n", "build a [size] structure on the [x] of the table facing [rotation] ('rotation', 'size', 'x')\n", "make a [size] structure from the objects and make sure it is pointing [rotation] and on [x] ('rotation', 'size', 'x')\n", "rearrange the objects in a [size] structure that faces [rotation] and place it on [x] ('rotation', 'size', 'x')\n", "move and rotate the [size] object structure so that it is on [x] and points [rotation] ('rotation', 'size', 'x')\n", "[rotation] is the direction the [size] structure built from the objects should be facing, [x] is the location ('rotation', 'size', 'x')\n", "build a [size] structure on the [y] of the table facing [rotation] ('rotation', 'size', 'y')\n", "make a [size] structure from the objects and make sure it is pointing [rotation] and on [y] ('rotation', 'size', 'y')\n", "rearrange the objects in a [size] structure that faces [rotation] and place it on [y] ('rotation', 'size', 'y')\n", "move and rotate the [size] object structure so that it is on [y] and points [rotation] ('rotation', 'size', 'y')\n", "[rotation] is the direction the [size] structure built from the objects should be facing, [y] is the location ('rotation', 'size', 'y')\n", "build a [shape] from the objects on the [x] [y] of the table ('shape', 'x', 'y')\n", "put the objects in to a [shape] and place it on [x] and [y] ('shape', 'x', 'y')\n", "on the [x] [y] of the table, place the objects as a [shape] ('shape', 'x', 'y')\n", "make a [shape] from the objects and move it to [x] [y] ('shape', 'x', 'y')\n", "rearrange the objects into a [shape] on [x] and on [y] ('shape', 'x', 'y')\n", "build a [shape] on the [x] of the table facing [rotation] ('rotation', 'shape', 'x')\n", "make a [shape] from the objects and make sure it is pointing [rotation] and on [x] ('rotation', 'shape', 'x')\n", "rearrange the objects in a [shape] that faces [rotation] and place it on [x] ('rotation', 'shape', 'x')\n", "move and rotate the [shape] so that it is on [x] and points [rotation] ('rotation', 'shape', 'x')\n", "[rotation] is the direction the [shape] built from the objects should be facing, [x] is the location ('rotation', 'shape', 'x')\n", "build a [shape] on the [y] of the table facing [rotation] ('rotation', 'shape', 'y')\n", "make a [shape] from the objects and make sure it is pointing [rotation] and on [y] ('rotation', 'shape', 'y')\n", "rearrange the objects in a [shape] that faces [rotation] and place it on [y] ('rotation', 'shape', 'y')\n", "move and rotate the [shape] so that it is on [y] and points [rotation] ('rotation', 'shape', 'y')\n", "[rotation] is the direction the [shape] built from the objects should be facing, [y] is the location ('rotation', 'shape', 'y')\n", "build a structure on the [x] [y] of the table facing [rotation] ('rotation', 'x', 'y')\n", "make a structure from the objects and make sure it is pointing [rotation] and on [x] [y] ('rotation', 'x', 'y')\n", "rearrange the objects in a structure that faces [rotation] and place it on [x] [y] ('rotation', 'x', 'y')\n", "move and rotate the object structure so that it is on [x] [y] and points [rotation] ('rotation', 'x', 'y')\n", "[rotation] is the direction the structure built from the objects should be facing, [x] [y] is the location ('rotation', 'x', 'y')\n", "build a [shape] from the objects on the [x] [y] of the table, make the [shape] [size] ('shape', 'size', 'x', 'y')\n", "put the objects in to a [size] [shape] and place it on [x] and [y] ('shape', 'size', 'x', 'y')\n", "on the [x] [y] of the table, place the objects as a [size] [shape] ('shape', 'size', 'x', 'y')\n", "make a [size] [shape] from the objects and move it to [x] [y] ('shape', 'size', 'x', 'y')\n", "rearrange the objects into a [size] [shape] on [x] and on [y] ('shape', 'size', 'x', 'y')\n", "build a [size] [shape] on the [x] of the table facing [rotation] ('rotation', 'shape', 'size', 'x')\n", "make a [size] [shape] from the objects and make sure it is pointing [rotation] and on [x] ('rotation', 'shape', 'size', 'x')\n", "rearrange the objects in a [size] [shape] that faces [rotation] and place it on [x] ('rotation', 'shape', 'size', 'x')\n", "move and rotate the [size] [shape] so that it is on [x] and points [rotation] ('rotation', 'shape', 'size', 'x')\n", "[rotation] is the direction the [size] [shape] built from the objects should be facing, [x] is the location ('rotation', 'shape', 'size', 'x')\n", "build a [size] [shape] on the [y] of the table facing [rotation] ('rotation', 'shape', 'size', 'y')\n", "make a [size] [shape] from the objects and make sure it is pointing [rotation] and on [y] ('rotation', 'shape', 'size', 'y')\n", "rearrange the objects in a [size] [shape] that faces [rotation] and place it on [y] ('rotation', 'shape', 'size', 'y')\n", "move and rotate the [size] [shape] so that it is on [y] and points [rotation] ('rotation', 'shape', 'size', 'y')\n", "[rotation] is the direction the [size] [shape] built from the objects should be facing, [y] is the location ('rotation', 'shape', 'size', 'y')\n", "build a [size] structure on the [x] [y] of the table facing [rotation] ('rotation', 'size', 'x', 'y')\n", "make a [size] structure from the objects and make sure it is pointing [rotation] and on [x] [y] ('rotation', 'size', 'x', 'y')\n", "rearrange the objects in a [size] structure that faces [rotation] and place it on [x] [y] ('rotation', 'size', 'x', 'y')\n", "move and rotate the [size] object structure so that it is on [x] [y] and points [rotation] ('rotation', 'size', 'x', 'y')\n", "[rotation] is the direction the [size] structure built from the objects should be facing, [x] [y] is the location ('rotation', 'size', 'x', 'y')\n", "build a [shape] on the [x] [y] of the table facing [rotation] ('rotation', 'shape', 'x', 'y')\n", "make a [shape] from the objects and make sure it is pointing [rotation] and on [x] [y] ('rotation', 'shape', 'x', 'y')\n", "rearrange the objects as a [shape] that faces [rotation] and place it on [x] [y] ('rotation', 'shape', 'x', 'y')\n", "move and rotate the [shape] so that it is on [x] [y] and points [rotation] ('rotation', 'shape', 'x', 'y')\n", "[rotation] is the direction the [shape] built from the objects should be facing, [x] [y] is the location ('rotation', 'shape', 'x', 'y')\n", "build a [size] [shape] on the [x] [y] of the table facing [rotation] ('rotation', 'shape', 'size', 'x', 'y')\n", "make a [size] [shape] from the objects and make sure it is pointing [rotation] and on [x] [y] ('rotation', 'shape', 'size', 'x', 'y')\n", "rearrange the objects as a [size] [shape] that faces [rotation] and place it on [x] [y] ('rotation', 'shape', 'size', 'x', 'y')\n", "move and rotate the [size] [shape] so that it is on [x] [y] and points [rotation] ('rotation', 'shape', 'size', 'x', 'y')\n", "[rotation] is the direction the [size] [shape] built from the objects should be facing, [x] [y] is the location ('rotation', 'shape', 'size', 'x', 'y')\n" ] } ], "source": [ "sentence_template_file = \"/home/weiyu/Research/intern/StructDiffuser/src/StructDiffuser/language/sentence_template.txt\"\n", "\n", "import re\n", "\n", "type_comb_to_templates = {}\n", "for type_comb in type_combs:\n", " type_comb_to_templates[type_comb] = []\n", "\n", "with open(sentence_template_file, \"r\") as fh:\n", " for line in fh:\n", " line = line.strip()\n", " if line:\n", " if line[0] == \"#\":\n", " continue\n", " type_list = re.findall('\\[[^\\]]*\\]', line)\n", " type_comb = tuple(sorted(list(set([t[1:-1] for t in type_list]))))\n", " print(line, type_comb)\n", "\n", " type_comb_to_templates[type_comb].append(line)" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 47, "outputs": [], "source": [ "for type_comb in type_comb_to_templates:\n", " if len(type_comb_to_templates[type_comb]) != 5:\n", " print(\"{} does not have 5 templates\".format(type_comb))" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 58, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "100%|██████████| 669/669 [00:00<00:00, 60546.98it/s]\n" ] } ], "source": [ "template_sentences = []\n", "type_value_tuple_to_template_sentences = defaultdict(set)\n", "for type_value_tuple in tqdm.tqdm(list(unique_type_value_tuples)):\n", " type_comb = tuple(sorted([tv[0] for tv in type_value_tuple]))\n", " template_sentences = copy.deepcopy(type_comb_to_templates[type_comb])\n", "\n", " # print(type_value_tuple)\n", " for template_sentence in template_sentences:\n", " for t, v in type_value_tuple:\n", " template_sentence = template_sentence.replace(\"[{}]\".format(t), v)\n", " # print(template_sentence)\n", "\n", " type_value_tuple_to_template_sentences[type_value_tuple].add(template_sentence)\n", "\n", "# convert to list\n", "for type_value_tuple in type_value_tuple_to_template_sentences:\n", " type_value_tuple_to_template_sentences[type_value_tuple] = list(type_value_tuple_to_template_sentences[type_value_tuple])" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 73, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3345 unique template sentences\n" ] } ], "source": [ "unique_template_sentences = set()\n", "\n", "for type_value_tuple in type_value_tuple_to_template_sentences:\n", " # print(\"\\n\")\n", " # print(type_value_tuple)\n", " for template_sentence in type_value_tuple_to_template_sentences[type_value_tuple]:\n", " # print(template_sentence)\n", " unique_template_sentences.add(template_sentence)\n", "\n", "unique_template_sentences = list(unique_template_sentences)\n", "print(\"{} unique template sentences\".format(len(unique_template_sentences)))" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 72, "outputs": [], "source": [ "from sentence_transformers import SentenceTransformer\n", "model = SentenceTransformer('all-MiniLM-L6-v2')" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 76, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(3345, 384)\n" ] } ], "source": [ "#Our sentences we like to encode\n", "# sentences = ['This framework generates embeddings for each input sentence',\n", "# 'Sentences are passed as a list of string.',\n", "# 'The quick brown fox jumps over the lazy dog.']\n", "#Sentences are encoded by calling model.encode()\n", "\n", "\n", "embeddings = model.encode(unique_template_sentences)\n", "print(embeddings.shape)" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 80, "outputs": [], "source": [ "template_sentence_to_embedding = {}\n", "for embedding, template_sentence in zip(embeddings, unique_template_sentences):\n", " template_sentence_to_embedding[template_sentence] = embedding" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } }, { "cell_type": "code", "execution_count": 82, "outputs": [], "source": [ "import pickle\n", "template_sentence_data = {\"template_sentence_to_embedding\": template_sentence_to_embedding,\n", " \"type_value_tuple_to_template_sentences\": type_value_tuple_to_template_sentences}\n", "with open(\"/home/weiyu/Research/intern/StructDiffuser/src/StructDiffuser/language/template_sentence_data.pkl\", \"wb\") as fh:\n", " pickle.dump(template_sentence_data, fh)" ], "metadata": { "collapsed": false, "pycharm": { "name": "#%%\n" } } } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.6" } }, "nbformat": 4, "nbformat_minor": 0 }