from gym_minigrid.minigrid import COLOR_TO_IDX, OBJECT_TO_IDX

# Header that generate_text_obs strips from the utterance history; a history
# equal to this header alone is treated as "nothing was said".
_CONVERSATION_HEADER = "Conversation: \n"

# Type indices that are never described as objects. Index 2 is reported as
# "a wall" by the wall scan below; 0 and 1 are skipped as see-through cells
# (presumably "unseen" and "empty" in OBJECT_TO_IDX -- TODO confirm).
_SEE_THROUGH_TYPE_IDXS = (0, 1)
_WALL_TYPE_IDX = 2
_NON_OBJECT_TYPE_IDXS = _SEE_THROUGH_TYPE_IDXS + (_WALL_TYPE_IDX,)

# Value of the pointing / countdown slots meaning "not set".
_UNSET = 255

# Shared by the NPC gaze and pointing slots.
_RELATIVE_DIRECTIONS = {
    0: "towards you",
    1: "to the left of you",
    2: "in the same direction as you",
    3: "to the right of you",
}

# NPC action name -> text to report; an empty string means "do not report".
_NPC_ACTION_TEXT = {
    "go_forward": "forward",
    "rotate_left": "turn left",
    "rotate_right": "turn right",
    "toggle_action": "toggle",
    "point_stop_point": "stop pointing",
    "point_E": "",
    "point_S": "",
    "point_W": "",
    "point_N": "",
    "stop_point": "stop pointing",
    "no_op": "",
}

_NPC_ATTITUDES = {0: "friendly", 1: "antagonistic"}
_LEVER_STATES = {1: "activated", 0: "unactivated"}

# Object types described only by colour and type (all remaining slots zero).
_STATELESS_TYPES = (
    "switch", "apple", "generatorplatform", "marble", "marbletee", "fence",
)

# Object types described by "<state> <colour> <type>", keyed by slot 2.
_STATE_NAMES = {
    "lockablebox": {0: "open", 1: "closed", 2: "locked"},
    "applegenerator": {1: "square", 2: "round"},
    "remotedoor": {0: "open", 1: "closed"},
    "door": {0: "open", 1: "closed", 2: "locked"},
}


def generate_text_obs(obs, info):
    """Return the textual observation used as the LLM prompt.

    The prompt is ``"Obs : "`` followed by the concatenated fragments from
    :func:`obs_to_text` and, when anything was said, the utterance history
    with its ``"Conversation: \\n"`` header removed.

    Args:
        obs: mapping with an ``"utterance_history"`` string.
        info: mapping forwarded unchanged to :func:`obs_to_text`.
    """
    llm_prompt = "Obs : " + "".join(obs_to_text(info))

    utterance_history = obs["utterance_history"]
    if utterance_history != _CONVERSATION_HEADER:
        llm_prompt += utterance_history.replace(_CONVERSATION_HEADER, "")

    return llm_prompt


def _steps(count):
    """Return ``"<count> step"`` or ``"<count> steps"`` as grammar requires."""
    return f"{count} step" if count == 1 else f"{count} steps"


def _describe_location(front_dist, left_right_dist):
    """Describe a cell's position relative to the agent.

    ``front_dist`` counts cells ahead of the agent; ``left_right_dist`` is
    negative to the left and positive to the right.
    """
    if front_dist == 1 and left_right_dist == 0:
        return "Right in front of you "
    if left_right_dist == 1 and front_dist == 0:
        return "Just to the right of you"
    if left_right_dist == -1 and front_dist == 0:
        return "Just to the left of you"

    loc_descr = f"{_steps(front_dist)} in front of you " if front_dist > 0 else ""
    if left_right_dist != 0:
        and_ = "and" if loc_descr else ""
        side = "left" if left_right_dist < 0 else "right"
        # The space after {and_} is kept even when and_ is empty so the
        # emitted text keeps its historical layout.
        loc_descr += f"{and_} {_steps(abs(left_right_dist))} to the {side}"
    return loc_descr


def _describe_npc(cell, color, npc_actions_dict):
    """Describe an NPC cell: attitude, gaze, pointing and last action."""
    description = f"{_NPC_ATTITUDES[cell[2]]} {color} peer. "
    description += f"It is looking {_RELATIVE_DIRECTIONS[cell[3]]}. "

    if cell[4] != _UNSET:
        description += f"It is pointing {_RELATIVE_DIRECTIONS[cell[4]]}. "

    # npc_actions_dict maps action name -> index; slot 5 stores the index.
    action_names = {idx: name for name, idx in npc_actions_dict.items()}
    action_text = _NPC_ACTION_TEXT[action_names[cell[5]]]
    if action_text:
        description += f"Its last action is {action_text}. "

    return description


def _describe_object(cell, idx_to_object, idx_to_color, npc_actions_dict):
    """Return the description of the object encoded in ``cell``.

    ``cell`` is one encoded grid cell: slot 0 is the object type index,
    slot 1 the colour index, slot 2 the state; later slots are type-specific.

    Raises:
        ValueError: if the object type has no description rule.
    """
    obj_type = idx_to_object[cell[0]]

    if obj_type == "npc":
        return _describe_npc(cell, idx_to_color[cell[1]], npc_actions_dict)

    if obj_type in _STATELESS_TYPES:
        # todo: this assumes that Switch.no_light == True
        description = f"{idx_to_color[cell[1]]} {obj_type} "
        assert cell[2:].mean() == 0, f"unexpected state for {obj_type}"
        return description

    if obj_type in _STATE_NAMES:
        state = _STATE_NAMES[obj_type][cell[2]]
        description = f"{state} {idx_to_color[cell[1]]} {obj_type} "
        assert cell[3:].mean() == 0, f"unexpected extra state for {obj_type}"
        return description

    if obj_type == "lever":
        if cell[3] == _UNSET:
            countdown_txt = ""
        else:
            countdown_txt = f"with {cell[3]} timesteps left. "
        description = (
            f"{_LEVER_STATES[cell[2]]} {idx_to_color[cell[1]]} {obj_type} "
            f"{countdown_txt}"
        )
        assert cell[4:].mean() == 0, f"unexpected extra state for {obj_type}"
        return description

    raise ValueError(f"Undefined object type {obj_type}")


def _wall_distance(type_idxs):
    """Return the 1-based distance to a wall, or None if none is visible.

    ``type_idxs`` are the type indices of the cells moving away from the
    agent. A wall counts only if nothing else stands between it and the
    agent, so the scan stops at the first cell that is not see-through.
    """
    for distance, type_idx in enumerate(type_idxs, start=1):
        if type_idx not in _SEE_THROUGH_TYPE_IDXS:
            return distance if type_idx == _WALL_TYPE_IDX else None
    return None


def _describe_walls(image, agent_pos_vx, agent_pos_vy):
    """Describe the nearest unobstructed wall in front, left and right."""
    scans = (
        (
            (image[agent_pos_vx][j][0] for j in range(agent_pos_vy - 1, -1, -1)),
            "in front of you",
        ),
        (
            (image[i][agent_pos_vy][0] for i in range(agent_pos_vx - 1, -1, -1)),
            "to the left",
        ),
        (
            (
                image[i][agent_pos_vy][0]
                for i in range(agent_pos_vx + 1, image.shape[0])
            ),
            "to the right",
        ),
    )
    descriptions = []
    for type_idxs, where in scans:
        distance = _wall_distance(type_idxs)
        if distance is not None:
            descriptions.append(f"A wall is {_steps(distance)} {where}. \n")
    return descriptions


def obs_to_text(info, *, add_wall_descr=False):
    """Translate the agent's encoded view into a list of text fragments.

    Args:
        info: mapping providing ``"image"`` (the encoded view, indexed as
            ``image[i][j][slot]``), ``"carrying"`` (an object with ``color``
            and ``type`` attributes, or None), ``"agent_pos_vx"`` /
            ``"agent_pos_vy"`` (the agent's coordinates inside the view) and
            ``"npc_actions_dict"`` (NPC action name -> index).
        add_wall_descr: when True, also describe the nearest unobstructed
            wall in front, to the left and to the right of the agent.

    Returns:
        A list of description strings, each ending in a newline. If there is
        nothing to describe the list is ``["\\n"]``.

    Raises:
        ValueError: if a visible cell holds an object type with no
            description rule.
    """
    image = info["image"]
    carrying = info["carrying"]
    agent_pos_vx, agent_pos_vy = info["agent_pos_vx"], info["agent_pos_vy"]
    npc_actions_dict = info["npc_actions_dict"]

    # Cell encoding: (OBJECT_TO_IDX[type], COLOR_TO_IDX[color], state, ...)
    idx_to_color = {idx: name for name, idx in COLOR_TO_IDX.items()}
    idx_to_object = {idx: name for name, idx in OBJECT_TO_IDX.items()}

    list_textual_descriptions = []

    if carrying is not None:
        # Fragments are concatenated without a separator by the caller, so
        # this one needs its own trailing newline like all the others.
        list_textual_descriptions.append(
            "You carry a {} {}\n".format(carrying.color, carrying.type)
        )

    # NOTE(review): this used to be a hard-coded local `add_wall_descr = False`.
    # The rewrite assumes all three wall scans were guarded by it -- confirm.
    if add_wall_descr:
        list_textual_descriptions.extend(
            _describe_walls(image, agent_pos_vx, agent_pos_vy)
        )

    # Describe every visible object relative to the agent, scanning the view
    # in increasing i, then increasing j.
    for i in range(image.shape[0]):
        for j in range(image.shape[1]):
            cell = image[i][j]
            if cell[0] in _NON_OBJECT_TYPE_IDXS:
                continue
            if i == agent_pos_vx and j == agent_pos_vy:
                # The agent's own cell is never described.
                continue

            loc_descr = _describe_location(agent_pos_vy - j, i - agent_pos_vx)
            loc_descr += " there is a "
            description = _describe_object(
                cell, idx_to_object, idx_to_color, npc_actions_dict
            )
            list_textual_descriptions.append(loc_descr + description + "\n")

    if not list_textual_descriptions:
        list_textual_descriptions.append("\n")

    return list_textual_descriptions