grg committed on
Commit 11bd154 • 1 Parent(s): 67eafed

Move LLM obs-to-text code into textworld_utils; bugfixes.

gym-minigrid/gym_minigrid/minigrid.py CHANGED
@@ -3443,7 +3443,7 @@ class MultiModalMiniGridEnv(MiniGridEnv):
 
        obs = super().render(*args, **kwargs)
 
-       if args[0] == 'human':
+       if args and args[0] == 'human':
            # draw text to the side of the image
            self.window.clear_text()  # erase previous text
            if show_dialogue:
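
Note: the guard fixes a real failure mode. render() is often invoked with the mode passed as a keyword, e.g. render(mode='human'); then *args is empty and the old args[0] lookup raises IndexError before any drawing happens. A minimal standalone sketch of the behavior (a hypothetical toy function, not the actual env class):

    def render(*args, **kwargs):
        # args == () when the caller writes render(mode='human');
        # `args and ...` short-circuits before the args[0] lookup.
        if args and args[0] == 'human':
            return "frame + text overlay"
        return "frame only"

    print(render('human'))       # -> frame + text overlay
    print(render(mode='human'))  # -> frame only, and no IndexError

The same guard is applied to InformationSeekingEnv.render in the next file.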
gym-minigrid/gym_minigrid/social_ai_envs/informationseekingenv.py CHANGED
@@ -1249,7 +1249,7 @@ class InformationSeekingEnv(MultiModalMiniGridEnv):
 
    def render(self, *args, **kwargs):
        obs = super().render(*args, **kwargs)
-       if args[0] == 'human':
+       if args and args[0] == 'human':
            self.window.clear_text()  # erase previous text
            self.window.set_caption(self.full_conversation)
 
scripts/LLM_test.py CHANGED
@@ -10,8 +10,9 @@ import numpy as np
 import torch
 from pathlib import Path
 
-from utils.babyai_utils.baby_agent import load_agent
+# from utils.babyai_utils.baby_agent import load_agent
 from utils import *
+from textworld_utils.utils import generate_text_obs
 from models import *
 import subprocess
 import os
@@ -161,230 +162,230 @@ def prompt_preprocessor(llm_prompt):
 
     return "\n".join(new_lines)
 
-def generate_text_obs(obs, info):
-    [... body omitted here: moved verbatim to textworld_utils/utils.py, shown in full below ...]
-
-def obs_to_text(info):
-    [... body omitted here: moved verbatim to textworld_utils/utils.py, shown in full below ...]
+# def generate_text_obs(obs, info):
+#     [... the same body, left commented out in place ...]
+
+# def obs_to_text(info):
+#     [... the same body, left commented out in place ...]
 
 def plt_2_rgb(env):
     # data = np.frombuffer(env.window.fig.canvas.tostring_rgb(), dtype=np.uint8)
@@ -639,7 +640,6 @@ def load_in_context_examples(in_context_episodes):
     print("-------------------------- IN CONTEXT EXAMPLES --------------------------")
     print(in_context_examples)
     print("-------------------------------------------------------------------------")
-    exit()
 
     return in_context_examples
 
textworld_utils/__init__.py ADDED
@@ -0,0 +1 @@
+from .utils import *
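
Since __init__.py re-exports everything from utils with a star import (and utils.py, below, defines no __all__), the helper is importable from the package root as well as via the module path used in LLM_test.py; both lines resolve to the same function:

    from textworld_utils import generate_text_obs         # via the package __init__
    from textworld_utils.utils import generate_text_obs   # direct module path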
textworld_utils/utils.py ADDED
@@ -0,0 +1,226 @@
+from gym_minigrid.minigrid import COLOR_TO_IDX, OBJECT_TO_IDX
+
+def generate_text_obs(obs, info):
+
+    text_observation = obs_to_text(info)
+
+    llm_prompt = "Obs : "
+    llm_prompt += "".join(text_observation)
+
+    # add utterances
+    if obs["utterance_history"] != "Conversation: \n":
+        utt_hist = obs['utterance_history']
+        utt_hist = utt_hist.replace("Conversation: \n","")
+        llm_prompt += utt_hist
+
+    return llm_prompt
+
+def obs_to_text(info):
+    image, vis_mask = info["image"], info["vis_mask"]
+    carrying = info["carrying"]
+    agent_pos_vx, agent_pos_vy = info["agent_pos_vx"], info["agent_pos_vy"]
+    npc_actions_dict = info["npc_actions_dict"]
+
+    # (OBJECT_TO_IDX[self.type], COLOR_TO_IDX[self.color], state)
+    # State, 0: open, 1: closed, 2: locked
+    IDX_TO_COLOR = dict(zip(COLOR_TO_IDX.values(), COLOR_TO_IDX.keys()))
+    IDX_TO_OBJECT = dict(zip(OBJECT_TO_IDX.values(), OBJECT_TO_IDX.keys()))
+
+    list_textual_descriptions = []
+
+    if carrying is not None:
+        list_textual_descriptions.append("You carry a {} {}".format(carrying.color, carrying.type))
+
+    # agent_pos_vx, agent_pos_vy = self.get_view_coords(self.agent_pos[0], self.agent_pos[1])
+
+    view_field_dictionary = dict()
+
+    for i in range(image.shape[0]):
+        for j in range(image.shape[1]):
+            if image[i][j][0] != 0 and image[i][j][0] != 1 and image[i][j][0] != 2:
+                if i not in view_field_dictionary.keys():
+                    view_field_dictionary[i] = dict()
+                    view_field_dictionary[i][j] = image[i][j]
+                else:
+                    view_field_dictionary[i][j] = image[i][j]
+
+    # Find the wall if any
+    # We describe a wall only if there are no objects between the agent and the wall in a straight line
+
+    # Find wall in front
+    add_wall_descr = False
+    if add_wall_descr:
+        j = agent_pos_vy - 1
+        object_seen = False
+        while j >= 0 and not object_seen:
+            if image[agent_pos_vx][j][0] != 0 and image[agent_pos_vx][j][0] != 1:
+                if image[agent_pos_vx][j][0] == 2:
+                    list_textual_descriptions.append(
+                        f"A wall is {agent_pos_vy - j} steps in front of you. \n")  # forward
+                    object_seen = True
+                else:
+                    object_seen = True
+            j -= 1
+        # Find wall left
+        i = agent_pos_vx - 1
+        object_seen = False
+        while i >= 0 and not object_seen:
+            if image[i][agent_pos_vy][0] != 0 and image[i][agent_pos_vy][0] != 1:
+                if image[i][agent_pos_vy][0] == 2:
+                    list_textual_descriptions.append(
+                        f"A wall is {agent_pos_vx - i} steps to the left. \n")  # left
+                    object_seen = True
+                else:
+                    object_seen = True
+            i -= 1
+        # Find wall right
+        i = agent_pos_vx + 1
+        object_seen = False
+        while i < image.shape[0] and not object_seen:
+            if image[i][agent_pos_vy][0] != 0 and image[i][agent_pos_vy][0] != 1:
+                if image[i][agent_pos_vy][0] == 2:
+                    list_textual_descriptions.append(
+                        f"A wall is {i - agent_pos_vx} steps to the right. \n")  # right
+                    object_seen = True
+                else:
+                    object_seen = True
+            i += 1
+
+    # list_textual_descriptions.append("You see the following objects: ")
+    # returns the position of seen objects relative to you
+    for i in view_field_dictionary.keys():
+        for j in view_field_dictionary[i].keys():
+            if i != agent_pos_vx or j != agent_pos_vy:
+                object = view_field_dictionary[i][j]
+
+                # # don't show npc
+                # if IDX_TO_OBJECT[object[0]] == "npc":
+                #     continue
+
+                front_dist = agent_pos_vy - j
+                left_right_dist = i - agent_pos_vx
+
+                loc_descr = ""
+                if front_dist == 1 and left_right_dist == 0:
+                    loc_descr += "Right in front of you "
+
+                elif left_right_dist == 1 and front_dist == 0:
+                    loc_descr += "Just to the right of you"
+
+                elif left_right_dist == -1 and front_dist == 0:
+                    loc_descr += "Just to the left of you"
+
+                else:
+                    front_str = str(front_dist) + " steps in front of you " if front_dist > 0 else ""
+
+                    loc_descr += front_str
+
+                    suff = "s" if abs(left_right_dist) > 0 else ""
+                    and_ = "and" if loc_descr != "" else ""
+
+                    if left_right_dist < 0:
+                        left_right_str = f"{and_} {-left_right_dist} step{suff} to the left"
+                        loc_descr += left_right_str
+
+                    elif left_right_dist > 0:
+                        left_right_str = f"{and_} {left_right_dist} step{suff} to the right"
+                        loc_descr += left_right_str
+
+                    else:
+                        left_right_str = ""
+                        loc_descr += left_right_str
+
+                loc_descr += " there is a "
+
+                obj_type = IDX_TO_OBJECT[object[0]]
+                if obj_type == "npc":
+                    IDX_TO_STATE = {0: 'friendly', 1: 'antagonistic'}
+
+                    description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} peer. "
+
+                    # gaze
+                    gaze_dir = {
+                        0: "towards you",
+                        1: "to the left of you",
+                        2: "in the same direction as you",
+                        3: "to the right of you",
+                    }
+                    description += f"It is looking {gaze_dir[object[3]]}. "
+
+                    # point
+                    point_dir = {
+                        0: "towards you",
+                        1: "to the left of you",
+                        2: "in the same direction as you",
+                        3: "to the right of you",
+                    }
+
+                    if object[4] != 255:
+                        description += f"It is pointing {point_dir[object[4]]}. "
+
+                    # last action
+                    last_action = {v: k for k, v in npc_actions_dict.items()}[object[5]]
+
+                    last_action = {
+                        "go_forward": "forward",
+                        "rotate_left": "turn left",
+                        "rotate_right": "turn right",
+                        "toggle_action": "toggle",
+                        "point_stop_point": "stop pointing",
+                        "point_E": "",
+                        "point_S": "",
+                        "point_W": "",
+                        "point_N": "",
+                        "stop_point": "stop pointing",
+                        "no_op": ""
+                    }[last_action]
+
+                    if last_action not in ["no_op", ""]:
+                        description += f"Its last action is {last_action}. "
+
+                elif obj_type in ["switch", "apple", "generatorplatform", "marble", "marbletee", "fence"]:
+                    # todo: this assumes that Switch.no_light == True
+                    description = f"{IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} "
+                    assert object[2:].mean() == 0
+
+                elif obj_type == "lockablebox":
+                    IDX_TO_STATE = {0: 'open', 1: 'closed', 2: 'locked'}
+                    description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} "
+                    assert object[3:].mean() == 0
+
+                elif obj_type == "applegenerator":
+                    IDX_TO_STATE = {1: 'square', 2: 'round'}
+                    description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} "
+                    assert object[3:].mean() == 0
+
+                elif obj_type == "remotedoor":
+                    IDX_TO_STATE = {0: 'open', 1: 'closed'}
+                    description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} "
+                    assert object[3:].mean() == 0
+
+                elif obj_type == "door":
+                    IDX_TO_STATE = {0: 'open', 1: 'closed', 2: 'locked'}
+                    description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} "
+                    assert object[3:].mean() == 0
+
+                elif obj_type == "lever":
+                    IDX_TO_STATE = {1: 'activated', 0: 'unactivated'}
+                    if object[3] == 255:
+                        countdown_txt = ""
+                    else:
+                        countdown_txt = f"with {object[3]} timesteps left. "
+
+                    description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} {countdown_txt}"
+
+                    assert object[4:].mean() == 0
+                else:
+                    raise ValueError(f"Undefined object type {obj_type}")
+
+                full_destr = loc_descr + description + "\n"
+
+                list_textual_descriptions.append(full_destr)
+
+    if len(list_textual_descriptions) == 0:
+        list_textual_descriptions.append("\n")
+
+    return list_textual_descriptions
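
For reference, a sketch of driving the relocated helper with stub inputs; the dict keys mirror exactly what generate_text_obs/obs_to_text read, but the values below are hypothetical stand-ins for what a SocialAI env would populate in its info dict:

    import numpy as np
    from textworld_utils.utils import generate_text_obs

    H, W = 7, 7
    info = {
        "image": np.zeros((H, W, 6), dtype=np.uint8),  # all-zero view: no objects to describe
        "vis_mask": np.ones((H, W), dtype=bool),
        "carrying": None,
        "agent_pos_vx": W // 2,
        "agent_pos_vy": H - 1,
        "npc_actions_dict": {"no_op": 0},
    }
    obs = {"utterance_history": "Conversation: \n"}  # the empty-conversation sentinel is skipped

    print(generate_text_obs(obs, info))  # -> "Obs : \n" (empty scene, no utterances)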