grg committed on
Commit 11bd154 • 1 Parent(s): 67eafed

Move LLM obs-to-text code into textworld_utils; bugfixes.

gym-minigrid/gym_minigrid/minigrid.py CHANGED
@@ -3443,7 +3443,7 @@ class MultiModalMiniGridEnv(MiniGridEnv):
 
        obs = super().render(*args, **kwargs)
 
-       if args[0] == 'human':
+       if args and args[0] == 'human':
            # draw text to the side of the image
            self.window.clear_text()  # erase previous text
            if show_dialogue:
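
Note: the guard fixes a real failure mode. render() is often invoked with the mode passed as a keyword, e.g. render(mode='human'); then *args is empty and the old args[0] lookup raises IndexError before any drawing happens. A minimal standalone sketch of the behavior (a hypothetical toy function, not the actual env class):

    def render(*args, **kwargs):
        # args == () when the caller writes render(mode='human');
        # `args and ...` short-circuits before the args[0] lookup.
        if args and args[0] == 'human':
            return "frame + text overlay"
        return "frame only"

    print(render('human'))       # -> frame + text overlay
    print(render(mode='human'))  # -> frame only, and no IndexError

The same guard is applied to InformationSeekingEnv.render in the next file.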
gym-minigrid/gym_minigrid/social_ai_envs/informationseekingenv.py CHANGED
@@ -1249,7 +1249,7 @@ class InformationSeekingEnv(MultiModalMiniGridEnv):
 
    def render(self, *args, **kwargs):
        obs = super().render(*args, **kwargs)
-       if args[0] == 'human':
+       if args and args[0] == 'human':
            self.window.clear_text()  # erase previous text
            self.window.set_caption(self.full_conversation)
 
scripts/LLM_test.py CHANGED
@@ -10,8 +10,9 @@ import numpy as np
 import torch
 from pathlib import Path
 
-from utils.babyai_utils.baby_agent import load_agent
+# from utils.babyai_utils.baby_agent import load_agent
 from utils import *
+from textworld_utils.utils import generate_text_obs
 from models import *
 import subprocess
 import os
@@ -161,230 +162,230 @@ def prompt_preprocessor(llm_prompt):
 
     return "\n".join(new_lines)
 
-def generate_text_obs(obs, info):
-    [... body omitted here: moved verbatim to textworld_utils/utils.py, shown in full below ...]
-
-def obs_to_text(info):
-    [... body omitted here: moved verbatim to textworld_utils/utils.py, shown in full below ...]
+# def generate_text_obs(obs, info):
+#     [... the same body, left commented out in place ...]
+
+# def obs_to_text(info):
+#     [... the same body, left commented out in place ...]
 
 def plt_2_rgb(env):
     # data = np.frombuffer(env.window.fig.canvas.tostring_rgb(), dtype=np.uint8)
@@ -639,7 +640,6 @@ def load_in_context_examples(in_context_episodes):
     print("-------------------------- IN CONTEXT EXAMPLES --------------------------")
     print(in_context_examples)
     print("-------------------------------------------------------------------------")
-    exit()
 
     return in_context_examples
 
textworld_utils/__init__.py ADDED
@@ -0,0 +1 @@
+from .utils import *
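
Since __init__.py re-exports everything from utils with a star import (and utils.py, below, defines no __all__), the helper is importable from the package root as well as via the module path used in LLM_test.py; both lines resolve to the same function:

    from textworld_utils import generate_text_obs         # via the package __init__
    from textworld_utils.utils import generate_text_obs   # direct module path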
textworld_utils/utils.py ADDED
@@ -0,0 +1,226 @@
+from gym_minigrid.minigrid import COLOR_TO_IDX, OBJECT_TO_IDX
+
+def generate_text_obs(obs, info):
+
+    text_observation = obs_to_text(info)
+
+    llm_prompt = "Obs : "
+    llm_prompt += "".join(text_observation)
+
+    # add utterances
+    if obs["utterance_history"] != "Conversation: \n":
+        utt_hist = obs['utterance_history']
+        utt_hist = utt_hist.replace("Conversation: \n","")
+        llm_prompt += utt_hist
+
+    return llm_prompt
+
+def obs_to_text(info):
+    image, vis_mask = info["image"], info["vis_mask"]
+    carrying = info["carrying"]
+    agent_pos_vx, agent_pos_vy = info["agent_pos_vx"], info["agent_pos_vy"]
+    npc_actions_dict = info["npc_actions_dict"]
+
+    # (OBJECT_TO_IDX[self.type], COLOR_TO_IDX[self.color], state)
+    # State, 0: open, 1: closed, 2: locked
+    IDX_TO_COLOR = dict(zip(COLOR_TO_IDX.values(), COLOR_TO_IDX.keys()))
+    IDX_TO_OBJECT = dict(zip(OBJECT_TO_IDX.values(), OBJECT_TO_IDX.keys()))
+
+    list_textual_descriptions = []
+
+    if carrying is not None:
+        list_textual_descriptions.append("You carry a {} {}".format(carrying.color, carrying.type))
+
+    # agent_pos_vx, agent_pos_vy = self.get_view_coords(self.agent_pos[0], self.agent_pos[1])
+
+    view_field_dictionary = dict()
+
+    for i in range(image.shape[0]):
+        for j in range(image.shape[1]):
+            if image[i][j][0] != 0 and image[i][j][0] != 1 and image[i][j][0] != 2:
+                if i not in view_field_dictionary.keys():
+                    view_field_dictionary[i] = dict()
+                    view_field_dictionary[i][j] = image[i][j]
+                else:
+                    view_field_dictionary[i][j] = image[i][j]
+
+    # Find the wall if any
+    # We describe a wall only if there are no objects between the agent and the wall in a straight line
+
+    # Find wall in front
+    add_wall_descr = False
+    if add_wall_descr:
+        j = agent_pos_vy - 1
+        object_seen = False
+        while j >= 0 and not object_seen:
+            if image[agent_pos_vx][j][0] != 0 and image[agent_pos_vx][j][0] != 1:
+                if image[agent_pos_vx][j][0] == 2:
+                    list_textual_descriptions.append(
+                        f"A wall is {agent_pos_vy - j} steps in front of you. \n")  # forward
+                    object_seen = True
+                else:
+                    object_seen = True
+            j -= 1
+        # Find wall left
+        i = agent_pos_vx - 1
+        object_seen = False
+        while i >= 0 and not object_seen:
+            if image[i][agent_pos_vy][0] != 0 and image[i][agent_pos_vy][0] != 1:
+                if image[i][agent_pos_vy][0] == 2:
+                    list_textual_descriptions.append(
+                        f"A wall is {agent_pos_vx - i} steps to the left. \n")  # left
+                    object_seen = True
+                else:
+                    object_seen = True
+            i -= 1
+        # Find wall right
+        i = agent_pos_vx + 1
+        object_seen = False
+        while i < image.shape[0] and not object_seen:
+            if image[i][agent_pos_vy][0] != 0 and image[i][agent_pos_vy][0] != 1:
+                if image[i][agent_pos_vy][0] == 2:
+                    list_textual_descriptions.append(
+                        f"A wall is {i - agent_pos_vx} steps to the right. \n")  # right
+                    object_seen = True
+                else:
+                    object_seen = True
+            i += 1
+
+    # list_textual_descriptions.append("You see the following objects: ")
+    # returns the position of seen objects relative to you
+    for i in view_field_dictionary.keys():
+        for j in view_field_dictionary[i].keys():
+            if i != agent_pos_vx or j != agent_pos_vy:
+                object = view_field_dictionary[i][j]
+
+                # # don't show npc
+                # if IDX_TO_OBJECT[object[0]] == "npc":
+                #     continue
+
+                front_dist = agent_pos_vy - j
+                left_right_dist = i - agent_pos_vx
+
+                loc_descr = ""
+                if front_dist == 1 and left_right_dist == 0:
+                    loc_descr += "Right in front of you "
+
+                elif left_right_dist == 1 and front_dist == 0:
+                    loc_descr += "Just to the right of you"
+
+                elif left_right_dist == -1 and front_dist == 0:
+                    loc_descr += "Just to the left of you"
+
+                else:
+                    front_str = str(front_dist) + " steps in front of you " if front_dist > 0 else ""
+
+                    loc_descr += front_str
+
+                    suff = "s" if abs(left_right_dist) > 0 else ""
+                    and_ = "and" if loc_descr != "" else ""
+
+                    if left_right_dist < 0:
+                        left_right_str = f"{and_} {-left_right_dist} step{suff} to the left"
+                        loc_descr += left_right_str
+
+                    elif left_right_dist > 0:
+                        left_right_str = f"{and_} {left_right_dist} step{suff} to the right"
+                        loc_descr += left_right_str
+
+                    else:
+                        left_right_str = ""
+                        loc_descr += left_right_str
+
+                loc_descr += " there is a "
+
+                obj_type = IDX_TO_OBJECT[object[0]]
+                if obj_type == "npc":
+                    IDX_TO_STATE = {0: 'friendly', 1: 'antagonistic'}
+
+                    description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} peer. "
+
+                    # gaze
+                    gaze_dir = {
+                        0: "towards you",
+                        1: "to the left of you",
+                        2: "in the same direction as you",
+                        3: "to the right of you",
+                    }
+                    description += f"It is looking {gaze_dir[object[3]]}. "
+
+                    # point
+                    point_dir = {
+                        0: "towards you",
+                        1: "to the left of you",
+                        2: "in the same direction as you",
+                        3: "to the right of you",
+                    }
+
+                    if object[4] != 255:
+                        description += f"It is pointing {point_dir[object[4]]}. "
+
+                    # last action
+                    last_action = {v: k for k, v in npc_actions_dict.items()}[object[5]]
+
+                    last_action = {
+                        "go_forward": "forward",
+                        "rotate_left": "turn left",
+                        "rotate_right": "turn right",
+                        "toggle_action": "toggle",
+                        "point_stop_point": "stop pointing",
+                        "point_E": "",
+                        "point_S": "",
+                        "point_W": "",
+                        "point_N": "",
+                        "stop_point": "stop pointing",
+                        "no_op": ""
+                    }[last_action]
+
+                    if last_action not in ["no_op", ""]:
+                        description += f"Its last action is {last_action}. "
+
+                elif obj_type in ["switch", "apple", "generatorplatform", "marble", "marbletee", "fence"]:
+                    # todo: this assumes that Switch.no_light == True
+                    description = f"{IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} "
+                    assert object[2:].mean() == 0
+
+                elif obj_type == "lockablebox":
+                    IDX_TO_STATE = {0: 'open', 1: 'closed', 2: 'locked'}
+                    description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} "
+                    assert object[3:].mean() == 0
+
+                elif obj_type == "applegenerator":
+                    IDX_TO_STATE = {1: 'square', 2: 'round'}
+                    description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} "
+                    assert object[3:].mean() == 0
+
+                elif obj_type == "remotedoor":
+                    IDX_TO_STATE = {0: 'open', 1: 'closed'}
+                    description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} "
+                    assert object[3:].mean() == 0
+
+                elif obj_type == "door":
+                    IDX_TO_STATE = {0: 'open', 1: 'closed', 2: 'locked'}
+                    description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} "
+                    assert object[3:].mean() == 0
+
+                elif obj_type == "lever":
+                    IDX_TO_STATE = {1: 'activated', 0: 'unactivated'}
+                    if object[3] == 255:
+                        countdown_txt = ""
+                    else:
+                        countdown_txt = f"with {object[3]} timesteps left. "
+
+                    description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} {countdown_txt}"
+
+                    assert object[4:].mean() == 0
+                else:
+                    raise ValueError(f"Undefined object type {obj_type}")
+
+                full_destr = loc_descr + description + "\n"
+
+                list_textual_descriptions.append(full_destr)
+
+    if len(list_textual_descriptions) == 0:
+        list_textual_descriptions.append("\n")
+
+    return list_textual_descriptions
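
For reference, a sketch of driving the relocated helper with stub inputs; the dict keys mirror exactly what generate_text_obs/obs_to_text read, but the values below are hypothetical stand-ins for what a SocialAI env would populate in its info dict:

    import numpy as np
    from textworld_utils.utils import generate_text_obs

    H, W = 7, 7
    info = {
        "image": np.zeros((H, W, 6), dtype=np.uint8),  # all-zero view: no objects to describe
        "vis_mask": np.ones((H, W), dtype=bool),
        "carrying": None,
        "agent_pos_vx": W // 2,
        "agent_pos_vy": H - 1,
        "npc_actions_dict": {"no_op": 0},
    }
    obs = {"utterance_history": "Conversation: \n"}  # the empty-conversation sentinel is skipped

    print(generate_text_obs(obs, info))  # -> "Obs : \n" (empty scene, no utterances)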