Spaces:

flowers-team
/

SocialAISchool

Sleeping

App Files Files Community

SocialAISchool / gym-minigrid /gym_minigrid /minigrid.py

grg

Moving LLM obs to text in textworld utils, bugfixes.

11bd154 9 months ago

raw

history blame

No virus

110 kB

	import math
	import random
	import hashlib
	import gym
	from enum import IntEnum
	import numpy as np
	from gym import error, spaces, utils
	from gym.utils import seeding
	from .rendering import *
	from abc import ABC, abstractmethod
	import warnings
	import astar

	import traceback
	import warnings
	from functools import wraps

	# Primitive NPC actions, each mapped to its integer id (ids follow listing order).
	SocialAINPCActionsDict = {
	    action_name: action_id
	    for action_id, action_name in enumerate((
	        "go_forward",
	        "rotate_left",
	        "rotate_right",
	        "toggle_action",
	        "point_stop_point",
	        "point_E",
	        "point_S",
	        "point_W",
	        "point_N",
	        "stop_point",
	        "no_op",
	    ))
	}

	# Integer code of each pointing action (E, S, W, N -> 0..3).
	point_dir_encoding = {
	    point_name: point_code
	    for point_code, point_name in enumerate((
	        "point_E",
	        "point_S",
	        "point_W",
	        "point_N",
	    ))
	}

	def get_traceback():
	    """Return the caller's formatted call stack as one string.

	    The stack is captured inside this function, so the innermost entry
	    (this function's own frame) is dropped before joining.
	    """
	    formatted_frames = traceback.format_list(traceback.extract_stack())
	    frames_without_self = formatted_frames[:-1]
	    return "".join(frames_without_self)


	# Edge length, in pixels, of one tile in the full-scale human view
	TILE_PIXELS = 32

	# RGB value of every named color
	COLORS = {
	    color_name: np.array(rgb)
	    for color_name, rgb in (
	        ('red', [255, 0, 0]),
	        ('green', [0, 255, 0]),
	        ('blue', [0, 0, 255]),
	        ('purple', [112, 39, 195]),
	        ('yellow', [255, 255, 0]),
	        ('grey', [100, 100, 100]),
	        ('brown', [82, 36, 19]),
	    )
	}

	# Alphabetically sorted color names
	COLOR_NAMES = sorted(COLORS)

	# Color name -> integer index
	COLOR_TO_IDX = {
	    color_name: color_idx
	    for color_idx, color_name in enumerate((
	        'red',
	        'green',
	        'blue',
	        'purple',
	        'yellow',
	        'grey',
	        'brown',
	    ))
	}

	# Integer index -> color name (inverse of COLOR_TO_IDX)
	IDX_TO_COLOR = {color_idx: color_name for color_name, color_idx in COLOR_TO_IDX.items()}

	# Object type name -> integer index
	OBJECT_TO_IDX = {
	    object_name: object_idx
	    for object_idx, object_name in enumerate((
	        'unseen',
	        'empty',
	        'wall',
	        'floor',
	        'door',
	        'key',
	        'ball',
	        'box',
	        'goal',
	        'lava',
	        'agent',
	        'npc',
	        'switch',
	        'lockablebox',
	        'apple',
	        'applegenerator',
	        'generatorplatform',
	        'marble',
	        'marbletee',
	        'fence',
	        'remotedoor',
	        'lever',
	    ))
	}

	# Integer index -> object type name (inverse of OBJECT_TO_IDX)
	IDX_TO_OBJECT = {object_idx: object_name for object_name, object_idx in OBJECT_TO_IDX.items()}

	# State name -> integer index
	STATE_TO_IDX = dict(open=0, closed=1, locked=2)

	# Direction index -> unit vector:
	# 0 right (positive X), 1 down (positive Y), 2 left (negative X), 3 up (negative Y)
	DIR_TO_VEC = [
	    np.array(direction)
	    for direction in ((1, 0), (0, 1), (-1, 0), (0, -1))
	]

	class WorldObj:
	    """
	    Base class for grid world objects.

	    Subclasses override the ``can_*`` / ``see_behind`` predicates, ``toggle``,
	    ``encode`` and ``render`` as needed; the defaults describe an inert,
	    non-overlappable, see-through object.
	    """

	    def __init__(self, type, color):
	        """
	        Args:
	            type: object type name; must be a key of OBJECT_TO_IDX.
	            color: color name; must be a key of COLOR_TO_IDX.
	        """
	        assert type in OBJECT_TO_IDX, type
	        assert color in COLOR_TO_IDX, color
	        self.type = type
	        self.color = color
	        self.contains = None

	        # Initial position of the object
	        self.init_pos = None

	        # Current position of the object
	        self.cur_pos = np.array((0, 0))

	    def can_overlap(self):
	        """Can the agent overlap with this?"""
	        return False

	    def can_push(self):
	        """Can the agent push the object?"""
	        return False

	    def can_pickup(self):
	        """Can the agent pick this up?"""
	        return False

	    def can_contain(self):
	        """Can this contain another object?"""
	        return False

	    def see_behind(self):
	        """Can the agent see behind this object?"""
	        return True

	    def toggle(self, env, pos):
	        """Method to trigger/toggle an action this object performs"""
	        return False

	    def encode(self, nb_dims=3, absolute_coordinates=False):
	        """
	        Encode a description of this object as a nb_dims-tuple of integers.

	        The tuple starts with the type index, then (only when
	        ``absolute_coordinates`` is set) the two components of ``cur_pos``,
	        then the color index; it is right-padded with zeros up to
	        ``nb_dims`` entries.
	        """
	        if absolute_coordinates:
	            core = (OBJECT_TO_IDX[self.type], *self.cur_pos, COLOR_TO_IDX[self.color])
	        else:
	            core = (OBJECT_TO_IDX[self.type], COLOR_TO_IDX[self.color])

	        return core + (0,) * (nb_dims - len(core))

	    def cache(self, *args, **kwargs):
	        """Used for cached rendering: forwards all arguments to ``encode``."""
	        return self.encode(*args, **kwargs)

	    @staticmethod
	    def decode(type_idx, color_idx, state):
	        """
	        Create an object from a 3-tuple state description.

	        Returns None for 'empty' and 'unseen' cells.

	        Raises:
	            AssertionError: if the object type has no decoding rule.
	        """

	        obj_type = IDX_TO_OBJECT[type_idx]
	        color = IDX_TO_COLOR[color_idx]

	        if obj_type == 'empty' or obj_type == 'unseen':
	            return None

	        if obj_type == 'wall':
	            v = Wall(color)
	        elif obj_type == 'floor':
	            v = Floor(color)
	        elif obj_type == 'ball':
	            v = Ball(color)
	        elif obj_type == 'marble':
	            v = Marble(color)
	        elif obj_type == 'marbletee':
	            v = MarbleTee(color)
	        elif obj_type == 'apple':
	            eaten = state == 1
	            v = Apple(color, eaten=eaten)
	        elif obj_type == 'applegenerator':
	            # The type name must match the OBJECT_TO_IDX key ('applegenerator').
	            # AppleGenerator.encode stores 2 for marble-activated generators
	            # and 1 otherwise, so the state channel restores that flag.
	            v = AppleGenerator(color, marble_activation=(state == 2))
	        elif obj_type == 'generatorplatform':
	            v = GeneratorPlatform(color)
	        elif obj_type == 'key':
	            v = Key(color)
	        elif obj_type == 'box':
	            v = Box(color)
	        elif obj_type == 'lockablebox':
	            is_locked = state == 2
	            v = LockableBox(color, is_locked=is_locked)
	        elif obj_type == 'door':
	            # State, 0: open, 1: closed, 2: locked
	            is_open = state == 0
	            is_locked = state == 2
	            v = Door(color, is_open, is_locked)
	        elif obj_type == 'remotedoor':
	            # State, 0: open, 1: closed
	            is_open = state == 0
	            v = RemoteDoor(color, is_open)
	        elif obj_type == 'goal':
	            v = Goal()
	        elif obj_type == 'lava':
	            v = Lava()
	        elif obj_type == 'fence':
	            v = Fence()
	        elif obj_type == 'switch':
	            v = Switch(color, is_on=state)
	        elif obj_type == 'lever':
	            v = Lever(color, is_on=state)
	        elif obj_type == 'npc':
	            warnings.warn("NPC's internal state cannot be decoded. Only the icon is shown.")
	            v = NPC(color)
	            v.npc_type = 0
	        else:
	            # Raised explicitly (rather than `assert False`) so the check
	            # is not stripped when Python runs with -O.
	            raise AssertionError("unknown object type in decode '%s'" % obj_type)

	        return v

	    def render(self, r):
	        """Draw this object with the given renderer"""
	        raise NotImplementedError


	class BlockableWorldObj(WorldObj):
	    """
	    World object that can be blocked and that can block a set of other objects.

	    Pushing or toggling the object calls ``block()`` on every member of
	    ``block_set`` unless the object itself has already been blocked.
	    """

	    def __init__(self, type, color, block_set):
	        """
	        Args:
	            type: object type name, forwarded to WorldObj.
	            color: color name, forwarded to WorldObj.
	            block_set: iterable of objects exposing ``block()``, or None.
	        """
	        super().__init__(type, color)
	        self.block_set = block_set
	        self.blocked = False

	    def can_push(self):
	        """Blockable objects report themselves as pushable."""
	        return True

	    def push(self, *args, **kwargs):
	        """Accept any push arguments (e.g. ``push_dir``/``pusher`` keywords) and block the block set."""
	        return self.block_block_set()

	    def toggle(self, *args, **kwargs):
	        """Accept any toggle arguments and block the block set."""
	        return self.block_block_set()

	    def block_block_set(self):
	        """
	        Block every object of the block set.

	        Returns:
	            False if this object is itself blocked (nothing happens),
	            True otherwise (also when there is no block set).
	        """
	        if self.blocked:
	            return False

	        if self.block_set is not None:
	            for e in self.block_set:
	                e.block()

	        return True

	    def block(self):
	        """Mark this object as blocked."""
	        self.blocked = True


	class Goal(WorldObj):
	    """Green 'goal' object that can be overlapped."""

	    def __init__(self):
	        super().__init__('goal', 'green')

	    def can_overlap(self):
	        """A goal can always be overlapped."""
	        return True

	    def render(self, img):
	        """Fill the whole tile with the object's color."""
	        whole_tile = point_in_rect(0, 1, 0, 1)
	        fill_coords(img, whole_tile, COLORS[self.color])


	class Floor(WorldObj):
	    """
	    Colored floor tile the agent can walk over
	    """

	    def __init__(self, color='blue'):
	        super().__init__('floor', color)

	    def can_overlap(self):
	        """Floor tiles can always be overlapped."""
	        return True

	    def render(self, img):
	        """Draw the tile using half the intensity of its color."""
	        # Halving the RGB value gives the floor a pale color
	        pale = COLORS[self.color] / 2
	        tile_area = point_in_rect(0.031, 1, 0.031, 1)
	        fill_coords(img, tile_area, pale)


	class Lava(WorldObj):
	    """Red 'lava' object that can be overlapped."""

	    def __init__(self):
	        super().__init__('lava', 'red')

	    def can_overlap(self):
	        """Lava can be overlapped."""
	        return True

	    def render(self, img):
	        """Draw an orange background with three rows of black zig-zag waves."""
	        background = (255, 128, 0)
	        wave_color = (0, 0, 0)

	        # Background color
	        fill_coords(img, point_in_rect(0, 1, 0, 1), background)

	        # Little waves: every row is four segments alternating between
	        # the row's low and high y coordinate.
	        xs = (0.1, 0.3, 0.5, 0.7, 0.9)
	        for row in range(3):
	            ylo = 0.3 + 0.2 * row
	            yhi = 0.4 + 0.2 * row
	            for seg in range(4):
	                if seg % 2 == 0:
	                    y_start, y_end = ylo, yhi
	                else:
	                    y_start, y_end = yhi, ylo
	                fill_coords(
	                    img,
	                    point_in_line(xs[seg], y_start, xs[seg + 1], y_end, r=0.03),
	                    wave_color,
	                )


	class Fence(WorldObj):
	    """
	    Same as Lava but can't overlap.
	    """

	    def __init__(self):
	        super().__init__('fence', 'grey')

	    def can_overlap(self):
	        """A fence cannot be overlapped."""
	        return False

	    def render(self, img):
	        """Draw the simple ("ugly") fence: a rectangle plus a quadrangle in the fence color."""
	        c = COLORS[self.color]

	        fill_coords(img, point_in_rect(0.1, 0.9, 0.5, 0.9), c)
	        fill_coords(img, point_in_quadrangle(
	            (0.10, 0.9),
	            (0.10, 0.5),
	            (0.95, 0.9),
	            (0.95, 0.5),
	        ), c)

	    def _render_pretty(self, img):
	        """
	        Alternative "pretty" fence drawing (two rails and three pointed posts).

	        This code used to sit after an unconditional ``return`` in ``render``
	        and was therefore never executed; it is kept here, uncalled, so the
	        drawing is not lost.
	        """
	        c = COLORS[self.color]

	        shapes = (
	            # upper rail
	            (point_in_quadrangle, ((0.15, 0.3125), (0.15, 0.4125), (0.85, 0.4875), (0.85, 0.5875))),
	            # lower rail
	            (point_in_quadrangle, ((0.15, 0.6125), (0.15, 0.7125), (0.85, 0.7875), (0.85, 0.8875))),
	            # middle post and its tip
	            (point_in_quadrangle, ((0.45, 0.2875), (0.45, 0.8875), (0.55, 0.3125), (0.55, 0.9125))),
	            (point_in_triangle, ((0.45, 0.2875), (0.55, 0.3125), (0.5, 0.25))),
	            # left post and its tip
	            (point_in_quadrangle, ((0.25, 0.2375), (0.25, 0.8375), (0.35, 0.2625), (0.35, 0.8625))),
	            (point_in_triangle, ((0.25, 0.2375), (0.35, 0.2625), (0.3, 0.2))),
	            # right post and its tip
	            (point_in_quadrangle, ((0.65, 0.3375), (0.65, 0.9375), (0.75, 0.3625), (0.75, 0.9625))),
	            (point_in_triangle, ((0.65, 0.3375), (0.75, 0.3625), (0.7, 0.3))),
	        )
	        for shape_fn, points in shapes:
	            fill_coords(img, shape_fn(*points), c)


	class Wall(WorldObj):
	    """Wall object: it cannot be seen through."""

	    def __init__(self, color='grey'):
	        super().__init__('wall', color)

	    def see_behind(self):
	        """Walls hide whatever is behind them."""
	        return False

	    def render(self, img):
	        """Fill the whole tile with the wall's color."""
	        whole_tile = point_in_rect(0, 1, 0, 1)
	        fill_coords(img, whole_tile, COLORS[self.color])


	class Lever(BlockableWorldObj):
	"""
	Lever that can be toggled on/off and, when an `object` is attached,
	calls that object's `open_close()` on every successful toggle.

	With `active_steps` set, turning the lever on starts a countdown that
	`step()` decrements; when it reaches zero the lever toggles itself again.
	"""
	# NOTE(review): the indentation of this file was lost in extraction, so
	# the nesting of some statements below (notably `self.was_activated = True`
	# in `toggle`) cannot be confirmed from here - verify against the original.
	def __init__(self, color, object=None, is_on=False, block_set=None, active_steps=None):
	super().__init__('lever', color, block_set)
	self.is_on = is_on
	# Object whose open_close() is called when the lever is toggled (may be None)
	self.object = object

	# Value the countdown is (re)set to when the lever is switched on (None: no timer)
	self.active_steps = active_steps
	self.countdown = None # countdown timer

	self.was_activated = False

	if self.block_set is not None:
	if self.is_on:
	# NOTE(review): the message says "Switch" although this is the Lever
	# class - looks like a copy-paste leftover; confirm before changing.
	raise ValueError("If using a block set, a Switch must be initialized as OFF")

	def can_overlap(self):
	"""A lever can never be overlapped."""
	return False

	def see_behind(self):
	"""A lever does not obstruct the view."""
	return True

	def step(self):
	"""Advance the countdown timer (if one is running) by one step."""
	if self.countdown is not None:
	self.countdown = self.countdown - 1

	if self.countdown <= 0:
	# timer expired: toggle the lever and deactivate the timer
	self.toggle()
	self.countdown = None

	def toggle(self, env=None, pos=None):
	"""
	Flip the lever.

	Returns False without any effect when the lever is blocked, or when it
	is off and `was_activated` is already set; returns True otherwise.
	"""

	if self.blocked:
	return False

	if self.was_activated and not self.is_on:
	# cannot be activated twice
	return False

	self.is_on = not self.is_on

	if self.is_on:
	if self.active_steps is not None:
	# activate countdown to shutdown
	self.countdown = self.active_steps
	self.was_activated = True

	if self.object is not None:
	# open object
	self.object.open_close()

	if self.is_on:
	self.block_block_set()

	return True

	def block(self):
	"""Mark this lever as blocked (toggle() then returns False)."""
	self.blocked = True

	def encode(self, nb_dims=3, absolute_coordinates=False):
	"""
	Encode a description of this object as a tuple of integers.

	Layout: type index, (cur_pos components when absolute_coordinates is
	set), color index, on/off state, countdown; right-padded with zeros
	up to nb_dims entries.
	"""

	# State, 0: off, 1: on
	state = 1 if self.is_on else 0

	# 255 is used when no countdown is running
	count = self.countdown if self.countdown is not None else 255

	if absolute_coordinates:
	v = (OBJECT_TO_IDX[self.type], *self.cur_pos, COLOR_TO_IDX[self.color], state, count)
	else:
	v = (OBJECT_TO_IDX[self.type], COLOR_TO_IDX[self.color], state, count)

	v += (0,) * (nb_dims-len(v))

	return v

	def render(self, img):
	"""Draw the lever; the handle angle depends on the on/off state and the countdown."""
	c = COLORS[self.color]
	black = (0, 0, 0)

	# off_angle = -math.pi/3
	off_angle = -math.pi/2
	on_angle = -math.pi/8


	# Shapes of the handle; they are rotated around (0.5, 0.8) further below
	rotating_lever_shapes = []
	rotating_lever_shapes.append((point_in_rect(0.5, 0.9, 0.77, 0.83), c))
	rotating_lever_shapes.append((point_in_circle(0.9, 0.8, 0.1), c))

	rotating_lever_shapes.append((point_in_circle(0.5, 0.8, 0.08), c))

	if self.is_on:
	if self.countdown is None:
	angle = on_angle
	else:
	# interpolate between off_angle and on_angle by the fraction of the countdown left
	angle = (self.countdown/self.active_steps) * (on_angle-off_angle) + off_angle

	else:
	angle = off_angle

	fill_coords(img, point_in_circle_clip(0.5, 0.8, 0.12, theta_end=-math.pi), c)
	# fill_coords(img, point_in_circle_clip(0.5, 0.8, 0.08, theta_end=-math.pi), black)

	rotating_lever_shapes = [(rotate_fn(v, cx=0.5, cy=0.8, theta=angle), col) for v, col in rotating_lever_shapes]

	for v, col in rotating_lever_shapes:
	fill_coords(img, v, col)

	fill_coords(img, point_in_rect(0.2, 0.8, 0.78, 0.82), c)
	fill_coords(img, point_in_circle(0.5, 0.8, 0.03), (0, 0, 0))


	class RemoteDoor(BlockableWorldObj):
	"""
	Door whose open/closed state is flipped through `open_close()`
	(the Lever class calls `open_close()` on its attached object).
	"""
	# NOTE(review): the indentation of this file was lost in extraction; in
	# `render` it cannot be confirmed from here whether the wifi symbol is
	# drawn only for a closed door or in both states - verify against the original.
	def __init__(self, color, is_open=False, block_set=None):
	super().__init__('remotedoor', color, block_set)
	self.is_open = is_open

	def can_overlap(self):
	"""The agent can only walk over this cell when the door is open"""
	return self.is_open

	def see_behind(self):
	"""The door can only be seen through when it is open."""
	return self.is_open

	# def toggle(self, env, pos=None):
	# return False

	def open_close(self):
	"""Flip the door between open and closed; always returns True."""

	self.is_open = not self.is_open
	return True

	def encode(self, nb_dims=3, absolute_coordinates=False):
	"""
	Encode a description of this object as a tuple of integers.

	Layout: type index, (cur_pos components when absolute_coordinates is
	set), color index, state; right-padded with zeros up to nb_dims entries.
	"""

	# State, 0: open, 1: closed
	state = 0 if self.is_open else 1

	if absolute_coordinates:
	v = (OBJECT_TO_IDX[self.type], *self.cur_pos, COLOR_TO_IDX[self.color], state)
	else:
	v = (OBJECT_TO_IDX[self.type], COLOR_TO_IDX[self.color], state)

	v += (0,) * (nb_dims-len(v))
	return v

	def block(self):
	"""Mark this door as blocked."""
	self.blocked = True

	def render(self, img):
	"""Draw the door: a thin strip when open, a framed panel when closed, and a wifi symbol."""
	c = COLORS[self.color]

	if self.is_open:
	fill_coords(img, point_in_rect(0.88, 1.00, 0.00, 1.00), c)
	fill_coords(img, point_in_rect(0.92, 0.96, 0.04, 0.96), (0,0,0))
	else:

	fill_coords(img, point_in_rect(0.00, 1.00, 0.00, 1.00), c)
	fill_coords(img, point_in_rect(0.04, 0.96, 0.04, 0.96), (0,0,0))
	fill_coords(img, point_in_rect(0.08, 0.92, 0.08, 0.92), c)
	fill_coords(img, point_in_rect(0.12, 0.88, 0.12, 0.88), (0,0,0))

	# wifi symbol: circle sectors of shrinking radius, alternating door color and black
	fill_coords(img, point_in_circle_clip(cx=0.5, cy=0.8, r=0.5, theta_start=-np.pi/3, theta_end=-2*np.pi/3), c)
	fill_coords(img, point_in_circle_clip(cx=0.5, cy=0.8, r=0.45, theta_start=-np.pi/3, theta_end=-2*np.pi/3), (0,0,0))
	fill_coords(img, point_in_circle_clip(cx=0.5, cy=0.8, r=0.4, theta_start=-np.pi/3, theta_end=-2*np.pi/3), c)
	fill_coords(img, point_in_circle_clip(cx=0.5, cy=0.8, r=0.35, theta_start=-np.pi/3, theta_end=-2*np.pi/3), (0,0,0))
	fill_coords(img, point_in_circle_clip(cx=0.5, cy=0.8, r=0.3, theta_start=-np.pi/3, theta_end=-2*np.pi/3), c)
	fill_coords(img, point_in_circle_clip(cx=0.5, cy=0.8, r=0.25, theta_start=-np.pi/3, theta_end=-2*np.pi/3), (0,0,0))
	fill_coords(img, point_in_circle_clip(cx=0.5, cy=0.8, r=0.2, theta_start=-np.pi/3, theta_end=-2*np.pi/3), c)
	fill_coords(img, point_in_circle_clip(cx=0.5, cy=0.8, r=0.15, theta_start=-np.pi/3, theta_end=-2*np.pi/3), (0,0,0))
	fill_coords(img, point_in_circle_clip(cx=0.5, cy=0.8, r=0.1, theta_start=-np.pi/3, theta_end=-2*np.pi/3), c)

	return


	class Door(BlockableWorldObj):
	    """
	    Door that can be open, closed or locked.

	    A locked door is unlocked (and opened) by toggling it while carrying a
	    Key of the same color.
	    """

	    def __init__(self, color, is_open=False, is_locked=False, block_set=None):
	        super().__init__('door', color, block_set)
	        self.is_open = is_open
	        self.is_locked = is_locked

	    def can_overlap(self):
	        """The agent can only walk over this cell when the door is open"""
	        return self.is_open

	    def see_behind(self):
	        """The door can only be seen through when it is open."""
	        return self.is_open

	    def toggle(self, env, pos=None):
	        """
	        Open/close the door, or try to unlock it when it is locked.

	        Args:
	            env: environment; ``env.carrying`` is checked for a matching Key.
	            pos: unused.

	        Returns:
	            False if the door is blocked, or if it is locked and the agent
	            does not carry a Key of the door's color; True otherwise.
	        """
	        if self.blocked:
	            return False

	        if self.is_locked:
	            # If the player has the right key to open the door
	            has_matching_key = isinstance(env.carrying, Key) and env.carrying.color == self.color
	            if has_matching_key:
	                self.is_locked = False
	                self.is_open = True
	            # Report success only when the door was actually unlocked
	            # (previously the result was unconditionally overwritten with False).
	            ret = has_matching_key
	        else:
	            self.is_open = not self.is_open
	            ret = True

	        # The block set is blocked after every toggle attempt on an unblocked door.
	        self.block_block_set()

	        return ret

	    def encode(self, nb_dims=3, absolute_coordinates=False):
	        """
	        Encode a description of this object as a tuple of integers.

	        Layout: type index, (cur_pos components when absolute_coordinates is
	        set), color index, state; right-padded with zeros up to nb_dims entries.
	        """

	        # State, 0: open, 1: closed, 2: locked
	        if self.is_open:
	            state = 0
	        elif self.is_locked:
	            state = 2
	        else:
	            state = 1

	        if absolute_coordinates:
	            v = (OBJECT_TO_IDX[self.type], *self.cur_pos, COLOR_TO_IDX[self.color], state)
	        else:
	            v = (OBJECT_TO_IDX[self.type], COLOR_TO_IDX[self.color], state)

	        v += (0,) * (nb_dims - len(v))
	        return v

	    def render(self, img):
	        """Draw the door: a thin strip when open, otherwise a locked or closed panel."""
	        c = COLORS[self.color]

	        if self.is_open:
	            fill_coords(img, point_in_rect(0.88, 1.00, 0.00, 1.00), c)
	            fill_coords(img, point_in_rect(0.92, 0.96, 0.04, 0.96), (0, 0, 0))
	            return

	        # Door frame and door
	        if self.is_locked:
	            fill_coords(img, point_in_rect(0.00, 1.00, 0.00, 1.00), c)
	            fill_coords(img, point_in_rect(0.06, 0.94, 0.06, 0.94), 0.45 * np.array(c))

	            # Draw key slot
	            fill_coords(img, point_in_rect(0.52, 0.75, 0.50, 0.56), c)
	        else:
	            fill_coords(img, point_in_rect(0.00, 1.00, 0.00, 1.00), c)
	            fill_coords(img, point_in_rect(0.04, 0.96, 0.04, 0.96), (0, 0, 0))
	            fill_coords(img, point_in_rect(0.08, 0.92, 0.08, 0.92), c)
	            fill_coords(img, point_in_rect(0.12, 0.88, 0.12, 0.88), (0, 0, 0))

	            # Draw door handle
	            fill_coords(img, point_in_circle(cx=0.75, cy=0.50, r=0.08), c)


	class Switch(BlockableWorldObj):
	    """
	    Switch that optionally acts on a lockable object when toggled.

	    Depending on ``locker_switch`` the switch either flips the target's
	    ``is_locked`` flag or forwards the toggle to the target.
	    """

	    def __init__(self, color, lockable_object=None, is_on=False, no_turn_off=True, no_light=True, locker_switch=False, block_set=None):
	        super().__init__('switch', color, block_set)
	        self.is_on = is_on
	        self.lockable_object = lockable_object
	        self.no_turn_off = no_turn_off
	        self.no_light = no_light
	        self.locker_switch = locker_switch

	        # The remaining checks only apply when a block set is used
	        if self.block_set is None:
	            return

	        if self.is_on:
	            raise ValueError("If using a block set, a Switch must be initialized as OFF")

	        if not self.no_turn_off:
	            raise ValueError("If using a block set, a Switch must be initialized can't be turned off")

	    def can_overlap(self):
	        """A switch can never be overlapped."""
	        return False

	    def see_behind(self):
	        """A switch does not obstruct the view."""
	        return True

	    def toggle(self, env, pos=None):
	        """
	        Flip the switch and act on the lockable object, if any.

	        Returns False (no effect) when the switch is blocked, or when it is
	        on and ``no_turn_off`` is set; returns True otherwise.
	        """
	        if self.blocked:
	            return False

	        if self.is_on and self.no_turn_off:
	            return False

	        self.is_on = not self.is_on

	        target = self.lockable_object
	        if target is not None:
	            if self.locker_switch:
	                # locker/unlocker switch
	                target.is_locked = not target.is_locked
	            else:
	                # opener switch
	                target.toggle(env, pos)

	        if self.is_on:
	            self.block_block_set()

	        if self.no_turn_off:
	            # assert that obj is toggled only once
	            assert not hasattr(self, "was_toggled")
	            self.was_toggled = True

	        return True

	    def block(self):
	        """Mark this switch as blocked."""
	        self.blocked = True

	    def encode(self, nb_dims=3, absolute_coordinates=False):
	        """
	        Encode a description of this object as a tuple of integers.

	        Layout: type index, (cur_pos components when absolute_coordinates is
	        set), color index, state; right-padded with zeros up to nb_dims entries.
	        """
	        # State, 0: off, 1: on; always 0 when the switch has no light
	        state = 1 if (self.is_on and not self.no_light) else 0

	        head = (OBJECT_TO_IDX[self.type],)
	        if absolute_coordinates:
	            head += tuple(self.cur_pos)

	        v = head + (COLOR_TO_IDX[self.color], state)
	        padding = (0,) * (nb_dims - len(v))
	        return v + padding

	    def render(self, img):
	        """Draw a double frame; the center is tinted only when the switch is on and has a light."""
	        c = COLORS[self.color]
	        black = (0, 0, 0)

	        if self.is_on and not self.no_light:
	            center = 0.45 * np.array(c)
	        else:
	            center = black

	        fill_coords(img, point_in_rect(0.00, 1.00, 0.00, 1.00), c)
	        fill_coords(img, point_in_rect(0.04, 0.96, 0.04, 0.96), black)
	        fill_coords(img, point_in_rect(0.08, 0.92, 0.08, 0.92), c)
	        fill_coords(img, point_in_rect(0.12, 0.88, 0.12, 0.88), center)


	class Key(WorldObj):
	    """Key object that can be picked up."""

	    def __init__(self, color='blue'):
	        super().__init__('key', color)

	    def can_pickup(self):
	        """Keys can be picked up."""
	        return True

	    def render(self, img):
	        """Draw the key: shaft, two teeth and a ring with a black hole."""
	        c = COLORS[self.color]

	        # Shaft (vertical quad) followed by the two teeth
	        key_body = (
	            (0.50, 0.63, 0.31, 0.88),
	            (0.38, 0.50, 0.59, 0.66),
	            (0.38, 0.50, 0.81, 0.88),
	        )
	        for rect in key_body:
	            fill_coords(img, point_in_rect(*rect), c)

	        # Ring
	        fill_coords(img, point_in_circle(cx=0.56, cy=0.28, r=0.190), c)
	        fill_coords(img, point_in_circle(cx=0.56, cy=0.28, r=0.064), (0, 0, 0))


	class MarbleTee(WorldObj):
	    """Tee object (used by Marble); it can be neither picked up nor pushed."""

	    def __init__(self, color="red"):
	        super().__init__('marbletee', color)

	    def can_pickup(self):
	        """A tee cannot be picked up."""
	        return False

	    def can_push(self):
	        """A tee cannot be pushed."""
	        return False

	    def render(self, img):
	        """Draw the tee: a quadrangle on top of a downward-pointing triangle."""
	        c = COLORS[self.color]

	        top = point_in_quadrangle(
	            (0.2, 0.5),
	            (0.8, 0.5),
	            (0.4, 0.6),
	            (0.6, 0.6),
	        )
	        fill_coords(img, top, c)

	        stem = point_in_triangle(
	            (0.4, 0.6),
	            (0.6, 0.6),
	            (0.5, 0.9),
	        )
	        fill_coords(img, stem, c)


	class Marble(WorldObj):
	    """
	    Marble that keeps moving in the direction it was pushed.

	    The first time the marble actually changes cell, its tee is put on the
	    cell it left.
	    """

	    def __init__(self, color='blue', env=None):
	        super().__init__('marble', color)
	        self.is_tagged = False
	        self.moving_dir = None
	        self.env = env
	        self.was_pushed = False
	        self.tee = MarbleTee(color)
	        self.tee_uncovered = False

	    def can_pickup(self):
	        """Marbles can be picked up."""
	        return True

	    def step(self):
	        """Move one cell if the marble is moving; uncover the tee on the first move."""
	        if self.moving_dir is None:
	            return

	        pos_before = self.cur_pos
	        self.go_forward()

	        moved = any(pos_before != self.cur_pos)
	        if moved and not self.tee_uncovered:
	            assert self.was_pushed

	            # if Marble was moved for the first time, uncover the Tee
	            self.env.put_obj_np(self.tee, pos_before)
	            self.tee_uncovered = True

	    @property
	    def is_moving(self):
	        """True while a moving direction is set."""
	        return self.moving_dir is not None

	    @property
	    def dir_vec(self):
	        """
	        Direction vector of the current movement, or (0, 0) when the
	        marble is not moving.
	        """
	        if self.moving_dir is None:
	            return np.array((0, 0))
	        return DIR_TO_VEC[self.moving_dir]

	    @property
	    def front_pos(self):
	        """
	        Position of the cell right in front of the marble.
	        """
	        return self.cur_pos + self.dir_vec

	    def go_forward(self):
	        """
	        Try to advance one cell in the moving direction.

	        The marble moves when the target cell is not the agent's cell and is
	        empty or overlappable. Otherwise, if the target cell holds a pushable
	        object (and is not the agent's cell), that object is pushed and the
	        marble stops. In every other case the marble just stops.

	        Returns True if the marble moved or pushed something, False otherwise.
	        """
	        target_pos = self.front_pos
	        target_cell = self.env.grid.get(*target_pos)

	        agent_in_the_way = target_pos.tolist() == self.env.agent_pos.tolist()

	        if not agent_in_the_way:
	            # Don't move if you are going to collide
	            if target_cell is None or target_cell.can_overlap():
	                self.env.grid.set(*self.cur_pos, None)
	                self.env.grid.set(*target_pos, self)
	                self.cur_pos = target_pos
	                return True

	            # push object if pushable
	            if target_cell.can_push():
	                target_cell.push(push_dir=self.moving_dir, pusher=self)
	                self.moving_dir = None
	                return True

	        self.moving_dir = None
	        return False

	    def can_push(self):
	        """Marbles can be pushed."""
	        return True

	    def push(self, push_dir, pusher=None):
	        """
	        Start moving in direction ``push_dir``.

	        Raises:
	            ValueError: if ``push_dir`` is not exactly of type int.
	        """
	        if type(push_dir) is not int:
	            raise ValueError("Direction must be of type int and is of type {}".format(type(push_dir)))

	        self.moving_dir = push_dir
	        if self.moving_dir is not None:
	            self.was_pushed = True

	    def render(self, img):
	        """Draw the marble (half intensity when tagged) with a small black dot."""
	        color = COLORS[self.color]
	        if self.is_tagged:
	            color = color / 2

	        fill_coords(img, point_in_circle(0.5, 0.5, 0.20), color)
	        fill_coords(img, point_in_circle(0.55, 0.45, 0.07), (0, 0, 0))

	    def tag(self,):
	        """Mark the marble as tagged."""
	        self.is_tagged = True

	    def encode(self, nb_dims=3, absolute_coordinates=False):
	        """
	        Encode a description of this object as a nb_dims-tuple of integers.

	        Every slot after type/(position)/color holds the tag flag
	        (1 when tagged, 0 otherwise).
	        """
	        type_idx = OBJECT_TO_IDX[self.type]
	        if absolute_coordinates:
	            core = (type_idx, *self.cur_pos, COLOR_TO_IDX[self.color])
	        else:
	            core = (type_idx, COLOR_TO_IDX[self.color])

	        tag_flag = 1 if self.is_tagged else 0
	        filler = (tag_flag,) * (nb_dims - len(core))
	        return core + filler


	class Ball(WorldObj):
	    """Ball object that can be picked up and tagged."""

	    def __init__(self, color='blue'):
	        super().__init__('ball', color)
	        self.is_tagged = False

	    def can_pickup(self):
	        """Balls can be picked up."""
	        return True

	    def render(self, img):
	        """Draw the ball as a disc (half intensity when tagged)."""
	        shade = COLORS[self.color]
	        if self.is_tagged:
	            shade = shade / 2
	        disc = point_in_circle(0.5, 0.5, 0.31)
	        fill_coords(img, disc, shade)

	    def tag(self,):
	        """Mark the ball as tagged."""
	        self.is_tagged = True

	    def encode(self, nb_dims=3, absolute_coordinates=False):
	        """
	        Encode a description of this object as a nb_dims-tuple of integers.

	        Every slot after type/(position)/color holds the tag flag
	        (1 when tagged, 0 otherwise).
	        """
	        type_idx = OBJECT_TO_IDX[self.type]
	        if absolute_coordinates:
	            core = (type_idx, *self.cur_pos, COLOR_TO_IDX[self.color])
	        else:
	            core = (type_idx, COLOR_TO_IDX[self.color])

	        tag_flag = 1 if self.is_tagged else 0
	        filler = (tag_flag,) * (nb_dims - len(core))
	        return core + filler


	class Apple(WorldObj):
	    """
	    Apple that can be eaten once by toggling it.

	    The class asserts the invariant "eaten <=> color is yellow" in several
	    places.
	    """
	    # NOTE(review): __init__ and revert reject the color "yellow" even when
	    # eaten=True, which contradicts the invariant checked by encode/render -
	    # confirm the intended behavior.

	    def __init__(self, color='red', eaten=False):
	        super().__init__('apple', color)
	        self.is_tagged = False
	        self.eaten = eaten
	        assert self.color != "yellow"

	    def revert(self, color='red', eaten=False):
	        """Reset color, tag flag and eaten flag."""
	        self.color = color
	        self.is_tagged = False
	        self.eaten = eaten
	        assert self.color != "yellow"

	    def can_pickup(self):
	        """Apples cannot be picked up."""
	        return False

	    def render(self, img):
	        """Draw the apple (half intensity when tagged); an eaten apple gets two black bites."""
	        black = (0, 0, 0)

	        color = COLORS[self.color]
	        if self.is_tagged:
	            color = color / 2

	        # Body: one big disc, its upper part masked out, plus two smaller discs
	        fill_coords(img, point_in_circle(0.5, 0.5, 0.31), color)
	        fill_coords(img, point_in_rect(0.1, 0.9, 0.1, 0.55), black)
	        for cx in (0.35, 0.65):
	            fill_coords(img, point_in_circle(cx, 0.5, 0.15), color)

	        # Stalk
	        fill_coords(img, point_in_rect(0.48, 0.52, 0.2, 0.45), COLORS["brown"])

	        # Leaf (quadrangle)
	        leaf = point_in_quadrangle(
	            (0.52, 0.25),
	            (0.65, 0.1),
	            (0.75, 0.3),
	            (0.90, 0.15),
	        )
	        fill_coords(img, leaf, COLORS["green"])

	        if self.eaten:
	            assert self.color == "yellow"
	            for cx in (0.74, 0.26):
	                fill_coords(img, point_in_circle(cx, 0.6, 0.23), black)

	    def toggle(self, env, pos):
	        """Eat the apple. Returns True the first time, False if it was already eaten."""
	        if self.eaten:
	            assert self.color == "yellow"
	            return False

	        self.eaten = True

	        assert self.color != "yellow"
	        self.color = "yellow"

	        return True

	    def tag(self,):
	        """Mark the apple as tagged."""
	        self.is_tagged = True

	    def encode(self, nb_dims=3, absolute_coordinates=False):
	        """
	        Encode a description of this object as a nb_dims-tuple of integers.

	        Every slot after type/(position)/color holds the tag flag
	        (1 when tagged, 0 otherwise).
	        """

	        # eaten <=> yellow
	        assert self.eaten == (self.color == "yellow")

	        type_idx = OBJECT_TO_IDX[self.type]
	        if absolute_coordinates:
	            core = (type_idx, *self.cur_pos, COLOR_TO_IDX[self.color])
	        else:
	            core = (type_idx, COLOR_TO_IDX[self.color])

	        tag_flag = 1 if self.is_tagged else 0
	        filler = (tag_flag,) * (nb_dims - len(core))
	        return core + filler


	class GeneratorPlatform(WorldObj):
	    """Platform object; it can be neither picked up nor pushed."""

	    def __init__(self, color="red"):
	        super().__init__('generatorplatform', color)

	    def can_pickup(self):
	        """A platform cannot be picked up."""
	        return False

	    def can_push(self):
	        """A platform cannot be pushed."""
	        return False

	    def render(self, img):
	        """Draw two concentric rings: discs of shrinking radius, alternating color and black."""
	        c = COLORS[self.color]
	        black = (0, 0, 0)

	        discs = ((0.2, c), (0.18, black), (0.16, c), (0.14, black))
	        for radius, fill in discs:
	            fill_coords(img, point_in_circle(0.5, 0.5, radius), fill)


	class AppleGenerator(BlockableWorldObj):
	    """
	    Pushable generator that calls an optional ``on_push`` callback when pressed.

	    With ``marble_activation`` set, a push by anything whose type is not
	    exactly Marble does not press the generator.
	    """

	    def __init__(self, color="red", is_pressed=False, block_set=None, on_push=None, marble_activation=False):
	        super().__init__('applegenerator', color, block_set)
	        self.is_pressed = is_pressed
	        self.on_push = on_push
	        self.marble_activation = marble_activation

	    def can_pickup(self):
	        """A generator cannot be picked up."""
	        return False

	    def block(self):
	        """Mark this generator as blocked."""
	        self.blocked = True

	    def can_push(self):
	        """A generator can be pushed."""
	        return True

	    def push(self, push_dir=None, pusher=None):
	        """
	        Press the generator.

	        Returns the result of ``block_block_set()``, or False when the
	        generator is blocked.
	        """
	        # A marble-activated generator is not pressed by a non-marble
	        # pusher; only the block set is blocked.
	        if self.marble_activation and type(pusher) != Marble:
	            return self.block_block_set()

	        if self.blocked:
	            return False

	        self.is_pressed = True

	        if self.on_push:
	            self.on_push()

	        return self.block_block_set()

	    def render(self, img):
	        """Draw a double frame (circles when marble-activated, squares otherwise) with an apple inside."""
	        c = COLORS[self.color]
	        black = (0, 0, 0)
	        red = COLORS["red"]

	        if self.marble_activation:
	            # Two circular outlines
	            for radius, fill in ((0.37, c), (0.35, black), (0.32, c), (0.30, black)):
	                fill_coords(img, point_in_circle(0.5, 0.5, radius), fill)
	        else:
	            # Two square outlines
	            for lo, hi, fill in ((0.15, 0.85, c), (0.16, 0.84, black), (0.22, 0.78, c), (0.24, 0.76, black)):
	                fill_coords(img, point_in_rect(lo, hi, lo, hi), fill)

	        # Apple inside
	        fill_coords(img, point_in_circle(0.5, 0.5, 0.2), red)
	        fill_coords(img, point_in_rect(0.30, 0.65, 0.30, 0.55), black)
	        fill_coords(img, point_in_circle(0.42, 0.5, 0.12), red)
	        fill_coords(img, point_in_circle(0.58, 0.5, 0.12), red)

	        # Stalk
	        fill_coords(img, point_in_rect(0.49, 0.50, 0.25, 0.45), COLORS["brown"])

	        # Leaf
	        leaf = point_in_quadrangle(
	            (0.52, 0.32),
	            (0.60, 0.21),
	            (0.70, 0.34),
	            (0.80, 0.23),
	        )
	        fill_coords(img, leaf, COLORS["green"])

	    def encode(self, nb_dims=3, absolute_coordinates=False):
	        """
	        Encode a description of this object as a tuple of integers.

	        Layout: type index, (cur_pos components when absolute_coordinates is
	        set), color index, activation code (2 when marble-activated, else 1);
	        right-padded with zeros up to nb_dims entries.
	        """
	        activation_code = 2 if self.marble_activation else 1

	        head = (OBJECT_TO_IDX[self.type],)
	        if absolute_coordinates:
	            head += tuple(self.cur_pos)

	        v = head + (COLOR_TO_IDX[self.color], activation_code)
	        padding = (0,) * (nb_dims - len(v))
	        return v + padding


	class Box(WorldObj):
	    """Box that can be picked up and that is replaced by its contents when toggled."""

	    def __init__(self, color="red", contains=None):
	        super().__init__('box', color)
	        self.contains = contains

	    def can_pickup(self):
	        """Boxes can be picked up."""
	        return True

	    def render(self, img):
	        """Draw the box: colored outline, black interior and a horizontal slit."""
	        c = COLORS[self.color]

	        # Outline, interior, then the horizontal slit
	        parts = (
	            ((0.12, 0.88, 0.12, 0.88), c),
	            ((0.18, 0.82, 0.18, 0.82), (0, 0, 0)),
	            ((0.16, 0.84, 0.47, 0.53), c),
	        )
	        for rect, fill in parts:
	            fill_coords(img, point_in_rect(*rect), fill)

	    def toggle(self, env, pos):
	        """Replace the box by its contents in the grid cell at ``pos``; always returns True."""
	        contents = self.contains
	        env.grid.set(*pos, contents)
	        return True


	class LockableBox(BlockableWorldObj):
	    """
	    Box that may be locked.

	    Toggling a locked box with a Key of the same color only unlocks it;
	    toggling an unlocked box replaces it by its contents.
	    """

	    def __init__(self, color="red", is_locked=False, contains=None, block_set=None):
	        super().__init__('lockablebox', color, block_set)
	        self.contains = contains
	        self.is_locked = is_locked

	        self.is_open = False

	    def can_pickup(self):
	        """Lockable boxes can be picked up."""
	        return True

	    def encode(self, nb_dims=3, absolute_coordinates=False):
	        """
	        Encode a description of this object as a tuple of integers.

	        Layout: type index, (cur_pos components when absolute_coordinates is
	        set), color index, state; right-padded with zeros up to nb_dims entries.
	        """
	        # State, 1: closed, 2: locked (same codes as doors)
	        state = 2 if self.is_locked else 1

	        head = (OBJECT_TO_IDX[self.type],)
	        if absolute_coordinates:
	            head += tuple(self.cur_pos)

	        v = head + (COLOR_TO_IDX[self.color], state)
	        padding = (0,) * (nb_dims - len(v))
	        return v + padding

	    def render(self, img):
	        """Draw the box; the interior is tinted when locked and black otherwise."""
	        c = COLORS[self.color]

	        if self.is_locked:
	            interior = 0.45 * np.array(c)
	        else:
	            interior = (0, 0, 0)

	        # Outline
	        fill_coords(img, point_in_rect(0.12, 0.88, 0.12, 0.88), c)
	        fill_coords(img, point_in_rect(0.18, 0.82, 0.18, 0.82), interior)

	        # Horizontal slit
	        fill_coords(img, point_in_rect(0.16, 0.84, 0.47, 0.53), c)

	    def toggle(self, env, pos):
	        """
	        Unlock or open the box.

	        Returns False when the box is blocked, or when it is locked and the
	        agent does not carry a Key of the box's color; True otherwise.
	        """
	        if self.blocked:
	            return False

	        if self.is_locked:
	            key_fits = isinstance(env.carrying, Key) and env.carrying.color == self.color
	            if not key_fits:
	                return False
	            self.is_locked = False
	            self.is_open = True
	            return True

	        self.is_open = True
	        # Replace the box by its contents
	        env.grid.set(*pos, self.contains)

	        self.block_block_set()

	        # assert that obj is toggled only once
	        assert not hasattr(self, "was_toggled")
	        self.was_toggled = True

	        return True

	    def block(self):
	        """Mark this box as blocked."""
	        self.blocked = True


	class NPC(ABC, WorldObj):
	def __init__(self, color, view_size=7):
	super().__init__('npc', color)
	self.point_dir = 255 # initially no point
	self.introduction_statement = "Help please "
	self.list_of_possible_utterances = NPC.get_list_of_possible_utterances()
	self.view_size = view_size
	self.carrying = False
	self.prim_actions_dict = SocialAINPCActionsDict

	self.reset_last_action()

	@staticmethod
	def get_list_of_possible_utterances():
	return ["no_op"]

	def _npc_action(func):
	"""
	Decorator that logs the last action
	"""
	@wraps(func)
	def func_wrapper(self, args, *kwargs):

	if self.env.add_npc_last_prim_action:
	self.last_action = func.__name__

	return func(self, args, *kwargs)

	return func_wrapper

	def reset_last_action(self):
	self.last_action = "no_op"

	def step(self):
	self.reset_last_action()

	if self.env.hidden_npc:
	info = {
	"prim_action": "no_op",
	"utterance": "no_op",
	"was_introduced_to": self.was_introduced_to
	}
	return None, info

	else:
	return None, None

	def handle_introduction(self, utterance):
	    """
	    Update ``self.was_introduced_to`` according to the environment's
	    "Pragmatic_frame_complexity" parameter ("No" when the key is absent).

	    - "No": the NPC counts as introduced right away.
	    - "Eye_contact": introduced at the first eye contact.
	    - "Ask": introduced whenever the introduction statement is uttered.
	    - "Ask_Eye_contact": introduced at the first eye contact during which
	      the introduction statement is uttered.

	    :param utterance: the agent's utterance for this step
	    :return: ``(reply, action)``; both are always ``None`` in this implementation
	    :raises NotImplementedError: for any other parameter value
	    """
	    reply, action = None, None

	    frame = self.env.parameters.get("Pragmatic_frame_complexity", "No")

	    if frame == "No":
	        # only once
	        if not self.was_introduced_to:
	            self.was_introduced_to = True

	    elif frame == "Eye_contact":
	        # only first time at eye contact
	        if self.is_eye_contact() and not self.was_introduced_to:
	            self.was_introduced_to = True

	    elif frame == "Ask":
	        # every time asked
	        if utterance == self.introduction_statement:
	            self.was_introduced_to = True

	    elif frame == "Ask_Eye_contact":
	        # only first time at eye contact with the introduction statement
	        asked_with_eye_contact = self.is_eye_contact() and utterance == self.introduction_statement
	        if asked_with_eye_contact and not self.was_introduced_to:
	            self.was_introduced_to = True

	    else:
	        raise NotImplementedError()

	    return reply, action

	def look_at_action(self, target_pos):
	    """Return the turn action rotating the NPC towards ``target_pos`` (``None`` if already facing it)."""
	    return self.compute_turn_action(self.compute_wanted_dir(target_pos))

	@_npc_action
	def rotate_left(self):
	    """Turn the NPC one step counter-clockwise (direction index decreases, wrapping below 0). Always returns True."""
	    new_dir = self.npc_dir - 1
	    if new_dir < 0:
	        new_dir += 4
	    self.npc_dir = new_dir
	    return True

	@_npc_action
	def rotate_right(self):
	    """Turn the NPC one step clockwise (direction index increases modulo 4). Always returns True."""
	    next_dir = self.npc_dir + 1
	    self.npc_dir = next_dir % 4
	    return True

	def path_to_toggle_pos(self, goal_pos):
	    """
	    Return the next action on the way to toggling the object at ``goal_pos``.

	    If the cell in front of the NPC is ``goal_pos`` the toggle action is
	    returned, otherwise the next step of the path towards ``goal_pos``.

	    :raises ValueError: if ``goal_pos`` is not a np.ndarray of shape (2,)
	    """
	    if type(goal_pos) != np.ndarray or goal_pos.shape != (2,):
	        raise ValueError(f"goal_pos must be a np.ndarray of shape (2,) and is {goal_pos}")

	    front = self.front_pos
	    assert type(front) == np.ndarray and front.shape == (2,)

	    # already standing in front of the target
	    facing_goal = all(self.front_pos == goal_pos)
	    if facing_goal:
	        return self.toggle_action

	    return self.path_to_pos(goal_pos)

	def turn_to_see_agent(self):
	    """Return the turn action rotating the NPC towards the agent's position."""
	    agent_dir = self.compute_wanted_dir(self.env.agent_pos)
	    return self.compute_turn_action(agent_dir)

	def relative_coords(self, x, y):
	    """
	    Map the grid position (x, y) into the NPC's view.

	    :return: the view coordinates ``(vx, vy)`` if the position lies inside
	        the ``view_size`` x ``view_size`` field of view, else ``None``
	    """
	    vx, vy = self.get_view_coords(x, y)

	    size = self.view_size
	    inside_view = not (vx < 0 or vy < 0 or vx >= size or vy >= size)

	    return (vx, vy) if inside_view else None


	def get_view_coords(self, i, j):
	    """
	    Translate and rotate absolute grid coordinates (i, j) into the
	    npc's partially observable view (sub-grid). Note that the resulting
	    coordinates may be negative or outside of the npc's view size.

	    :return: ``(vx, vy)`` view coordinates; the NPC's own cell maps to
	        ``(view_size // 2, view_size - 1)``
	    """
	    ax, ay = self.cur_pos
	    dx, dy = self.dir_vec
	    rx, ry = self.right_vec

	    # Compute the absolute coordinates of the top-left view corner
	    sz = self.view_size
	    hs = self.view_size // 2
	    tx = ax + (dx * (sz-1)) - (rx * hs)
	    ty = ay + (dy * (sz-1)) - (ry * hs)

	    # Offset of the queried cell from that corner
	    lx = i - tx
	    ly = j - ty

	    # Project the offset onto the npc's own coordinate system:
	    # vx grows along the "right" vector, vy grows against the "forward" vector
	    vx = (rx*lx + ry*ly)
	    vy = -(dx*lx + dy*ly)

	    return vx, vy

	def is_pointing(self):
	    """Return True unless ``point_dir`` holds the "no point" value 255."""
	    no_point = 255
	    return self.point_dir != no_point

	def path_to_pos(self, goal_pos):
	    """
	    Return the next action from the A* path to ``goal_pos``.

	    A cell is walkable if it is the goal itself, is empty, or holds an
	    object that can be overlapped; the agent's cell is never walkable.

	    :param goal_pos: np.ndarray of shape (2,)
	    :return: ``self.go_forward`` if the NPC already faces the next cell of
	        the path, otherwise the result of ``compute_turn_action``;
	        ``None`` if no path exists or the NPC is already at ``goal_pos``
	    :raises ValueError: if ``goal_pos`` is not a np.ndarray of shape (2,)
	    """
	    if type(goal_pos) != np.ndarray or goal_pos.shape != (2,):
	        raise ValueError(f"goal_pos must be a np.ndarray of shape (2,) and is {goal_pos}")

	    offsets = ((0, 1), (0, -1), (1, 0), (-1, 0))

	    def neighbors(n):
	        n_nd = np.array(n)
	        for offset in offsets:
	            pos = n_nd + np.array(offset)
	            cell = self.env.grid.get(*pos)

	            # keep the positions that don't have anything on or can_overlap
	            walkable = all(pos == goal_pos) or cell is None or cell.can_overlap()
	            if walkable and not all(pos == self.env.agent_pos):
	                # tuples because np.ndarray is not hashable
	                yield tuple(pos)

	    def distance(n1, n2):
	        # every move between adjacent cells has unit cost
	        return 1

	    def cost(n, goal):
	        # manhattan
	        return int(np.abs(np.array(n) - np.array(goal)).sum())

	    path = astar.find_path(
	        tuple(self.cur_pos),
	        tuple(goal_pos),
	        neighbors_fnct=neighbors,
	        heuristic_cost_estimate_fnct=cost,
	        distance_between_fnct=distance,
	    )

	    if path is None:
	        # no possible path
	        return None

	    path = list(path)
	    assert all(path[0] == self.cur_pos)

	    if len(path) < 2:
	        # already standing on goal_pos: there is no next step to take
	        return None

	    next_step = path[1]
	    wanted_dir = self.compute_wanted_dir(next_step)

	    if self.npc_dir == wanted_dir:
	        return self.go_forward

	    return self.compute_turn_action(wanted_dir)

	def gen_obs_grid(self):
	    """
	    Generate the sub-grid observed by the npc.
	    This method also outputs a visibility mask telling us which grid
	    cells the npc can actually see.

	    :return: ``(grid, vis_mask)`` where ``vis_mask`` is a boolean array of
	        shape ``(grid.width, grid.height)``
	    """
	    view_size = self.view_size

	    topX, topY, _, _ = self.env.get_view_exts(dir=self.npc_dir, view_size=view_size, pos=self.cur_pos)

	    grid = self.env.grid.slice(topX, topY, view_size, view_size)

	    # Rotate the slice so that the npc ends up at the bottom-centre cell
	    for _ in range(self.npc_dir + 1):
	        grid = grid.rotate_left()

	    # Process occluders and visibility
	    # Note that this incurs some performance cost
	    if not self.env.see_through_walls:
	        vis_mask = grid.process_vis(agent_pos=(view_size // 2, view_size - 1))
	    else:
	        # builtin bool: the np.bool alias was removed in NumPy 1.24
	        vis_mask = np.ones(shape=(grid.width, grid.height), dtype=bool)

	    # Make it so the npc sees what it's carrying
	    # We do this by placing the carried object at the npc's position
	    # in the npc's partially observable view
	    npc_pos = grid.width // 2, grid.height - 1
	    if self.carrying:
	        grid.set(*npc_pos, self.carrying)
	    else:
	        grid.set(*npc_pos, None)

	    return grid, vis_mask

	def is_near_agent(self):
	    """Return True if the agent occupies one of the four cells orthogonally adjacent to the NPC."""
	    ax, ay = self.env.agent_pos
	    wx, wy = self.cur_pos
	    vertically_adjacent = ax == wx and abs(ay - wy) == 1
	    horizontally_adjacent = ay == wy and abs(ax - wx) == 1
	    if vertically_adjacent or horizontally_adjacent:
	        return True
	    return False

	def is_eye_contact(self):
	    """
	    Returns true if the agent and the NPC are looking at each other.

	    Only positions sharing a row or a column are considered; the row check
	    takes precedence when both coordinates coincide.
	    """
	    npc_x, npc_y = self.cur_pos[0], self.cur_pos[1]
	    agent_pos = self.env.agent_pos

	    if npc_y == agent_pos[1]:
	        # same y
	        if npc_x > agent_pos[0]:
	            # NPC is east of the agent: NPC looks W, agent looks E
	            return self.npc_dir == 2 and self.env.agent_dir == 0
	        return self.npc_dir == 0 and self.env.agent_dir == 2

	    if npc_x == agent_pos[0]:
	        # same x
	        if npc_y > agent_pos[1]:
	            # NPC is south of the agent: NPC looks N, agent looks S
	            return self.npc_dir == 3 and self.env.agent_dir == 1
	        return self.npc_dir == 1 and self.env.agent_dir == 3

	    return False

	def compute_wanted_dir(self, target_pos):
	    """
	    Computes the direction in which the NPC should look to see target pos.

	    The angle of ``target_pos - cur_pos`` is measured from the south (+y)
	    axis and bucketed into 90-degree sectors.

	    :return: 0 (E), 1 (S), 2 (W) or 3 (N)
	    """
	    distance_vec = target_pos - self.cur_pos
	    angle = np.degrees(np.arctan2(*distance_vec))
	    if angle < 0:
	        angle += 360

	    # (exclusive upper bound of the sector in degrees, direction)
	    sectors = (
	        (45, 1),   # S
	        (135, 0),  # E
	        (225, 3),  # N
	        (315, 2),  # W
	        (360, 1),  # S
	    )
	    for upper_bound, direction in sectors:
	        if angle < upper_bound:
	            wanted_dir = direction
	            break

	    return wanted_dir

	def compute_wanted_point_dir(self, target_pos):
	    """Return the direction in which to point at ``target_pos`` (same encoding as ``compute_wanted_dir``)."""
	    return self.compute_wanted_dir(target_pos)

	# dir = 0 # E
	# dir = 1 # S
	# dir = 2 # W
	# dir = 3 # N
	# dir = 255 # no point

	@_npc_action
	def stop_point(self):
	    """Stop pointing by resetting ``point_dir`` to the "no point" value 255. Always returns True."""
	    no_point = 255
	    self.point_dir = no_point
	    return True

	@_npc_action
	def point_E(self):
	    """Point east. Always returns True."""
	    east = point_dir_encoding["point_E"]
	    self.point_dir = east
	    return True

	@_npc_action
	def point_S(self):
	    """Point south. Always returns True."""
	    south = point_dir_encoding["point_S"]
	    self.point_dir = south
	    return True

	@_npc_action
	def point_W(self):
	    """Point west. Always returns True."""
	    west = point_dir_encoding["point_W"]
	    self.point_dir = west
	    return True

	@_npc_action
	def point_N(self):
	    """Point north. Always returns True."""
	    north = point_dir_encoding["point_N"]
	    self.point_dir = north
	    return True

	def compute_wanted_point_action(self, target_pos):
	    """
	    Return the pointing action (``point_E``/``point_S``/``point_W``/``point_N``)
	    whose direction matches the direction of ``target_pos``.

	    :raises ValueError: if the computed direction matches no pointing action
	    """
	    point_dir = self.compute_wanted_dir(target_pos)

	    for action_name in ("point_E", "point_S", "point_W", "point_N"):
	        if point_dir == point_dir_encoding[action_name]:
	            return getattr(self, action_name)

	    raise ValueError("Unknown direction {}".format(point_dir))


	def compute_turn_action(self, wanted_dir):
	    """
	    Return the action turning the NPC towards ``wanted_dir``.

	    :return: ``None`` if already facing ``wanted_dir``; ``rotate_right`` or
	        ``rotate_left`` when a single quarter turn suffices; otherwise a
	        randomly chosen one of the two
	    """
	    current = self.npc_dir

	    if current == wanted_dir:
	        return None

	    one_step_right = (wanted_dir - current) == 1 or (wanted_dir == 0 and current == 3)
	    if one_step_right:
	        return self.rotate_right

	    one_step_left = (wanted_dir - current) == -1 or (wanted_dir == 3 and current == 0)
	    if one_step_left:
	        return self.rotate_left

	    # opposite direction: both turns are equally long
	    return self.env._rand_elem([self.rotate_left, self.rotate_right])

	@_npc_action
	def go_forward(self):
	    """
	    Move the NPC one cell forward if possible, otherwise try to push the
	    object in front of it.

	    :return: True if the NPC moved, False if it could neither move nor
	        push; NOTE(review): the push branch falls through and returns
	        ``None`` - confirm whether callers rely on that
	    """
	    # Position and contents of the cell in front of the NPC
	    target_pos = self.front_pos
	    target_cell = self.env.grid.get(*target_pos)

	    # Never step onto (or push into) the agent's cell
	    agent_in_front = target_pos.tolist() == self.env.agent_pos.tolist()

	    # Don't move if you are going to collide
	    if not agent_in_front and (target_cell is None or target_cell.can_overlap()):
	        self.env.grid.set(*self.cur_pos, None)
	        self.env.grid.set(*target_pos, self)
	        self.cur_pos = target_pos
	        return True

	    # push object if pushable
	    if not agent_in_front and (target_cell is not None and target_cell.can_push()):
	        target_cell.push(push_dir=self.npc_dir, pusher=self)
	    else:
	        return False

	@_npc_action
	def toggle_action(self):
	    """Toggle the object in front of the NPC and return its ``toggle`` result; False if there is no (truthy) object."""
	    target_pos = self.front_pos
	    target_cell = self.env.grid.get(*target_pos)
	    if not target_cell:
	        return False

	    return target_cell.toggle(self.env, target_pos)

	@property
	def dir_vec(self):
	    """
	    Get the direction vector for the NPC, pointing in the direction
	    of forward movement.
	    """
	    facing = self.npc_dir
	    assert facing >= 0 and facing < 4
	    return DIR_TO_VEC[self.npc_dir]

	@property
	def right_vec(self):
	    """
	    Get the vector pointing to the right of the NPC
	    (the forward vector ``(dx, dy)`` turned into ``(-dy, dx)``).
	    """
	    forward_x, forward_y = self.dir_vec
	    return np.array((-forward_y, forward_x))


	@property
	def front_pos(self):
	    """
	    Get the position of the cell that is right in front of the NPC
	    """
	    ahead = self.cur_pos + self.dir_vec
	    return ahead

	@property
	def back_pos(self):
	    """
	    Get the position of the cell that is right behind the NPC
	    """
	    behind = self.cur_pos - self.dir_vec
	    return behind

	@property
	def right_pos(self):
	    """
	    Get the position of the cell that is immediately to the right of the NPC
	    """
	    to_the_right = self.cur_pos + self.right_vec
	    return to_the_right

	@property
	def left_pos(self):
	    """
	    Get the position of the cell that is immediately to the left of the NPC
	    """
	    to_the_left = self.cur_pos - self.right_vec
	    return to_the_left

	def draw_npc_face(self, c):
	    """
	    Build the shapes making up the face of an NPC of type 0.

	    :param c: the drawing color; must equal ``COLORS[self.color]``
	    :return: ``(shapes, shapes_colors)`` - two parallel lists of shape
	        predicates and the color each one is filled with (always ``c``)
	    """
	    assert self.npc_type == 0

	    assert all(COLORS[self.color] == c)

	    shapes = [
	        # Draw eyes
	        point_in_circle(cx=0.70, cy=0.50, r=0.10),
	        point_in_circle(cx=0.30, cy=0.50, r=0.10),
	        # Draw mouth
	        point_in_rect(0.20, 0.80, 0.72, 0.81),
	        # Draw bottom hat
	        point_in_triangle((0.15, 0.28),
	                          (0.85, 0.28),
	                          (0.50, 0.05)),
	        # Draw top hat
	        point_in_rect(0.30, 0.70, 0.05, 0.28),
	    ]
	    shapes_colors = [c for _ in shapes]

	    return shapes, shapes_colors

	def render(self, img):
	    """
	    Draw the NPC into the tile image ``img``: its face (rotated to its
	    direction), the pointing finger when pointing, a "T" marker when the
	    last action was a toggle, and a crossed-eye symbol when the NPC is hidden.
	    """
	    c = COLORS[self.color]
	    grey = (128, 128, 128)
	    quarter_turn = math.pi/2

	    face_shapes, face_colors = self.draw_npc_face(c=c)
	    npc_shapes = list(face_shapes)
	    npc_shapes_colors = list(face_colors)

	    if hasattr(self, "npc_dir"):
	        # Pre-rotation to ensure npc_dir = 1 means NPC looks downwards
	        npc_shapes = [rotate_fn(v, cx=0.5, cy=0.5, theta=-1*(math.pi / 2)) for v in npc_shapes]
	        # Rotate npc based on its direction
	        npc_shapes = [rotate_fn(v, cx=0.5, cy=0.5, theta=quarter_turn * self.npc_dir) for v in npc_shapes]

	    if hasattr(self, "point_dir") and self.is_pointing():
	        # default points east
	        finger = point_in_triangle((0.85, 0.1),
	                                   (0.85, 0.3),
	                                   (0.99, 0.2))
	        finger = rotate_fn(finger, cx=0.5, cy=0.5, theta=quarter_turn * self.point_dir)

	        npc_shapes.append(finger)
	        npc_shapes_colors.append(c)

	    if self.last_action == self.toggle_action.__name__:
	        # T symbol
	        t_symbol = [point_in_rect(0.8, 0.84, 0.02, 0.18), point_in_rect(0.8, 0.95, 0.08, 0.12)]
	        t_symbol = [rotate_fn(v, cx=0.5, cy=0.5, theta=quarter_turn * self.npc_dir) for v in t_symbol]
	        npc_shapes.extend(t_symbol)
	        npc_shapes_colors.extend([c, c])

	    elif self.last_action == self.go_forward.__name__:
	        # symbol for Forward (omitted for speed)
	        pass

	    if self.env.hidden_npc:
	        # crossed eye symbol
	        dx, dy = 0.15, -0.2

	        hidden_symbol = [
	            # draw eye
	            (point_in_circle(cx=0.70+dx, cy=0.48+dy, r=0.11), grey),
	            (point_in_circle(cx=0.30+dx, cy=0.52+dy, r=0.11), grey),
	            (point_in_circle(0.5+dx, 0.5+dy, 0.25), grey),
	            (point_in_circle(0.5+dx, 0.5+dy, 0.20), (0, 0, 0)),
	            (point_in_circle(0.5+dx, 0.5+dy, 0.1), grey),
	            # cross it
	            (point_in_line(0.2+dx, 0.7+dy, 0.8+dx, 0.3+dy, 0.04), grey),
	        ]
	        for shape, shape_color in hidden_symbol:
	            npc_shapes.append(shape)
	            npc_shapes_colors.append(shape_color)

	    # Draw shapes
	    for shape, shape_color in zip(npc_shapes, npc_shapes_colors):
	        fill_coords(img, shape, shape_color)

	def cache(self, *args, **kwargs):
	    """
	    Used for cached rendering.

	    Returns the tuple from ``self.encode(*args, **kwargs)`` extended with
	    ``npc_dir`` and ``point_dir``: when egocentric coordinates are used,
	    different states can share an encoding but still need a new rendering.
	    """
	    return self.encode(*args, **kwargs) + (self.npc_dir, self.point_dir,)

	def can_overlap(self):
	    """Return whether the agent may step onto the NPC's cell - only when the NPC is hidden."""
	    hidden = self.env.hidden_npc
	    return hidden

	def encode(self, nb_dims=3, absolute_coordinates=False):
	    """
	    Encode the NPC as a tuple of ``nb_dims`` integers.

	    The tuple starts with the object type, (the position when
	    ``absolute_coordinates`` is set,) the color and the npc type, followed -
	    depending on the environment flags - by the NPC direction, the pointing
	    direction and the id of the last primitive action. With egocentric
	    observations directions are expressed relative to the agent's direction.
	    A hidden NPC is encoded as ``(1, 0, ..., 0)``.

	    :raises ValueError: if ``npc_type`` or ``env`` is missing, or if
	        ``point_dir`` is neither 255 nor in 0..3
	    """
	    if not hasattr(self, "npc_type"):
	        raise ValueError("An NPC class must implement the npc_type (int)")

	    if not hasattr(self, "env"):
	        raise ValueError("An NPC class must have the env")

	    assert nb_dims == 6+2*bool(absolute_coordinates)

	    env = self.env

	    if env.hidden_npc:
	        return (1,) + (0,) * (nb_dims-1)

	    assert env.egocentric_observation == (not absolute_coordinates)

	    type_idx = OBJECT_TO_IDX[self.type]
	    if absolute_coordinates:
	        v = (type_idx, *self.cur_pos, COLOR_TO_IDX[self.color], self.npc_type)
	    else:
	        v = (type_idx, COLOR_TO_IDX[self.color], self.npc_type)

	    if env.add_npc_direction:
	        assert hasattr(self, "npc_dir"), "4D but there is no npc dir in NPC state"
	        assert self.npc_dir >= 0

	        if env.egocentric_observation:
	            assert env.agent_dir >= 0

	            # 0 - eye contact; 2 - gaze in same direction; 1 - to left; 3 - to right
	            v += ((self.npc_dir - env.agent_dir + 2) % 4,)
	        else:
	            v += (self.npc_dir,)

	    if env.add_npc_point_direction:
	        assert hasattr(self, "point_dir"), "5D but there is no npc point dir in NPC state"

	        if self.point_dir == 255:
	            # no pointing
	            v += (self.point_dir,)

	        elif 0 <= self.point_dir <= 3:
	            # pointing
	            if env.egocentric_observation:
	                assert env.agent_dir >= 0

	                # 0 - pointing at agent; 2 - point in direction of agent gaze; 1 - to left; 3 - to right
	                v += ((self.point_dir - env.agent_dir + 2) % 4,)
	            else:
	                v += (self.point_dir,)

	        else:
	            raise ValueError(f"Undefined point direction {self.point_dir}")

	    if env.add_npc_last_prim_action:
	        assert hasattr(self, "last_action"), "6D but there is no last action in NPC state"

	        is_point_action = self.last_action in ["point_E", "point_S", "point_W", "point_N"]
	        if is_point_action and env.egocentric_observation:
	            # convert the direction of the last point to a relative dir
	            # 0 - pointing at agent; 2 - point in direction of agent gaze; 1 - to left; 3 - to right
	            relative_point_dir = (point_dir_encoding[self.last_action] - env.agent_dir + 2) % 4

	            # the point_X action ids are in range [point_E, ... , point_N]
	            # id of point_E is the starting one, we use the same range [E, S, W ,N ] -> [at, left, same, right]
	            last_action_id = self.prim_actions_dict["point_E"] + relative_point_dir
	        else:
	            last_action_id = self.prim_actions_dict[self.last_action]

	        v += (last_action_id,)

	    assert self.point_dir >= 0
	    assert len(v) == nb_dims

	    return v


	class Grid:
	    """
	    Represent a grid and operations on it.

	    Cells are stored row-major in the flat list ``self.grid``: the cell at
	    column ``i`` and row ``j`` lives at index ``j * width + i``. An empty
	    cell is ``None``.
	    """

	    # Static cache of pre-rendered tiles
	    tile_cache = {}

	    def __init__(self, width, height, nb_obj_dims):
	        """
	        :param width: number of columns (at least 3)
	        :param height: number of rows (at least 3)
	        :param nb_obj_dims: number of channels used to encode one cell
	        """
	        assert width >= 3
	        assert height >= 3

	        self.width = width
	        self.height = height
	        self.nb_obj_dims = nb_obj_dims

	        self.grid = [None] * width * height

	    def __contains__(self, key):
	        """
	        Membership test. ``key`` is either a WorldObj (matched by identity)
	        or a ``(color, type)`` tuple; a ``None`` color matches any color.
	        """
	        if isinstance(key, WorldObj):
	            return any(e is key for e in self.grid)

	        if isinstance(key, tuple):
	            for e in self.grid:
	                if e is None:
	                    continue
	                if (e.color, e.type) == key:
	                    return True
	                if key[0] is None and key[1] == e.type:
	                    return True

	        return False

	    def __eq__(self, other):
	        """Two grids are equal when their encodings are equal."""
	        grid1 = self.encode()
	        grid2 = other.encode()
	        return np.array_equal(grid2, grid1)

	    def __ne__(self, other):
	        return not self == other

	    def copy(self):
	        """Return a deep copy of the grid."""
	        from copy import deepcopy
	        return deepcopy(self)

	    def set(self, i, j, v):
	        """Put ``v`` (an object or ``None``) into cell (i, j)."""
	        assert i >= 0 and i < self.width
	        assert j >= 0 and j < self.height
	        self.grid[j * self.width + i] = v

	    def get(self, i, j):
	        """Return the content of cell (i, j)."""
	        assert i >= 0 and i < self.width
	        assert j >= 0 and j < self.height
	        return self.grid[j * self.width + i]

	    def horz_wall(self, x, y, length=None, obj_type=Wall):
	        """Fill ``length`` cells to the right of (x, y) with new ``obj_type`` objects (to the grid edge by default)."""
	        if length is None:
	            length = self.width - x
	        for i in range(0, length):
	            o = obj_type()
	            o.cur_pos = np.array((x+i, y))
	            self.set(x + i, y, o)

	    def vert_wall(self, x, y, length=None, obj_type=Wall):
	        """Fill ``length`` cells downwards from (x, y) with new ``obj_type`` objects (to the grid edge by default)."""
	        if length is None:
	            length = self.height - y
	        for j in range(0, length):
	            o = obj_type()
	            o.cur_pos = np.array((x, y+j))
	            self.set(x, y + j, o)

	    def wall_rect(self, x, y, w, h):
	        """Draw the outline of a ``w`` x ``h`` rectangle of walls with top-left corner (x, y)."""
	        self.horz_wall(x, y, w)
	        self.horz_wall(x, y+h-1, w)
	        self.vert_wall(x, y, h)
	        self.vert_wall(x+w-1, y, h)

	    def rotate_left(self):
	        """
	        Rotate the grid to the left (counter-clockwise)
	        """

	        grid = Grid(self.height, self.width, self.nb_obj_dims)

	        for i in range(self.width):
	            for j in range(self.height):
	                v = self.get(i, j)
	                grid.set(j, grid.height - 1 - i, v)

	        return grid

	    def slice(self, topX, topY, width, height):
	        """
	        Get a subset of the grid.

	        Cells falling outside of this grid are filled with new ``Wall`` objects.
	        """

	        grid = Grid(width, height, self.nb_obj_dims)

	        for j in range(0, height):
	            for i in range(0, width):
	                x = topX + i
	                y = topY + j

	                if 0 <= x < self.width and 0 <= y < self.height:
	                    v = self.get(x, y)
	                else:
	                    v = Wall()

	                grid.set(i, j, v)

	        return grid

	    @classmethod
	    def render_tile(
	        cls,
	        obj,
	        agent_dir=None,
	        highlight=False,
	        tile_size=TILE_PIXELS,
	        subdivs=3,
	        nb_obj_dims=3,
	        mask_unobserved=False
	    ):
	        """
	        Render a tile and cache the result.

	        :param obj: the object in the tile, or ``None`` for an empty tile
	        :param agent_dir: direction of the agent if it stands on this tile, else ``None``
	        :param highlight: whether to highlight the tile
	        :param tile_size: side of the rendered tile in pixels
	        :param subdivs: supersampling factor used for anti-aliasing
	        :param nb_obj_dims: passed to ``obj.cache`` to build the cache key
	        :param mask_unobserved: black out the tile when it is not highlighted
	        :return: the rendered tile image
	        """
	        # Hash map lookup key for the cache
	        key = (agent_dir, highlight, tile_size, mask_unobserved)
	        key = obj.cache(nb_dims=nb_obj_dims) + key if obj else key

	        if key in cls.tile_cache:
	            return cls.tile_cache[key]

	        img = np.zeros(shape=(tile_size * subdivs, tile_size * subdivs, 3), dtype=np.uint8)  # 3D for rendering

	        # Draw the grid lines (top and left edges)
	        fill_coords(img, point_in_rect(0, 0.031, 0, 1), (100, 100, 100))
	        fill_coords(img, point_in_rect(0, 1, 0, 0.031), (100, 100, 100))

	        if obj is not None:
	            obj.render(img)

	        # Overlay the agent on top
	        if agent_dir is not None:
	            tri_fn = point_in_triangle(
	                (0.12, 0.19),
	                (0.87, 0.50),
	                (0.12, 0.81),
	            )

	            # Rotate the agent based on its direction
	            tri_fn = rotate_fn(tri_fn, cx=0.5, cy=0.5, theta=0.5*math.pi*agent_dir)
	            fill_coords(img, tri_fn, (255, 0, 0))

	        # Highlight the cell if needed
	        if highlight:
	            highlight_img(img)
	        elif mask_unobserved:
	            # mask unobserved and not highlighted -> unobserved by the agent
	            img *= 0

	        # Downsample the image to perform supersampling/anti-aliasing
	        img = downsample(img, subdivs)

	        # Cache the rendered tile
	        cls.tile_cache[key] = img

	        return img

	    def render(
	        self,
	        tile_size,
	        agent_pos=None,
	        agent_dir=None,
	        highlight_mask=None,
	        mask_unobserved=False,
	    ):
	        """
	        Render this grid at a given scale

	        :param tile_size: tile size in pixels
	        :param agent_pos: position of the agent, drawn on top of its tile
	        :param agent_dir: direction of the agent
	        :param highlight_mask: boolean array of shape (width, height) marking
	            the tiles to highlight (none by default)
	        :param mask_unobserved: black out the tiles that are not highlighted
	        :return: image array of shape (height * tile_size, width * tile_size, 3)
	        """

	        if highlight_mask is None:
	            highlight_mask = np.zeros(shape=(self.width, self.height), dtype=bool)

	        # Compute the total grid size
	        width_px = self.width * tile_size
	        height_px = self.height * tile_size
	        img = np.zeros(shape=(height_px, width_px, 3), dtype=np.uint8)

	        # Render the grid
	        for j in range(0, self.height):
	            for i in range(0, self.width):
	                cell = self.get(i, j)

	                agent_here = np.array_equal(agent_pos, (i, j))
	                tile_img = Grid.render_tile(
	                    cell,
	                    agent_dir=agent_dir if agent_here else None,
	                    highlight=highlight_mask[i, j],
	                    tile_size=tile_size,
	                    nb_obj_dims=self.nb_obj_dims,
	                    mask_unobserved=mask_unobserved
	                )

	                ymin = j * tile_size
	                ymax = (j+1) * tile_size
	                xmin = i * tile_size
	                xmax = (i+1) * tile_size
	                img[ymin:ymax, xmin:xmax, :] = tile_img

	        return img

	    def encode(self, vis_mask=None, absolute_coordinates=False):
	        """
	        Produce a compact numpy encoding of the grid.

	        Cells outside of ``vis_mask`` stay all-zero; empty visible cells get
	        the 'empty' type in channel 0.

	        :return: uint8 array of shape (width, height, nb_obj_dims)
	        """

	        if vis_mask is None:
	            vis_mask = np.ones((self.width, self.height), dtype=bool)

	        array = np.zeros((self.width, self.height, self.nb_obj_dims), dtype='uint8')

	        for i in range(self.width):
	            for j in range(self.height):
	                if not vis_mask[i, j]:
	                    continue

	                v = self.get(i, j)

	                if v is None:
	                    array[i, j, 0] = OBJECT_TO_IDX['empty']
	                    array[i, j, 1:] = 0
	                else:
	                    array[i, j, :] = v.encode(nb_dims=self.nb_obj_dims, absolute_coordinates=absolute_coordinates)

	        return array

	    @staticmethod
	    def decode(array):
	        """
	        Decode an array grid encoding back into a grid.

	        Only the first three channels (type, color, state) are used.

	        :return: ``(grid, vis_mask)``; the mask is False where the type is 'unseen'
	        """

	        width, height, channels = array.shape
	        assert channels in [5, 4, 3]

	        vis_mask = np.ones(shape=(width, height), dtype=bool)

	        grid = Grid(width, height, nb_obj_dims=channels)
	        for i in range(width):
	            for j in range(height):
	                # slicing instead of fixed-size unpacking so that 4 and 5
	                # channel encodings are both accepted
	                type_idx, color_idx, state = array[i, j][:3]

	                v = WorldObj.decode(type_idx, color_idx, state)
	                grid.set(i, j, v)
	                vis_mask[i, j] = (type_idx != OBJECT_TO_IDX['unseen'])

	        return grid, vis_mask

	    def process_vis(grid, agent_pos):
	        """
	        Compute which cells of the (view) grid are visible from ``agent_pos``
	        and clear the cells that are not.

	        Rows are scanned from the observer's row towards row 0. A cell that
	        cannot be seen through (or is itself already masked) hides cells
	        further away from the observer, using 45-degree occlusion cones.

	        :param agent_pos: (column, row) of the observer inside this grid
	        :return: boolean mask of shape (width, height), True for visible cells
	        """
	        mask = np.ones(shape=(grid.width, grid.height), dtype=bool)

	        def occludes(i, j):
	            # opaque object, or a cell that is itself already hidden
	            cell = grid.get(i, j)
	            return (cell and not cell.see_behind()) or not mask[i, j]

	        # handle frontal occlusions (45 deg)
	        for j in reversed(range(0, agent_pos[1]+1)):
	            dy = abs(agent_pos[1] - j)

	            # in front of the agent
	            i = agent_pos[0]
	            if occludes(i, j):
	                if j < agent_pos[1] and j > 0:
	                    # 45 deg
	                    mask[i-1, j-1] = False
	                    mask[i, j-1] = False
	                    mask[i+1, j-1] = False

	            # agent -> to the left
	            for i in reversed(range(1, agent_pos[0])):
	                dx = abs(agent_pos[0] - i)

	                if occludes(i, j):
	                    # angle
	                    if dx >= dy:
	                        mask[i - 1, j] = False

	                    if j > 0:
	                        mask[i - 1, j - 1] = False
	                        if dy >= dx:
	                            mask[i, j - 1] = False

	            # agent -> to the right
	            for i in range(agent_pos[0]+1, grid.width-1):
	                dx = abs(agent_pos[0] - i)

	                if occludes(i, j):
	                    # angle
	                    if dx >= dy:
	                        mask[i + 1, j] = False

	                    if j > 0:
	                        mask[i + 1, j - 1] = False
	                        if dy >= dx:
	                            mask[i, j - 1] = False

	        # remove the content of the cells that cannot be seen
	        for j in range(0, grid.height):
	            for i in range(0, grid.width):
	                if not mask[i, j]:
	                    grid.set(i, j, None)

	        return mask


	class MiniGridEnv(gym.Env):
	"""
	2D grid world game environment
	"""

	metadata = {
	'render.modes': ['human', 'rgb_array'],
	'video.frames_per_second' : 10
	}

	# Enumeration of possible actions
	class Actions(IntEnum):
	    """Integer ids of the actions available to the agent."""

	    # Movement: turn left, turn right, move forward
	    left = 0
	    right = 1
	    forward = 2

	    # Object interaction: pick up, drop, toggle/activate
	    pickup = 3
	    drop = 4
	    toggle = 5

	    # Done completing task
	    done = 6

	def __init__(
	self,
	grid_size=None,
	width=None,
	height=None,
	max_steps=100,
	see_through_walls=False,
	full_obs=False,
	seed=None,
	agent_view_size=7,
	actions=None,
	action_space=None,
	add_npc_direction=False,
	add_npc_point_direction=False,
	add_npc_last_prim_action=False,
	reward_diminish_factor=0.9,
	egocentric_observation=True,
	):
	"""
	Configure the environment, build its action and observation spaces,
	seed the RNG and generate the initial state by calling ``reset()``.

	:param grid_size: side of a square grid; mutually exclusive with width/height
	:param width: grid width, used when grid_size is not given
	:param height: grid height, used when grid_size is not given
	:param max_steps: stored as ``self.max_steps``
	:param see_through_walls: stored as ``self.see_through_walls``
	:param full_obs: passed to ``gen_obs`` by ``reset``
	:param seed: seed handed to ``self.seed``
	:param agent_view_size: side of the agent's view in cells (odd, at least 3)
	:param actions: action enumeration; defaults to ``MiniGridEnv.Actions``
	:param action_space: action space; defaults to a MultiDiscrete over the actions
	:param add_npc_direction: adds one channel to the object encoding
	:param add_npc_point_direction: adds one channel to the object encoding
	:param add_npc_last_prim_action: adds one channel to the object encoding
	:param reward_diminish_factor: factor used by ``_reward``
	:param egocentric_observation: when False, two more channels are added to the object encoding
	"""

	# sanity check params for SocialAI experiments
	# (only applied to classes whose name contains "SocialAI")
	if "SocialAI" in type(self).__name__:
	assert egocentric_observation
	assert grid_size == 10
	assert not see_through_walls
	assert max_steps == 80
	assert agent_view_size == 7
	assert not full_obs
	assert add_npc_direction and add_npc_point_direction and add_npc_last_prim_action

	self.egocentric_observation = egocentric_observation

	# subclasses may define lever_active_steps before calling this constructor
	if hasattr(self, "lever_active_steps"):
	assert self.lever_active_steps == 10

	# Can't set both grid_size and width/height
	if grid_size:
	assert width == None and height == None
	width = grid_size
	height = grid_size

	# Action enumeration for this environment
	if actions:
	self.actions = actions
	else:
	self.actions = MiniGridEnv.Actions

	# Actions are discrete integer values
	if action_space:
	self.action_space = action_space
	else:
	self.action_space = spaces.MultiDiscrete([len(self.actions)])

	# Number of cells (width and height) in the agent view
	assert agent_view_size % 2 == 1
	assert agent_view_size >= 3
	self.agent_view_size = agent_view_size

	# Number of object dimensions (i.e. number of channels in symbolic image):
	# 3 base channels, +2 for absolute coordinates, +1 per enabled NPC feature
	self.add_npc_direction = add_npc_direction
	self.add_npc_point_direction = add_npc_point_direction
	self.add_npc_last_prim_action = add_npc_last_prim_action
	self.nb_obj_dims = 3 + 2*bool(not self.egocentric_observation) + int(self.add_npc_direction) + int(self.add_npc_point_direction) + int(self.add_npc_last_prim_action)

	# Observations are dictionaries whose 'image' entry is an
	# encoding of the agent's view of the grid
	self.observation_space = spaces.Box(
	low=0,
	high=255,
	shape=(self.agent_view_size, self.agent_view_size, self.nb_obj_dims),
	dtype='uint8'
	)
	self.observation_space = spaces.Dict({
	'image': self.observation_space
	})

	# Range of possible rewards
	self.reward_range = (0, 1)

	# Window to use for human rendering mode
	self.window = None

	# Environment configuration
	self.width = width
	self.height = height
	self.max_steps = max_steps
	self.see_through_walls = see_through_walls
	self.full_obs = full_obs

	self.reward_diminish_factor = reward_diminish_factor

	# Current position and direction of the agent
	self.agent_pos = None
	self.agent_dir = None

	# Initialize the RNG
	self.seed(seed=seed)

	# Initialize the state
	self.reset()

	def reset(self):
	    """
	    Start a new episode: generate a new grid, clear the carried object
	    and the step counter, and return the first observation.

	    To keep the same grid for each episode, call ``env.seed()`` with the
	    same seed before calling ``env.reset()``.
	    """
	    # Both are expected to be (re)defined by _gen_grid below
	    self.agent_pos = None
	    self.agent_dir = None

	    # Generate a new random grid at the start of each episode
	    self._gen_grid(self.width, self.height)

	    assert self.agent_pos is not None
	    assert self.agent_dir is not None

	    # Check that the agent doesn't overlap with an object
	    cell_under_agent = self.grid.get(*self.agent_pos)
	    assert cell_under_agent is None or cell_under_agent.can_overlap()

	    # Nothing is carried at the start of an episode
	    self.carrying = None

	    # Step count since episode start
	    self.step_count = 0

	    # Return first observation
	    return self.gen_obs(full_obs=self.full_obs)

	def reset_with_info(self, *args, **kwargs):
	    """
	    Reset the environment and also return the initial info.

	    All arguments are forwarded to ``self.reset``.

	    :return: ``(obs, info)`` where ``info`` comes from
	        ``self.generate_info(done=False, reward=0)``
	    """
	    obs = self.reset(*args, **kwargs)
	    info = self.generate_info(done=False, reward=0)
	    return obs, info

	def seed(self, seed=1337):
	    """Seed the environment's random number generator and return ``[seed]``."""
	    rng, _ = seeding.np_random(seed)
	    self.np_random = rng
	    return [seed]

	def hash(self, size=16):
	    """Compute a hash that uniquely identifies the current state of the environment.

	    The SHA-256 digest covers the string forms of the grid encoding, the
	    agent position and the agent direction, in that order.

	    :param size: number of leading hex characters of the digest to return
	    """
	    digest = hashlib.sha256()

	    for part in (self.grid.encode(), self.agent_pos, self.agent_dir):
	        digest.update(str(part).encode('utf8'))

	    return digest.hexdigest()[:size]

	@property
	def steps_remaining(self):
	    """Number of steps left before ``max_steps`` is reached."""
	    remaining = self.max_steps - self.step_count
	    return remaining

	def is_near(self, pos1, pos2):
	    """Return True if ``pos1`` and ``pos2`` are orthogonally adjacent cells."""
	    ax, ay = pos1
	    wx, wy = pos2
	    same_column_neighbor = ax == wx and abs(ay - wy) == 1
	    same_row_neighbor = ay == wy and abs(ax - wx) == 1
	    if same_column_neighbor or same_row_neighbor:
	        return True
	    return False

	def get_cell(self, x, y):
	    """Return the content of the grid cell at (x, y)."""
	    cell = self.grid.get(x, y)
	    return cell

	def __str__(self):
	"""
	Produce a pretty string of the environment's grid along with the agent.
	A grid cell is represented by 2-character string, the first one for
	the object and the second one for the color.
	"""

	# Map of object types to short string
	OBJECT_TO_STR = {
	'wall' : 'W',
	'floor' : 'F',
	'door' : 'D',
	'key' : 'K',
	'ball' : 'A',
	'box' : 'B',
	'goal' : 'G',
	'lava' : 'V',
	}

	# Short string for opened door
	OPENDED_DOOR_IDS = '_'

	# Map agent's direction to short string
	AGENT_DIR_TO_STR = {
	0: '>',
	1: 'V',
	2: '<',
	3: '^'
	}

	str = ''

	for j in range(self.grid.height):

	for i in range(self.grid.width):
	if i == self.agent_pos[0] and j == self.agent_pos[1]:
	str += 2 * AGENT_DIR_TO_STR[self.agent_dir]
	continue

	c = self.grid.get(i, j)

	if c == None:
	str += ' '
	continue

	if c.type == 'door':
	if c.is_open:
	str += '__'
	elif c.is_locked:
	str += 'L' + c.color[0].upper()
	else:
	str += 'D' + c.color[0].upper()
	continue

	str += OBJECT_TO_STR[c.type] + c.color[0].upper()

	if j < self.grid.height - 1:
	str += '\n'

	return str

	def _gen_grid(self, width, height):
	assert False, "_gen_grid needs to be implemented by each environment"

	def _reward(self):
	"""
	Compute the reward to be given upon success
	"""

	return 1 - self.reward_diminish_factor * (self.step_count / self.max_steps)

	def _rand_int(self, low, high):
	"""
	Generate random integer in [low,high[
	"""
	return self.np_random.randint(low, high)

	def _rand_float(self, low, high):
	"""
	Generate random float in [low,high[
	"""

	return self.np_random.uniform(low, high)

	def _rand_bool(self):
	"""
	Generate random boolean value
	"""

	return (self.np_random.randint(0, 2) == 0)

	def _rand_elem(self, iterable):
	"""
	Pick a random element in a list
	"""

	lst = list(iterable)
	idx = self._rand_int(0, len(lst))
	return lst[idx]

	def _rand_subset(self, iterable, num_elems):
	"""
	Sample a random subset of distinct elements of a list
	"""

	lst = list(iterable)
	assert num_elems <= len(lst)

	out = []

	while len(out) < num_elems:
	elem = self._rand_elem(lst)
	lst.remove(elem)
	out.append(elem)

	return out

	def _rand_color(self):
	    """
	    Generate a random color name (string) out of COLOR_NAMES
	    """
	    color_name = self._rand_elem(COLOR_NAMES)
	    return color_name

	def _rand_pos(self, xLow, xHigh, yLow, yHigh):
	"""
	Generate a random (x,y) position tuple
	"""

	return (
	self.np_random.randint(xLow, xHigh),
	self.np_random.randint(yLow, yHigh)
	)

	def find_loc(self,
	    top=None,
	    size=None,
	    reject_fn=None,
	    max_tries=math.inf,
	    reject_agent_pos=True,
	    reject_taken_pos=True
	):
	    """
	    Find a position in the grid by rejection sampling

	    :param top: top-left position of the rectangle to sample from
	        (negative coordinates are clamped to 0)
	    :param size: size of the rectangle to sample from (whole grid by default)
	    :param reject_fn: function ``(env, pos) -> bool`` to filter out potential positions
	    :param max_tries: maximal number of samples to draw
	    :param reject_agent_pos: reject the agent's position
	    :param reject_taken_pos: reject the cells that already hold an object
	    :return: the position as a np.ndarray ``(x, y)``
	    :raises RecursionError: if no position was accepted within ``max_tries``
	        tries, or within 10000 tries when ``max_tries`` is larger
	    """
	    # Sampling this many times without success is treated as an infinite loop
	    hard_limit = 10000

	    if top is None:
	        top = (0, 0)
	    else:
	        top = (max(top[0], 0), max(top[1], 0))

	    if size is None:
	        size = (self.grid.width, self.grid.height)

	    # Loop-invariant (exclusive) upper bounds of the sampling rectangle
	    x_high = min(top[0] + size[0], self.grid.width)
	    y_high = min(top[1] + size[1], self.grid.height)

	    num_tries = 0

	    while True:
	        # This is to handle with rare cases where rejection sampling
	        # gets stuck in an infinite loop
	        if num_tries > max_tries:
	            raise RecursionError('rejection sampling failed in place_obj')
	        if num_tries >= hard_limit:
	            # raise instead of terminating the interpreter, so that the
	            # caller gets a traceback and a chance to recover
	            raise RecursionError(
	                "rejection sampling failed in find_loc: num_tries = {}. "
	                "This is probably an infinite loop.".format(num_tries)
	            )

	        num_tries += 1

	        pos = np.array((
	            self._rand_int(top[0], x_high),
	            self._rand_int(top[1], y_high)
	        ))

	        # Don't place the object on top of another object
	        if reject_taken_pos and self.grid.get(*pos) is not None:
	            continue

	        # Don't place the object where the agent is
	        if reject_agent_pos and np.array_equal(pos, self.agent_pos):
	            continue

	        # Check if there is a filtering criterion
	        if reject_fn and reject_fn(self, pos):
	            continue

	        break

	    return pos

	def place_obj(self,
	              obj,
	              top=None,
	              size=None,
	              reject_fn=None,
	              max_tries=math.inf
	              ):
	    """
	    Place an object at a position chosen by self.find_loc

	    :param obj: object to place; when None the chosen cell is set to None
	    :param top: top-left position of the rectangle where to place
	    :param size: size of the rectangle where to place
	    :param reject_fn: function to filter out potential positions
	    :param max_tries: forwarded to self.find_loc
	    :return: the chosen position
	    """
	    location = self.find_loc(
	        top=top, size=size, reject_fn=reject_fn, max_tries=max_tries
	    )

	    if obj is not None:
	        # put_obj_np also records the position on the object
	        self.put_obj_np(obj, location)
	    else:
	        self.grid.set(*location, obj)

	    return location

	def put_obj_np(self, obj, pos):
	    """
	    Put an object at a specific, currently empty position in the grid

	    :param obj: object to place; its init_pos and cur_pos are set
	    :param pos: numpy array holding the (i, j) position
	    :raises ValueError: if the target cell already holds something
	    """
	    assert isinstance(pos, np.ndarray)

	    col, row = pos

	    occupant = self.grid.get(col, row)
	    if occupant is not None:
	        raise ValueError("trying to put {} on {}".format(obj, occupant))

	    self.grid.set(col, row, obj)
	    # Two separate arrays, so the two attributes never alias each other
	    obj.init_pos = np.array((col, row))
	    obj.cur_pos = np.array((col, row))

	def put_obj(self, obj, i, j):
	    """
	    Deprecated way of putting an object at a specific grid position.

	    This method never places anything: it emits a warning and then
	    raises. Use put_obj_np() instead.

	    :raises DeprecationWarning: always
	    """
	    warnings.warn(
	        "This function is kept for backwards compatibility with minigrid. It is recommended to use put_obj_np()."
	    )
	    raise DeprecationWarning("Deprecated use put_obj_np. (or remove this Warning)")

	def place_agent(self, top=None, size=None, rand_dir=True, max_tries=math.inf):
	    """
	    Set the agent's starting point at a position chosen by place_obj

	    :param top: top-left position of the rectangle where to place
	    :param size: size of the rectangle where to place
	    :param rand_dir: when true, also draw self.agent_dir with
	        self._rand_int(0, 4)
	    :param max_tries: forwarded to self.place_obj
	    :return: the chosen position
	    """
	    # Clear the previous position before sampling a new one
	    self.agent_pos = None
	    start = self.place_obj(None, top, size, max_tries=max_tries)
	    self.agent_pos = start

	    if rand_dir:
	        self.agent_dir = self._rand_int(0, 4)

	    return start

	@property
	def dir_vec(self):
	    """
	    Direction vector of the agent, looked up in DIR_TO_VEC by
	    self.agent_dir; it points in the direction of forward movement.
	    """
	    heading = self.agent_dir
	    assert heading >= 0 and heading < 4
	    return DIR_TO_VEC[heading]

	@property
	def right_vec(self):
	    """
	    Vector pointing to the right of the agent: the forward vector
	    (dx, dy) mapped to (-dy, dx).
	    """
	    fwd_x, fwd_y = self.dir_vec
	    return np.array((-fwd_y, fwd_x))

	@property
	def front_pos(self):
	    """
	    Position of the cell directly in front of the agent
	    (agent position plus the forward vector).
	    """
	    here = self.agent_pos
	    forward = self.dir_vec
	    return here + forward

	@property
	def back_pos(self):
	    """
	    Position of the cell directly behind the agent
	    (agent position minus the forward vector).
	    """
	    here = self.agent_pos
	    forward = self.dir_vec
	    return here - forward

	@property
	def right_pos(self):
	    """
	    Position of the cell directly to the right of the agent
	    (agent position plus the right vector).
	    """
	    here = self.agent_pos
	    rightward = self.right_vec
	    return here + rightward

	@property
	def left_pos(self):
	    """
	    Position of the cell directly to the left of the agent
	    (agent position minus the right vector).
	    """
	    here = self.agent_pos
	    rightward = self.right_vec
	    return here - rightward

	def get_view_coords(self, i, j):
	    """
	    Translate and rotate absolute grid coordinates (i, j) into the
	    agent's partially observable view (sub-grid). Note that the resulting
	    coordinates may be negative or outside of the agent's view size.

	    :param i: absolute x coordinate in the grid
	    :param j: absolute y coordinate in the grid
	    :return: tuple (vx, vy) of coordinates in the agent's view
	    """

	    ax, ay = self.agent_pos
	    dx, dy = self.dir_vec
	    rx, ry = self.right_vec

	    # Compute the absolute coordinates of the top-left view corner
	    sz = self.agent_view_size
	    hs = self.agent_view_size // 2
	    tx = ax + (dx * (sz-1)) - (rx * hs)
	    ty = ay + (dy * (sz-1)) - (ry * hs)

	    lx = i - tx
	    ly = j - ty

	    # Project the coordinates of the object relative to the top-left
	    # corner onto the agent's own coordinate system: dot product with
	    # the right vector gives vx, minus the dot product with the forward
	    # vector gives vy
	    vx = (rx*lx + ry*ly)
	    vy = -(dx*lx + dy*ly)

	    return vx, vy

	def get_view_exts(self, dir=None, view_size=None, pos=None):
	    """
	    Get the extents of the square set of tiles visible to the agent
	    (or to an npc if dir, view_size and pos are specified).
	    Note: the bottom extent indices are not included in the set

	    :param dir: facing direction, 0: right, 1: down, 2: left, 3: up
	        (defaults to self.agent_dir)
	    :param view_size: side length of the square view
	        (defaults to self.agent_view_size)
	    :param pos: (x, y) position of the viewer (defaults to self.agent_pos)
	    :return: tuple (topX, topY, botX, botY)
	    :raises AssertionError: if dir is not one of 0, 1, 2, 3
	    """

	    # Every omitted argument falls back to the agent's own value, so a
	    # caller may override only some of them
	    if dir is None:
	        dir = self.agent_dir
	    if view_size is None:
	        view_size = self.agent_view_size
	    if pos is None:
	        pos = self.agent_pos

	    # Facing right
	    if dir == 0:
	        topX = pos[0]
	        topY = pos[1] - view_size // 2
	    # Facing down
	    elif dir == 1:
	        topX = pos[0] - view_size // 2
	        topY = pos[1]
	    # Facing left
	    elif dir == 2:
	        topX = pos[0] - view_size + 1
	        topY = pos[1] - view_size // 2
	    # Facing up
	    elif dir == 3:
	        topX = pos[0] - view_size // 2
	        topY = pos[1] - view_size + 1
	    else:
	        # Explicit raise so the check is not stripped under python -O
	        raise AssertionError("invalid agent direction: {}".format(dir))

	    botX = topX + view_size
	    botY = topY + view_size

	    return (topX, topY, botX, botY)

	def relative_coords(self, x, y):
	    """
	    Map a grid position to coordinates in the agent's view.

	    Returns the (vx, vy) pair given by get_view_coords when both values
	    lie in [0, agent_view_size), and None otherwise.
	    """
	    vx, vy = self.get_view_coords(x, y)
	    extent = self.agent_view_size

	    inside = 0 <= vx < extent and 0 <= vy < extent
	    return (vx, vy) if inside else None

	def in_view(self, x, y):
	    """
	    Tell whether a grid position lies inside the agent's view square,
	    i.e. whether relative_coords returns coordinates for it
	    """
	    view_coords = self.relative_coords(x, y)
	    return view_coords is not None

	def agent_sees(self, x, y):
	    """
	    Check if a non-empty grid position is visible to the agent

	    :param x: absolute x coordinate in the grid
	    :param y: absolute y coordinate in the grid
	    :return: True when the position is inside the agent's view and the
	        observed cell has the same type as the cell in the world grid;
	        False when the position is outside the view, nothing is observed
	        there, or the world cell is empty
	    """

	    coordinates = self.relative_coords(x, y)
	    if coordinates is None:
	        return False
	    vx, vy = coordinates
	    assert not self.full_obs, "agent sees function not implemented with full_obs"
	    obs = self.gen_obs()
	    obs_grid, _ = Grid.decode(obs['image'])
	    obs_cell = obs_grid.get(vx, vy)
	    world_cell = self.grid.get(x, y)

	    # The observation can hold an object where the world grid is empty
	    # (gen_obs_grid puts the carried object on the agent's own cell), so
	    # guard against a missing world cell before comparing types
	    if obs_cell is None or world_cell is None:
	        return False

	    return obs_cell.type == world_cell.type

	def step(self, action):
	    """
	    Apply one agent action and return the resulting transition.

	    :param action: one of the values of self.actions, or NaN to skip
	        acting for this step
	    :return: tuple (obs, reward, done, info) where obs comes from
	        gen_obs(full_obs=self.full_obs) and info from generate_info
	    :raises AssertionError: if the action is not recognised
	    """
	    self.step_count += 1

	    reward = 0
	    done = False

	    # Get the position in front of the agent
	    fwd_pos = self.front_pos

	    # Get the contents of the cell in front of the agent
	    fwd_cell = self.grid.get(*fwd_pos)

	    # Rotate left
	    if action == self.actions.left:
	        self.agent_dir -= 1
	        if self.agent_dir < 0:
	            self.agent_dir += 4

	    # Rotate right
	    elif action == self.actions.right:
	        self.agent_dir = (self.agent_dir + 1) % 4

	    # Move forward
	    elif action == self.actions.forward:
	        # A pushable object in front is pushed first; fwd_cell still refers
	        # to that object in the checks below
	        if fwd_cell != None and fwd_cell.can_push():
	            fwd_cell.push(push_dir=self.agent_dir, pusher="agent")

	        if fwd_cell == None or fwd_cell.can_overlap():
	            self.agent_pos = fwd_pos
	        # Reaching a goal ends the episode with the reward from _reward()
	        if fwd_cell != None and fwd_cell.type == 'goal':
	            done = True
	            reward = self._reward()
	        # Lava ends the episode and leaves the reward at 0
	        if fwd_cell != None and fwd_cell.type == 'lava':
	            done = True

	    # Pick up an object
	    # (only if the action set defines a pickup action)
	    elif hasattr(self.actions, "pickup") and action == self.actions.pickup:
	        if fwd_cell and fwd_cell.can_pickup():
	            if self.carrying is None:
	                self.carrying = fwd_cell
	                # A carried object is given the position (-1, -1)
	                self.carrying.cur_pos = np.array([-1, -1])
	                self.grid.set(*fwd_pos, None)

	    # Drop an object
	    # (only if the action set defines a drop action)
	    elif hasattr(self.actions, "drop") and action == self.actions.drop:
	        if not fwd_cell and self.carrying:
	            self.grid.set(*fwd_pos, self.carrying)
	            self.carrying.cur_pos = fwd_pos
	            self.carrying = None

	    # Toggle/activate an object
	    elif action == self.actions.toggle:
	        if fwd_cell:
	            fwd_cell.toggle(self, fwd_pos)

	    # Done action (not used by default)
	    elif action == self.actions.done:
	        pass

	    elif action in map(int, self.actions):
	        # action that was added in an inheriting class (ex. talk action)
	        pass

	    elif np.isnan(action):
	        # action skip
	        pass

	    else:
	        assert False, f"unknown action {action}"

	    # The episode also ends once the step budget is used up
	    if self.step_count >= self.max_steps:
	        done = True

	    obs = self.gen_obs(full_obs=self.full_obs)

	    info = self.generate_info(done, reward)

	    return obs, reward, done, info

	def generate_info(self, done, reward):
	    """
	    Build the info dictionary returned by step.

	    It holds "success" (done and reward > 0) plus every entry of
	    self.gen_extra_info(); a key present in both is an error.
	    """
	    info = {"success": done and reward > 0}

	    # add stuff needed for textual observations here
	    extra = self.gen_extra_info()

	    assert not any(key in info for key in extra), "Duplicate keys found with gen_extra_info"

	    info.update(extra)
	    return info

	def gen_extra_info(self):
	    """
	    Collect the extra entries that generate_info merges into info:
	    the encoded observed sub-grid, its visibility mask, the carried
	    object, the agent's coordinates in its own view and the
	    SocialAINPCActionsDict mapping.
	    """
	    view_grid, mask = self.gen_obs_grid()
	    held = self.carrying
	    view_x, view_y = self.get_view_coords(self.agent_pos[0], self.agent_pos[1])

	    return {
	        "image": view_grid.encode(mask),
	        "vis_mask": mask,
	        "carrying": held,
	        "agent_pos_vx": view_x,
	        "agent_pos_vy": view_y,
	        "npc_actions_dict": SocialAINPCActionsDict
	    }

	def gen_obs_grid(self):
	    """
	    Generate the sub-grid observed by the agent.
	    This method also outputs a visibility mask telling us which grid
	    cells the agent can actually see.

	    :return: tuple (grid, vis_mask)
	    """

	    topX, topY, botX, botY = self.get_view_exts()

	    grid = self.grid.slice(topX, topY, self.agent_view_size, self.agent_view_size)

	    # Rotate the slice agent_dir + 1 times to bring it into the agent's frame
	    for i in range(self.agent_dir + 1):
	        grid = grid.rotate_left()

	    # Process occluders and visibility
	    # Note that this incurs some performance cost
	    if not self.see_through_walls:
	        vis_mask = grid.process_vis(agent_pos=(self.agent_view_size // 2, self.agent_view_size - 1))
	    else:
	        # Builtin bool: the np.bool alias no longer exists in recent NumPy
	        vis_mask = np.ones(shape=(grid.width, grid.height), dtype=bool)

	    # Make it so the agent sees what it's carrying
	    # We do this by placing the carried object at the agent's position
	    # in the agent's partially observable view
	    agent_pos = grid.width // 2, grid.height - 1
	    if self.carrying:
	        grid.set(*agent_pos, self.carrying)
	    else:
	        grid.set(*agent_pos, None)

	    return grid, vis_mask

	def add_agent_to_grid(self, image):
	    """
	    Write the agent into a symbolic image, used only for full observation.

	    The cell at the agent's position is overwritten in place with
	    [9, 9, 9, agent_dir] and the same image is returned.
	    """
	    col, row = self.agent_pos
	    # could be made cleaner by creating an Agent_id (here we use Lava_id)
	    agent_cell = [9, 9, 9, self.agent_dir]
	    image[col, row] = agent_cell
	    return image

	def gen_obs(self, full_obs=False):
	    """
	    Generate the agent's observation dictionary.

	    By default the image is the encoded partially observable view;
	    with full_obs set it is the encoded whole grid with the agent added.
	    """
	    if not full_obs:
	        view_grid, mask = self.gen_obs_grid()

	        # Encode the partially observable view into a numpy array
	        image = view_grid.encode(mask, absolute_coordinates=not self.egocentric_observation)
	    else:
	        image = self.add_agent_to_grid(self.grid.encode())

	    assert hasattr(self, 'mission'), "environments must define a textual mission string"

	    # Observations are dictionaries containing:
	    # - an image (partially observable view of the environment)
	    # - the agent's direction/orientation (acting as a compass)
	    # - a textual mission string (instructions for the agent)
	    return {
	        'image': image,
	        'direction': self.agent_dir,
	        'mission': self.mission
	    }

	def get_obs_render(self, obs, tile_size=TILE_PIXELS//2):
	    """
	    Render an agent observation for visualization.

	    The observation is decoded with Grid.decode and drawn with the
	    agent at the bottom-centre of the view, using direction 3.
	    """
	    decoded_grid, mask = Grid.decode(obs)

	    viewer_pos = (self.agent_view_size // 2, self.agent_view_size - 1)

	    # Render the whole grid
	    return decoded_grid.render(
	        tile_size,
	        agent_pos=viewer_pos,
	        agent_dir=3,
	        highlight_mask=mask
	    )

	def render(self, mode='human', close=False, highlight=True, tile_size=TILE_PIXELS, mask_unobserved=False):
	    """
	    Render the whole-grid human view

	    :param mode: with 'human' the image is also shown in a window, which
	        is created on first use
	    :param close: with mode 'human', close the window (if any) and return
	        None without rendering
	    :param highlight: highlight the cells visible to the agent
	    :param tile_size: size in pixels of one rendered tile
	    :param mask_unobserved: forwarded to self.grid.render
	    :return: the rendered image, or None when closing
	    """
	    if mode == 'human' and close:
	        if self.window:
	            self.window.close()
	        return

	    if mode == 'human' and not self.window:
	        import gym_minigrid.window
	        self.window = gym_minigrid.window.Window('gym_minigrid')
	        self.window.show(block=False)

	    # Compute which cells are visible to the agent
	    _, vis_mask = self.gen_obs_grid()

	    # Compute the world coordinates of the top-left corner
	    # of the agent's view area
	    f_vec = self.dir_vec
	    r_vec = self.right_vec
	    top_left = self.agent_pos + f_vec * (self.agent_view_size-1) - r_vec * (self.agent_view_size // 2)

	    # Mask of which cells to highlight
	    # (builtin bool: the np.bool alias no longer exists in recent NumPy)
	    highlight_mask = np.zeros(shape=(self.width, self.height), dtype=bool)

	    # For each cell in the visibility mask
	    for vis_j in range(0, self.agent_view_size):
	        for vis_i in range(0, self.agent_view_size):
	            # If this cell is not visible, don't highlight it
	            if not vis_mask[vis_i, vis_j]:
	                continue

	            # Compute the world coordinates of this cell
	            abs_i, abs_j = top_left - (f_vec * vis_j) + (r_vec * vis_i)

	            # Skip view cells that fall outside the grid
	            if abs_i < 0 or abs_i >= self.width:
	                continue
	            if abs_j < 0 or abs_j >= self.height:
	                continue

	            # Mark this cell to be highlighted
	            highlight_mask[abs_i, abs_j] = True

	    # Render the whole grid
	    img = self.grid.render(
	        tile_size,
	        self.agent_pos,
	        self.agent_dir,
	        highlight_mask=highlight_mask if highlight else None,
	        mask_unobserved=mask_unobserved
	    )
	    if mode == 'human':
	        # self.window.set_caption(self.mission)
	        self.window.show_img(img)

	    return img

	def get_mission(self):
	    """
	    Return the mission stored on the environment (self.mission)
	    """
	    mission = self.mission
	    return mission

	def close(self):
	    """
	    Close the rendering window, if there is one
	    """
	    if not self.window:
	        return
	    self.window.close()

	def gen_text_obs(self):
	    """
	    Describe the agent's current partial observation in text.

	    The observed sub-grid is encoded, and every cell whose type index is
	    not 0, 1 or 2 (unseen, empty, wall in OBJECT_TO_IDX) is turned into
	    one sentence giving its location relative to the agent followed by
	    a description of the object. A carried object is described first.

	    :return: dict with a single key 'descriptions' holding the list of
	        description strings (a single "\\n" entry if nothing is described)
	    :raises ValueError: for an observed object type that has no
	        description branch below
	    """
	    grid, vis_mask = self.gen_obs_grid()

	    # Encode the partially observable view into a numpy array
	    image = grid.encode(vis_mask)

	    # (OBJECT_TO_IDX[self.type], COLOR_TO_IDX[self.color], state)
	    # State, 0: open, 1: closed, 2: locked
	    IDX_TO_COLOR = dict(zip(COLOR_TO_IDX.values(), COLOR_TO_IDX.keys()))
	    IDX_TO_OBJECT = dict(zip(OBJECT_TO_IDX.values(), OBJECT_TO_IDX.keys()))

	    list_textual_descriptions = []

	    if self.carrying is not None:
	        list_textual_descriptions.append("You carry a {} {}".format(self.carrying.color, self.carrying.type))

	    agent_pos_vx, agent_pos_vy = self.get_view_coords(self.agent_pos[0], self.agent_pos[1])

	    # Maps view column i -> view row j -> encoded cell, for every cell
	    # that is neither unseen, empty nor a wall
	    view_field_dictionary = dict()

	    for i in range(image.shape[0]):
	        for j in range(image.shape[1]):
	            if image[i][j][0] != 0 and image[i][j][0] != 1 and image[i][j][0] != 2:
	                if i not in view_field_dictionary.keys():
	                    view_field_dictionary[i] = dict()
	                    view_field_dictionary[i][j] = image[i][j]
	                else:
	                    view_field_dictionary[i][j] = image[i][j]

	    # Find the wall if any
	    # We describe a wall only if there is no objects between the agent and the wall in straight line

	    # Find wall in front
	    # NOTE(review): wall descriptions are switched off by this hard-coded
	    # flag. The original indentation is not visible here; the three wall
	    # searches (front, left, right) are assumed to all sit under the
	    # flag - confirm against the repository.
	    add_wall_descr = False
	    if add_wall_descr:
	        j = agent_pos_vy - 1
	        object_seen = False
	        while j >= 0 and not object_seen:
	            if image[agent_pos_vx][j][0] != 0 and image[agent_pos_vx][j][0] != 1:
	                if image[agent_pos_vx][j][0] == 2:
	                    list_textual_descriptions.append(
	                        f"A wall is {agent_pos_vy - j} steps in front of you. \n") # forward
	                    object_seen = True
	                else:
	                    object_seen = True
	            j -= 1
	        # Find wall left
	        i = agent_pos_vx - 1
	        object_seen = False
	        while i >= 0 and not object_seen:
	            if image[i][agent_pos_vy][0] != 0 and image[i][agent_pos_vy][0] != 1:
	                if image[i][agent_pos_vy][0] == 2:
	                    list_textual_descriptions.append(
	                        f"A wall is {agent_pos_vx - i} steps to the left. \n") # left
	                    object_seen = True
	                else:
	                    object_seen = True
	            i -= 1
	        # Find wall right
	        i = agent_pos_vx + 1
	        object_seen = False
	        while i < image.shape[0] and not object_seen:
	            if image[i][agent_pos_vy][0] != 0 and image[i][agent_pos_vy][0] != 1:
	                if image[i][agent_pos_vy][0] == 2:
	                    list_textual_descriptions.append(
	                        f"A wall is {i - agent_pos_vx} steps to the right. \n") # right
	                    object_seen = True
	                else:
	                    object_seen = True
	            i += 1

	    # list_textual_descriptions.append("You see the following objects: ")
	    # returns the position of seen objects relative to you
	    for i in view_field_dictionary.keys():
	        for j in view_field_dictionary[i].keys():
	            # Skip the agent's own cell
	            if i != agent_pos_vx or j != agent_pos_vy:
	                object = view_field_dictionary[i][j]

	                # Positive front_dist: the cell is ahead of the agent;
	                # negative / positive left_right_dist: left / right of it
	                front_dist = agent_pos_vy - j
	                left_right_dist = i-agent_pos_vx

	                loc_descr = ""
	                if front_dist == 1 and left_right_dist == 0:
	                    loc_descr += "Right in front of you "

	                elif left_right_dist == 1 and front_dist == 0:
	                    loc_descr += "Just to the right of you"

	                elif left_right_dist == -1 and front_dist == 0:
	                    loc_descr += "Just to the left of you"

	                else:
	                    front_str = str(front_dist)+" steps in front of you " if front_dist > 0 else ""

	                    loc_descr += front_str

	                    # NOTE(review): the plural "s" is added for any non-zero
	                    # sideways distance, including exactly 1
	                    suff = "s" if abs(left_right_dist) > 0 else ""
	                    and_ = "and" if loc_descr != "" else ""

	                    if left_right_dist < 0:
	                        left_right_str = f"{and_} {-left_right_dist} step{suff} to the left"
	                        loc_descr += left_right_str

	                    elif left_right_dist > 0:
	                        left_right_str = f"{and_} {left_right_dist} step{suff} to the right"
	                        loc_descr += left_right_str

	                    else:
	                        left_right_str = ""
	                        loc_descr += left_right_str

	                loc_descr += f" there is a "

	                # Channel 0 of the encoded cell is the object type index,
	                # channel 1 the color index; later channels are read per type
	                obj_type = IDX_TO_OBJECT[object[0]]
	                if obj_type == "npc":
	                    IDX_TO_STATE = {0: 'friendly', 1: 'antagonistic'}

	                    description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} peer. "

	                    # gaze
	                    gaze_dir = {
	                        0: "towards you",
	                        1: "to the left of you",
	                        2: "in the same direction as you",
	                        3: "to the right of you",
	                    }
	                    description += f"It is looking {gaze_dir[object[3]]}. "

	                    # point
	                    point_dir = {
	                        0: "towards you",
	                        1: "to the left of you",
	                        2: "in the same direction as you",
	                        3: "to the right of you",
	                    }

	                    # 255 in the pointing channel is treated as "not pointing"
	                    if object[4] != 255:
	                        description += f"It is pointing {point_dir[object[4]]}. "

	                    # last action
	                    # (index -> action name, inverted from SocialAINPCActionsDict)
	                    last_action = {v: k for k, v in SocialAINPCActionsDict.items()}[object[5]]


	                    # Action name -> wording used in the text; an empty string
	                    # means the action is not mentioned
	                    last_action = {
	                        "go_forward": "foward",
	                        "rotate_left": "turn left",
	                        "rotate_right": "turn right",
	                        "toggle_action": "toggle",
	                        "point_stop_point": "stop pointing",
	                        "point_E": "",
	                        "point_S": "",
	                        "point_W": "",
	                        "point_N": "",
	                        "stop_point": "stop pointing",
	                        "no_op": ""
	                    }[last_action]

	                    if last_action not in ["no_op", ""]:
	                        description += f"It's last action is {last_action}. "

	                elif obj_type in ["switch", "apple", "generatorplatform", "marble", "marbletee", "fence"]:

	                    if obj_type == "switch":
	                        # assumes that Switch.no_light == True
	                        assert object[-1] == 0

	                    description = f"{IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} "
	                    # These types are expected to carry no state in the remaining channels
	                    assert object[2:].mean() == 0

	                elif obj_type == "lockablebox":
	                    IDX_TO_STATE = {0: 'open', 1: 'closed', 2: 'locked'}
	                    description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} "
	                    assert object[3:].mean() == 0

	                elif obj_type == "applegenerator":
	                    IDX_TO_STATE = {1: 'square', 2: 'round'}
	                    description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} "
	                    assert object[3:].mean() == 0

	                elif obj_type == "remotedoor":
	                    IDX_TO_STATE = {0: 'open', 1: 'closed'}
	                    description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} "
	                    assert object[3:].mean() == 0

	                elif obj_type == "door":
	                    IDX_TO_STATE = {0: 'open', 1: 'closed', 2: 'locked'}
	                    description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} "
	                    assert object[3:].mean() == 0

	                elif obj_type == "lever":
	                    IDX_TO_STATE = {1: 'activated', 0: 'unactivated'}
	                    # 255 in channel 3 is treated as "no countdown"
	                    if object[3] == 255:
	                        countdown_txt = ""
	                    else:
	                        countdown_txt = f"with {object[3]} timesteps left. "

	                    description = f"{IDX_TO_STATE[object[2]]} {IDX_TO_COLOR[object[1]]} {IDX_TO_OBJECT[object[0]]} {countdown_txt}"

	                    assert object[4:].mean() == 0
	                else:
	                    # Any other observed type has no textual description here
	                    raise ValueError(f"Undefined object type {obj_type}")

	                full_destr = loc_descr + description + "\n"

	                list_textual_descriptions.append(full_destr)

	    if len(list_textual_descriptions) == 0:
	        list_textual_descriptions.append("\n")

	    return {'descriptions': list_textual_descriptions}

	class MultiModalMiniGridEnv(MiniGridEnv):

	grammar = None

	def reset(self, *args, **kwargs):
	    """
	    Reset the environment and attach the utterance fields to the
	    first observation.

	    Positional and keyword arguments are accepted but not forwarded:
	    the parent reset is called without arguments.

	    :return: the observation from the parent reset, extended by
	        add_utterance_to_observation
	    """
	    obs = super().reset()
	    # Record the utterance in the history before exposing it in the
	    # observation, then clear it for the next step
	    self.append_existing_utterance_to_history()
	    obs = self.add_utterance_to_observation(obs)
	    self.reset_utterance()
	    return obs

	def append_existing_utterance_to_history(self):
	    """
	    Append the current utterance to self.utterance_history.

	    Nothing is appended when the utterance equals the empty symbol.
	    An utterance that starts with the empty symbol is appended without
	    that prefix; any other utterance must equal self.beginning_string
	    and is appended unchanged.
	    """
	    current = self.utterance
	    marker = self.empty_symbol

	    if current == marker:
	        return

	    if current.startswith(marker):
	        self.utterance_history += current[len(marker):]
	        return

	    assert current == self.beginning_string
	    self.utterance_history += current

	def add_utterance_to_observation(self, obs):
	    """
	    Store the current utterance and the utterance history in the
	    observation and replace its mission by "Hidden".

	    The observation is modified in place and returned.
	    """
	    obs.update({
	        "utterance": self.utterance,
	        "utterance_history": self.utterance_history,
	        "mission": "Hidden",
	    })
	    return obs

	def reset_utterance(self):
	    """
	    Set the current utterance back to the empty indicator
	    """
	    empty_marker = self.empty_symbol
	    self.utterance = empty_marker

	def render(self, *args, show_dialogue=True, **kwargs):
	    """
	    Render the environment and, in human mode, add the dialogue and
	    outcome text to the window.

	    All positional and keyword arguments other than show_dialogue are
	    forwarded to the parent render.

	    :param show_dialogue: show self.full_conversation as window caption
	    :return: the value returned by the parent render
	    """

	    obs = super().render(*args, **kwargs)

	    # NOTE(review): the text is only drawn when the mode is passed
	    # positionally as 'human'; render() and render(mode='human') skip it
	    if args and args[0] == 'human':
	        # draw text to the side of the image
	        self.window.clear_text() # erase previous text
	        if show_dialogue:
	            self.window.set_caption(self.full_conversation)

	        # self.window.ax.set_title("correct color: {}".format(self.box.target_color), loc="left", fontsize=10)

	        if self.outcome_info:
	            color = None
	            if "SUCCESS" in self.outcome_info:
	                color = "lime"
	            elif "FAILURE" in self.outcome_info:
	                color = "red"
	            self.window.add_text(*(0.01, 0.85, self.outcome_info),
	                                 **{'fontsize': 15, 'color': color, 'weight': "bold"})

	        self.window.show_img(obs) # re-draw image to add changes to window
	    return obs

	def add_obstacles(self):
	    """
	    Add a random number of single-cell walls to the grid, depending on
	    the "Obstacles" entry of self.parameters ("No" adds nothing).
	    """
	    self.obstacles = self.parameters.get("Obstacles", "No") if self.parameters else "No"

	    if self.obstacles != "No":
	        # Inclusive range for the number of obstacles at each setting
	        n_stumps_range = {
	            "A_bit": (1, 2),
	            "Medium": (3, 4),
	            "A_lot": (5, 6),
	        }[self.obstacles]

	        # NOTE(review): this uses the global `random` module while the
	        # positions use self._rand_int - confirm that is intended
	        n_stumps = random.randint(*n_stumps_range)

	        for _ in range(n_stumps):
	            self.wall_start_x = self._rand_int(1, self.current_width - 2)
	            self.wall_start_y = self._rand_int(1, self.current_height - 2)
	            # NOTE(review): both branches make the same horz_wall call
	            # with length 1 - confirm whether one was meant to differ
	            if random.choice([True, False]):
	                self.grid.horz_wall(
	                    x=self.wall_start_x,
	                    y=self.wall_start_y,
	                    length=1
	                )
	            else:
	                self.grid.horz_wall(
	                    x=self.wall_start_x,
	                    y=self.wall_start_y,
	                    length=1
	                )