Spaces:

flowers-team
/

SocialAISchool

Sleeping

App Files Files Community

SocialAISchool / gym-minigrid /gym_minigrid /backup_envs /cointhief.py

grg

Cleaned old git history

be5548b about 1 year ago

raw

history blame contribute delete

14.1 kB

	from gym_minigrid.minigrid import *
	from gym_minigrid.register import register
	import time
	from collections import deque


	class Thief(NPC):
	    """
	    A thief NPC that stands next to the agent and demands its coins.

	    When it is created the thief turns towards the (adjacent) agent and
	    counts the coins (``Ball`` objects) inside its view; if ``look_around``
	    is set it also counts the coins seen after one look to a randomly chosen
	    side. That count (``nb_seen_coins``) is the minimum number of coins it
	    accepts from the agent.

	    ``step`` only reports what happened (agent disobeyed / agent gave enough
	    coins / what the thief said); ending the episode and computing the reward
	    is left to the environment.
	    """

	    def __init__(self, color, name, env, thief_pos, hidden_npc=False,
	                 tag_visible_coins=False, view_size=5, look_around=True):
	        """
	        :param color: colour forwarded to the ``NPC`` constructor
	        :param name: name used by the environment when printing the dialogue
	        :param env: environment the thief lives in (read for the agent
	            position, the grid and the action enumeration)
	        :param thief_pos: (x, y) cell of the thief; must be directly next to
	            the agent, otherwise ``_look_at_agent`` raises
	        :param hidden_npc: if True the agent may overlap the thief's cell
	        :param tag_visible_coins: if True ``tag()`` is called on every coin
	            found in the thief's view
	        :param view_size: side length of the square view of the thief
	        :param look_around: if True the thief alternates between facing the
	            agent and looking to one side
	        """
	        super().__init__(color)
	        self.name = name
	        self.npc_type = 0
	        self.env = env
	        self.cur_pos = thief_pos
	        self.init_pos = thief_pos
	        self.view_size = view_size
	        self.npc_dir = self._look_at_agent()  # Thief stares at its victim
	        self.init_dir = self.npc_dir
	        self.actions = self.env.possible_actions
	        self.tag_visible_coins = tag_visible_coins

	        self.nb_given_coins = None
	        self.look_around = look_around
	        if self.look_around:
	            # Randomly select the side the thief looks to; ``look_back``
	            # is the opposite rotation.
	            if np.random.random() > 0.5:  # will look left
	                self.look = self.rotate_left
	                self.look_back = self.rotate_right
	            else:  # will look right
	                self.look = self.rotate_right
	                self.look_back = self.rotate_left

	        # This is how many coins the thief wants, at least
	        self.nb_seen_coins = self._count_coins()
	        self.add_npc_direction = True
	        self.nb_steps = 0
	        self.hidden_npc = hidden_npc

	    def step(self, agent_action, agent_utterance):
	        """
	        Play one turn of the thief.

	        :param agent_action: primitive action taken by the agent (NaN when
	            the agent did not act)
	        :param agent_utterance: utterance of the agent, or None if the agent
	            stayed silent; its last whitespace-separated token is read as
	            the number of coins given
	        :return: tuple ``(agent_disobeyed, agent_gave_coins, utterance)``
	            where ``utterance`` is the thief's sentence or None
	        """
	        agent_disobeyed = False
	        agent_gave_coins = False
	        utterance = None

	        if self.nb_steps == 0:
	            utterance = "Freeze! Give me all the coins you have!"

	        if self.look_around:
	            if self.npc_dir == self.init_dir:  # start to look around
	                self.look()
	            else:  # resume looking at the agent
	                self.look_back()

	        if agent_utterance is not None:
	            # Grammar utterances look like "Here is 3 "; the amount is the
	            # last token.
	            self.nb_given_coins = int(agent_utterance.split()[-1])

	            if self.nb_given_coins >= self.nb_seen_coins:
	                agent_gave_coins = True
	            else:  # agent gave not enough coins
	                agent_disobeyed = True

	        # The thief forbids the agent to move, apart from looking around (rotating)
	        allowed_action = (
	            np.isnan(agent_action)
	            or agent_action == self.actions.left
	            or agent_action == self.actions.right
	        )
	        if not allowed_action:
	            agent_disobeyed = True

	        self.nb_steps += 1
	        return agent_disobeyed, agent_gave_coins, utterance

	    def get_status_str(self):
	        """Return a short text with the seen and given coin counts."""
	        return "thief sees: {} \n agent gives: {}".format(self.nb_seen_coins, self.nb_given_coins)

	    def _count_coins(self):
	        """
	        Count the coins found in the thief's view.

	        With ``look_around`` the thief additionally looks once to its side,
	        merges both sets of positions (duplicates removed) and then turns
	        back.
	        """
	        # get seen coins
	        coins_pos = self.get_pos_visible_coins()

	        if self.look_around:
	            self.look()
	            # add coins visible from this new direction
	            coins_pos += self.get_pos_visible_coins()
	            # remove coins that we already saw
	            if len(coins_pos) > 0:
	                coins_pos = np.unique(coins_pos, axis=0).tolist()
	            self.look_back()

	        return len(coins_pos)

	    def _look_at_agent(self):
	        """
	        Return the direction index that makes the thief face the agent.

	        The offset from the thief to the agent is mapped as
	        dx=+1 -> 0, dy=+1 -> 1, dx=-1 -> 2, dy=-1 -> 3.

	        :raises NotImplementedError: if the agent is not at distance one
	            along x or y
	        """
	        ax, ay = self.env.agent_pos
	        tx, ty = self.cur_pos
	        delta_x, delta_y = ax - tx, ay - ty
	        if delta_x == 1:
	            npc_dir = 0
	        elif delta_x == -1:
	            npc_dir = 2
	        elif delta_y == 1:
	            npc_dir = 1
	        elif delta_y == -1:
	            npc_dir = 3
	        else:
	            raise NotImplementedError(
	                "Thief must be placed next to the agent "
	                "(offset to agent: {}, {})".format(delta_x, delta_y)
	            )

	        return npc_dir

	    def gen_npc_obs_grid(self):
	        """
	        Generate the sub-grid observed by the npc.
	        This method also outputs a visibility mask telling us which grid
	        cells the npc can actually see.
	        """
	        view_size = self.view_size

	        topX, topY, botX, botY = self.env.get_view_exts(dir=self.npc_dir, view_size=view_size, pos=self.cur_pos)

	        grid = self.env.grid.slice(topX, topY, view_size, view_size)

	        for _ in range(self.npc_dir + 1):
	            grid = grid.rotate_left()

	        # Process occluders and visibility
	        # Note that this incurs some performance cost
	        if not self.env.see_through_walls:
	            vis_mask = grid.process_vis(agent_pos=(view_size // 2, view_size - 1))
	        else:
	            # ``np.bool`` was removed from NumPy; the builtin ``bool`` is
	            # the equivalent dtype.
	            vis_mask = np.ones(shape=(grid.width, grid.height), dtype=bool)

	        return grid, vis_mask

	    def get_pos_visible_coins(self):
	        """
	        Collect the positions of the coins inside the npc's view.

	        Every ``Ball`` found in the view sub-grid contributes its
	        ``cur_pos``; when ``tag_visible_coins`` is set, ``tag()`` is also
	        called on it. The visibility mask is not applied here.

	        :return: list with the position of each coin in the view
	        """
	        grid, _ = self.gen_npc_obs_grid()

	        coins_pos = []

	        for obj in grid.grid:
	            if isinstance(obj, Ball):
	                coins_pos.append(obj.cur_pos)
	                if self.tag_visible_coins:
	                    obj.tag()

	        return coins_pos

	    def can_overlap(self):
	        """Return True if the agent may step onto the thief's cell."""
	        # If the NPC is hidden, agent can overlap on it
	        return self.hidden_npc


	class CoinThiefGrammar(object):
	    """
	    Grammar the agent uses to hand over coins.

	    An utterance is a template followed by an amount, each followed by a
	    single space, e.g. ``"Here is 3 "``.
	    """

	    templates = ["Here is"]
	    things = ["0", "1", "2", "3", "4", "5", "6"]

	    # One discrete choice per utterance slot: (template index, thing index)
	    grammar_action_space = spaces.MultiDiscrete([len(templates), len(things)])

	    @classmethod
	    def construct_utterance(cls, action):
	        """Build the utterance selected by ``action`` = (template idx, thing idx)."""
	        template_idx = int(action[0])
	        thing_idx = int(action[1])
	        template = cls.templates[template_idx]
	        thing = cls.things[thing_idx]
	        return template + " " + thing + " "

	    @classmethod
	    def random_utterance(cls):
	        """Build an utterance from a randomly drawn template and thing."""
	        # Draw order (template first, then thing) is kept as is.
	        template = np.random.choice(cls.templates)
	        thing = np.random.choice(cls.things)
	        return template + " " + thing + " "


	class ThiefActions(IntEnum):
	    """Reduced action set: rotate left, rotate right, move forward."""

	    left = 0     # turn left
	    right = 1    # turn right
	    forward = 2  # move forward


	class CoinThiefEnv(MultiModalMiniGridEnv):
	    """
	    Environment in which a thief NPC, placed next to the agent, demands coins.

	    Yellow balls (the coins) are scattered in a walled room. The thief
	    counts the coins in its view and orders the agent to freeze. The episode
	    ends when the agent disobeys (moves other than rotating, or gives fewer
	    coins than the thief saw), when it gives at least as many coins as the
	    thief saw, or when it uses the ``toggle`` / ``done`` actions. A reward is
	    given only when the agent gives exactly the number of coins the thief saw.
	    """

	    def __init__(
	        self,
	        size=5,
	        hear_yourself=False,
	        diminished_reward=True,
	        step_penalty=False,
	        hidden_npc=False,
	        max_steps=20,
	        full_obs=False,
	        few_actions=False,
	        tag_visible_coins=False,
	        nb_coins=6,
	        npc_view_size=5,
	        npc_look_around=True
	    ):
	        """
	        :param size: side length of the square grid (at least 5)
	        :param hear_yourself: stored on the environment; not used in this class
	        :param diminished_reward: if True use the parent's ``_reward()``,
	            otherwise a successful episode yields 1.0
	        :param step_penalty: if True 0.01 is subtracted from every step reward
	        :param hidden_npc: if True the thief is silent, removed from the
	            agent's image observation and can be overlapped
	        :param max_steps: episode length limit; ``None`` means ``5 * size ** 2``
	        :param full_obs: forwarded to the parent environment
	        :param few_actions: if True the primitive actions are ``ThiefActions``
	        :param tag_visible_coins: forwarded to the ``Thief``
	        :param nb_coins: number of coins placed in the room
	        :param npc_view_size: view size of the thief
	        :param npc_look_around: whether the thief looks to its side
	        """
	        assert size >= 5
	        self.empty_symbol = "NA \n"
	        self.hear_yourself = hear_yourself
	        self.diminished_reward = diminished_reward
	        self.step_penalty = step_penalty
	        self.hidden_npc = hidden_npc
	        self.few_actions = few_actions
	        self.possible_actions = ThiefActions if self.few_actions else MiniGridEnv.Actions
	        self.nb_coins = nb_coins
	        self.tag_visible_coins = tag_visible_coins
	        self.npc_view_size = npc_view_size
	        self.npc_look_around = npc_look_around
	        if max_steps is None:
	            max_steps = 5 * size ** 2

	        super().__init__(
	            grid_size=size,
	            max_steps=max_steps,
	            # Set this to True for maximum speed
	            see_through_walls=True,
	            full_obs=full_obs,
	            actions=MiniGridEnv.Actions,
	            action_space=spaces.MultiDiscrete([
	                len(self.possible_actions),
	                *CoinThiefGrammar.grammar_action_space.nvec
	            ]),
	            add_npc_direction=True
	        )

	        print({
	            "size": size,
	            "hear_yourself": hear_yourself,
	            "diminished_reward": diminished_reward,
	            "step_penalty": step_penalty,
	        })

	    def _gen_grid(self, width, height):
	        """Build the room: walls, agent, coins and the thief next to the agent."""
	        # Create the grid
	        self.grid = Grid(width, height, nb_obj_dims=4)

	        # Generate the surrounding walls
	        self.grid.wall_rect(0, 0, width, height)

	        # Randomize the agent's start position and orientation
	        self.place_agent(size=(width, height))

	        # Get possible near-agent positions, and place thief in one of them
	        ax, ay = self.agent_pos
	        near_agent_pos = [[ax, ay + 1], [ax, ay - 1], [ax - 1, ay], [ax + 1, ay]]
	        # keep only the empty cells
	        available_pos = [p for p in near_agent_pos if self.grid.get(*p) is None]
	        thief_pos = self._rand_elem(available_pos)

	        # The thief is added to the grid only after the coins, so its cell
	        # has to be explicitly rejected while placing them.
	        def is_thief_cell(env, pos):
	            return pos.tolist() == thief_pos

	        # Add randomly placed coins
	        # Types and colors of objects we can generate
	        types = ['ball']
	        objs = []
	        objPos = []

	        # Until we have generated all the objects
	        while len(objs) < self.nb_coins:
	            objType = self._rand_elem(types)
	            objColor = 'yellow'

	            if objType == 'ball':
	                obj = Ball(objColor)
	            else:
	                raise NotImplementedError

	            pos = self.place_obj(obj, reject_fn=is_thief_cell)
	            objs.append((objType, objColor))
	            objPos.append(pos)

	        # Set a randomly coloured Thief NPC next to the agent
	        color = self._rand_elem(COLOR_NAMES)

	        self.thief = Thief(color, "Eve", self, thief_pos,
	                           hidden_npc=self.hidden_npc,
	                           tag_visible_coins=self.tag_visible_coins,
	                           view_size=self.npc_view_size,
	                           look_around=self.npc_look_around)

	        self.grid.set(*thief_pos, self.thief)

	        # Generate the mission string
	        self.mission = 'save as much coins as possible'

	        # Dummy beginning string
	        self.beginning_string = "This is what you hear. \n"
	        self.utterance = self.beginning_string

	        # utterance appended at the end of each step
	        self.utterance_history = ""

	        # used for rendering
	        self.conversation = self.utterance
	        self.outcome_info = None

	    def step(self, action):
	        """
	        Apply the agent's action, then let the thief react.

	        :param action: sequence whose first entry is the primitive action
	            (may be NaN) and whose optional remaining two entries select the
	            utterance (both NaN when the agent stays silent)
	        :return: ``(obs, reward, done, info)``
	        """
	        p_action = action[0] if np.isnan(action[0]) else int(action[0])
	        if len(action) == 1:  # agent cannot speak
	            utterance_action = [np.nan, np.nan]
	        else:
	            utterance_action = action[1:]

	        obs, reward, done, info = super().step(p_action)

	        # assert all nan or neither nan
	        assert len(set(np.isnan(utterance_action))) == 1
	        speak_flag = not all(np.isnan(utterance_action))

	        utterance = None
	        if speak_flag:
	            utterance = CoinThiefGrammar.construct_utterance(utterance_action)
	            self.conversation += "{}: {} \n".format("Agent", utterance)

	        # Don't let the agent open any doors
	        if not self.few_actions and p_action == self.actions.toggle:
	            done = True

	        if not self.few_actions and p_action == self.actions.done:
	            done = True

	        # npc's turn
	        agent_disobeyed, agent_gave_coins, npc_utterance = self.thief.step(p_action, utterance)

	        if self.hidden_npc:
	            npc_utterance = None

	        if npc_utterance:
	            self.utterance += "{} \n".format(npc_utterance)
	            self.conversation += "{}: {} \n".format(self.thief.name, npc_utterance)

	        if agent_disobeyed:
	            done = True

	        if agent_gave_coins:
	            done = True
	            # Only giving exactly what the thief saw is rewarded
	            if self.thief.nb_seen_coins == self.thief.nb_given_coins:
	                reward = self._reward()
	                self.outcome_info = "SUCCESS: agent got {} reward \n".format(np.round(reward,1))

	        if done and reward == 0:
	            self.outcome_info = "FAILURE: agent got {} reward \n".format(reward)

	        # discount
	        if self.step_penalty:
	            reward = reward - 0.01

	        if self.hidden_npc:
	            # remove npc from agent view
	            # NOTE(review): 11 is presumably the NPC's object index in the
	            # image encoding -- confirm against the encoding table.
	            npc_obs_idx = np.argwhere(obs['image'] == 11)
	            if npc_obs_idx.size != 0:  # agent sees npc
	                obs['image'][npc_obs_idx[0][0], npc_obs_idx[0][1], :] = [1, 0, 0, 0]

	        # fill observation with text
	        self.append_existing_utterance_to_history()
	        obs = self.add_utterance_to_observation(obs)
	        self.reset_utterance()

	        return obs, reward, done, info

	    def _reward(self):
	        """Return the parent's reward if ``diminished_reward`` is set, else 1.0."""
	        if self.diminished_reward:
	            return super()._reward()
	        else:
	            return 1.0

	    def render(self, *args, **kwargs):
	        """
	        Render the environment and overlay the dialogue and the outcome.

	        All arguments are forwarded to the parent ``render``.
	        """
	        obs = super().render(*args, **kwargs)

	        print("conversation:\n", self.conversation)
	        print("utterance_history:\n", self.utterance_history)

	        self.window.clear_text()  # erase previous text

	        self.window.set_caption(self.conversation)  # overwrites super class caption
	        self.window.ax.set_title(self.thief.get_status_str(), loc="left")
	        if self.outcome_info:
	            color = None
	            if "SUCCESS" in self.outcome_info:
	                color = "lime"
	            elif "FAILURE" in self.outcome_info:
	                color = "red"
	            self.window.add_text(*(0.01, 0.85, self.outcome_info),
	                                 **{'fontsize':15, 'color':color, 'weight':"bold"})

	        self.window.show_img(obs)  # re-draw image to add changes to window

	        return obs


	class CoinThief8x8Env(CoinThiefEnv):
	    """CoinThief environment on an 8x8 grid."""

	    def __init__(self, **kwargs):
	        # Grid size is fixed here; every other option is passed through.
	        super().__init__(size=8, **kwargs)


	class CoinThief6x6Env(CoinThiefEnv):
	    """CoinThief environment on a 6x6 grid."""

	    def __init__(self, **kwargs):
	        # Grid size is fixed here; every other option is passed through.
	        super().__init__(size=6, **kwargs)


	# Register one gym id per grid size (same ids, entry points and order as before).
	for _env_id, _entry_point in (
	    ('MiniGrid-CoinThief-5x5-v0', 'gym_minigrid.envs:CoinThiefEnv'),
	    ('MiniGrid-CoinThief-6x6-v0', 'gym_minigrid.envs:CoinThief6x6Env'),
	    ('MiniGrid-CoinThief-8x8-v0', 'gym_minigrid.envs:CoinThief8x8Env'),
	):
	    register(id=_env_id, entry_point=_entry_point)
	# Do not leave the loop helpers in the module namespace.
	del _env_id, _entry_point