Spaces:

flowers-team
/

SocialAISchool

Sleeping

App Files Files Community

SocialAISchool / gym-minigrid /gym_minigrid /backup_envs /dancewithonenpc.py

grg

Cleaned old git history

be5548b 9 months ago

raw

history blame contribute delete

No virus

12 kB

	from gym_minigrid.minigrid import *
	from gym_minigrid.register import register

	import time
	from collections import deque


	class Dancer(NPC):
	"""
	A dancing NPC that the agent has to copy
	NPC executes a sequence of movement and utterances
	"""

	def __init__(self, color, name, env, dancing_pattern=None,
	dance_len=3, p_sing=.5, hidden_npc=False, sing_only=False):
	super().__init__(color)
	self.name = name
	self.npc_dir = 1 # NPC initially looks downward
	self.npc_type = 0
	self.env = env
	self.actions = self.env.possible_actions
	self.p_sing = p_sing
	self.sing_only = sing_only
	if self.sing_only:
	p_sing = 1
	self.dancing_pattern = dancing_pattern if dancing_pattern else self._gen_dancing_pattern(dance_len, p_sing)
	self.agent_actions = deque(maxlen=len(self.dancing_pattern))
	self.movement_id_to_fun = {self.actions.left: self.rotate_left,
	self.actions.right: self.rotate_right,
	self.actions.forward: self.go_forward}
	# for vizualisation only
	self.movement_id_to_str = {self.actions.left: "left",
	self.actions.right: "right",
	self.actions.forward: "forward",
	self.actions.pickup: "pickup",
	self.actions.drop: "drop",
	self.actions.toggle: "toggle",
	self.actions.done: "done",
	None: "None"}
	self.dancing_step_idx = 0
	self.done_dancing = False
	self.add_npc_direction = True
	self.nb_steps = 0
	self.hidden_npc = hidden_npc

	def step(self, agent_action, agent_utterance):
	agent_matched_moves = False
	utterance = None

	if self.nb_steps == 0:
	utterance = "Look at me!"
	if self.nb_steps >= 2: # Wait a couple steps before dancing
	if not self.done_dancing:
	if self.dancing_step_idx == len(self.dancing_pattern):
	self.done_dancing = True
	utterance = "Now repeat my moves!"
	else:
	# NPC moves and speaks according to dance step
	move_id, utterance = self.dancing_pattern[self.dancing_step_idx]
	self.movement_id_to_fun[move_id]()

	self.dancing_step_idx += 1
	else: # record agent dancing pattern
	self.agent_actions.append((agent_action, agent_utterance))

	if not self.sing_only and list(self.agent_actions) == list(self.dancing_pattern):
	agent_matched_moves = True
	if self.sing_only: # only compare utterances
	if [x[1] for x in self.agent_actions] == [x[1] for x in self.dancing_pattern]:
	agent_matched_moves = True

	self.nb_steps += 1
	return agent_matched_moves, utterance

	def get_status_str(self):
	readable_dancing_pattern = [(self.movement_id_to_str[dp[0]], dp[1]) for dp in self.dancing_pattern]
	readable_agent_actions = [(self.movement_id_to_str[aa[0]], aa[1]) for aa in self.agent_actions]
	return "dance: {} \n agent: {}".format(readable_dancing_pattern, readable_agent_actions)

	def _gen_dancing_pattern(self, dance_len, p_sing):
	available_moves = [self.actions.left, self.actions.right, self.actions.forward]
	dance_pattern = []
	for _ in range(dance_len):
	move = self.env._rand_elem(available_moves)
	sing = None
	if np.random.random() < p_sing:
	sing = DanceWithOneNPCGrammar.random_utterance()
	dance_pattern.append((move, sing))
	return dance_pattern

	def can_overlap(self):
	# If the NPC is hidden, agent can overlap on it
	return self.hidden_npc



	class DanceWithOneNPCGrammar(object):

	templates = ["Move your", "Shake your"]
	things = ["body", "head"]

	grammar_action_space = spaces.MultiDiscrete([len(templates), len(things)])

	@classmethod
	def construct_utterance(cls, action):
	return cls.templates[int(action[0])] + " " + cls.things[int(action[1])] + " "

	@classmethod
	def random_utterance(cls):
	return np.random.choice(cls.templates) + " " + np.random.choice(cls.things) + " "



	class DanceActions(IntEnum):
	# Turn left, turn right, move forward
	left = 0
	right = 1
	forward = 2


	class DanceWithOneNPCEnv(MultiModalMiniGridEnv):
	"""
	Environment in which the agent is instructed to go to a given object
	named using an English text string
	"""

	def __init__(
	self,
	size=5,
	hear_yourself=False,
	diminished_reward=True,
	step_penalty=False,
	dance_len=3,
	hidden_npc=False,
	p_sing=.5,
	max_steps=20,
	full_obs=False,
	few_actions=False,
	sing_only=False

	):
	assert size >= 5
	self.empty_symbol = "NA \n"
	self.hear_yourself = hear_yourself
	self.diminished_reward = diminished_reward
	self.step_penalty = step_penalty
	self.dance_len = dance_len
	self.hidden_npc = hidden_npc
	self.p_sing = p_sing
	self.few_actions = few_actions
	self.possible_actions = DanceActions if self.few_actions else MiniGridEnv.Actions
	self.sing_only = sing_only
	if max_steps is None:
	max_steps = 5size*2

	super().__init__(
	grid_size=size,
	max_steps=max_steps,
	# Set this to True for maximum speed
	see_through_walls=True,
	full_obs=full_obs,
	actions=MiniGridEnv.Actions,
	action_space=spaces.MultiDiscrete([
	len(self.possible_actions),
	*DanceWithOneNPCGrammar.grammar_action_space.nvec
	]),
	add_npc_direction=True
	)

	print({
	"size": size,
	"hear_yourself": hear_yourself,
	"diminished_reward": diminished_reward,
	"step_penalty": step_penalty,
	})

	def _gen_grid(self, width, height):
	# Create the grid
	self.grid = Grid(width, height, nb_obj_dims=4)

	# Randomly vary the room width and height
	width = self._rand_int(5, width+1)
	height = self._rand_int(5, height+1)

	# Generate the surrounding walls
	self.grid.wall_rect(0, 0, width, height)

	# Generate the surrounding walls
	self.grid.wall_rect(0, 0, width, height)


	# Set a randomly coloured Dancer NPC
	color = self._rand_elem(COLOR_NAMES)
	self.dancer = Dancer(color, "Ren", self, dance_len=self.dance_len,
	p_sing=self.p_sing, hidden_npc=self.hidden_npc, sing_only=self.sing_only)

	# Place it on the middle left side of the room
	left_pos = (int((width / 2) - 1), int(height / 2))
	#right_pos = [(width / 2) + 1, height / 2]

	self.grid.set(*left_pos, self.dancer)
	self.dancer.init_pos = left_pos
	self.dancer.cur_pos = left_pos

	# Place it randomly left or right
	#self.place_obj(self.dancer,
	# size=(width, height))

	# Randomize the agent's start position and orientation
	self.place_agent(size=(width, height))

	# Generate the mission string
	self.mission = 'watch dancer and repeat his moves afterwards'

	# Dummy beginning string
	self.beginning_string = "This is what you hear. \n"
	self.utterance = self.beginning_string

	# utterance appended at the end of each step
	self.utterance_history = ""

	# used for rendering
	self.conversation = self.utterance
	self.outcome_info = None

	def step(self, action):
	p_action = action[0] if np.isnan(action[0]) else int(action[0])
	if len(action) == 1: # agent cannot speak
	assert self.p_sing == 0, "Non speaking agent used in a dance env requiring to speak"
	utterance_action = [np.nan, np.nan]
	else:
	utterance_action = action[1:]

	obs, reward, done, info = super().step(p_action)

	if np.isnan(p_action):
	pass


	# assert all nan or neither nan
	assert len(set(np.isnan(utterance_action))) == 1
	speak_flag = not all(np.isnan(utterance_action))

	if speak_flag:
	utterance = DanceWithOneNPCGrammar.construct_utterance(utterance_action)
	self.conversation += "{}: {} \n".format("Agent", utterance)

	# Don't let the agent open any of the doors
	if not self.few_actions and p_action == self.actions.toggle:
	done = True

	if not self.few_actions and p_action == self.actions.done:
	done = True

	# npc's turn
	agent_matched_moves, npc_utterance = self.dancer.step(p_action if not np.isnan(p_action) else None,
	utterance if speak_flag else None)
	if self.hidden_npc:
	npc_utterance = None
	if npc_utterance:
	self.utterance += "{} \n".format(npc_utterance)
	self.conversation += "{}: {} \n".format(self.dancer.name, npc_utterance)
	if agent_matched_moves:
	reward = self._reward()
	self.outcome_info = "SUCCESS: agent got {} reward \n".format(np.round(reward, 1))
	done = True

	# discount
	if self.step_penalty:
	reward = reward - 0.01

	if self.hidden_npc:
	# remove npc from agent view
	npc_obs_idx = np.argwhere(obs['image'] == 11)
	if npc_obs_idx.size != 0: # agent sees npc
	obs['image'][npc_obs_idx[0][0], npc_obs_idx[0][1], :] = [1, 0, 0, 0]

	if done and reward == 0:
	self.outcome_info = "FAILURE: agent got {} reward \n".format(reward)

	# fill observation with text
	self.append_existing_utterance_to_history()
	obs = self.add_utterance_to_observation(obs)
	self.reset_utterance()

	return obs, reward, done, info


	def _reward(self):
	if self.diminished_reward:
	return super()._reward()
	else:
	return 1.0

	def render(self, args, *kwargs):
	obs = super().render(args, *kwargs)

	print("conversation:\n", self.conversation)
	print("utterance_history:\n", self.utterance_history)

	self.window.clear_text() # erase previous text

	self.window.set_caption(self.conversation) # overwrites super class caption
	self.window.ax.set_title(self.dancer.get_status_str(), loc="left", fontsize=10)
	if self.outcome_info:
	color = None
	if "SUCCESS" in self.outcome_info:
	color = "lime"
	elif "FAILURE" in self.outcome_info:
	color = "red"
	self.window.add_text(*(0.01, 0.85, self.outcome_info),
	**{'fontsize':15, 'color':color, 'weight':"bold"})

	self.window.show_img(obs) # re-draw image to add changes to window

	return obs




	class DanceWithOneNPC8x8Env(DanceWithOneNPCEnv):
	def __init__(self, **kwargs):
	super().__init__(size=8, **kwargs)

	class DanceWithOneNPC6x6Env(DanceWithOneNPCEnv):
	def __init__(self, **kwargs):
	super().__init__(size=6, **kwargs)



	register(
	id='MiniGrid-DanceWithOneNPC-5x5-v0',
	entry_point='gym_minigrid.envs:DanceWithOneNPCEnv'
	)

	register(
	id='MiniGrid-DanceWithOneNPC-6x6-v0',
	entry_point='gym_minigrid.envs:DanceWithOneNPC6x6Env'
	)

	register(
	id='MiniGrid-DanceWithOneNPC-8x8-v0',
	entry_point='gym_minigrid.envs:DanceWithOneNPC8x8Env'
	)