# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""A library of tasks.

This interface is intended to implement a wide variety of navigation
tasks. See go/navigation_tasks for a list.
"""
import abc | |
import collections | |
import math | |
import threading | |
import networkx as nx | |
import numpy as np | |
import tensorflow as tf | |
#from pyglib import logging | |
#import gin | |
from envs import task_env | |
from envs import util as envs_util | |
# Utility functions. | |
def _pad_or_clip_array(np_arr, arr_len, is_front_clip=True, output_mask=False): | |
"""Make np_arr array to have length arr_len. | |
If the array is shorter than arr_len, then it is padded from the front with | |
zeros. If it is longer, then it is clipped either from the back or from the | |
front. Only the first dimension is modified. | |
Args: | |
np_arr: numpy array. | |
arr_len: integer scalar. | |
is_front_clip: a boolean. If true then clipping is done in the front, | |
otherwise in the back. | |
output_mask: If True, outputs a numpy array of rank 1 which represents | |
a mask of which values have been added (0 - added, 1 - actual output). | |
Returns: | |
A numpy array and the size of padding (as a python int32). This size is | |
negative is the array is clipped. | |
""" | |
shape = list(np_arr.shape) | |
pad_size = arr_len - shape[0] | |
padded_or_clipped = None | |
if pad_size < 0: | |
if is_front_clip: | |
padded_or_clipped = np_arr[-pad_size:, :] | |
else: | |
padded_or_clipped = np_arr[:arr_len, :] | |
elif pad_size > 0: | |
padding = np.zeros([pad_size] + shape[1:], dtype=np_arr.dtype) | |
padded_or_clipped = np.concatenate([np_arr, padding], axis=0) | |
else: | |
padded_or_clipped = np_arr | |
if output_mask: | |
mask = np.ones((arr_len,), dtype=np.int) | |
if pad_size > 0: | |
mask[-pad_size:] = 0 | |
return padded_or_clipped, pad_size, mask | |
else: | |
return padded_or_clipped, pad_size | |
def classification_loss(truth, predicted, weights=None, is_one_hot=True):
  """A cross entropy loss.

  Computes the mean of cross entropy losses for all pairs of true labels and
  predictions. It wraps around a tf implementation of the cross entropy loss
  with additional reformating of the inputs. If the truth and predicted are
  n-rank Tensors with n > 2, then these are reshaped to 2-rank Tensors. It
  allows for truth to be specified as one hot vector or class indices. Finally,
  a weight can be specified for each element in truth and predicted.

  Args:
    truth: an n-rank or (n-1)-rank Tensor containing labels. If is_one_hot is
      True, then n-rank Tensor is expected, otherwise (n-1) rank one.
    predicted: an n-rank float Tensor containing prediction probabilities.
    weights: an (n-1)-rank float Tensor of weights
    is_one_hot: a boolean.

  Returns:
    A TF float scalar.
  """
  num_labels = predicted.get_shape().as_list()[-1]
  # Flatten both tensors to rank 2: [batch, num_labels].
  if is_one_hot:
    truth = tf.reshape(truth, [-1, num_labels])
  else:
    # Convert class indices to one-hot rows first.
    flat_labels = tf.reshape(truth, [-1])
    truth = tf.one_hot(
        flat_labels, depth=num_labels, on_value=1.0, off_value=0.0, axis=-1)
  predicted = tf.reshape(predicted, [-1, num_labels])
  losses = tf.nn.softmax_cross_entropy_with_logits(
      labels=truth, logits=predicted)
  if weights is not None:
    # Keep only the losses whose weight is truthy.
    keep = tf.cast(tf.reshape(weights, [-1]), dtype=tf.bool)
    losses = tf.boolean_mask(losses, keep)
  return tf.reduce_mean(losses)
class UnrolledTaskIOConfig(object):
  """Configuration of task inputs and outputs.

  A task can have multiple inputs, which define the context, and a task query
  which defines what is to be executed in this context. The desired execution
  is encoded in an output. The config defines the shapes of the inputs, the
  query and the outputs.
  """

  def __init__(self, inputs, output, query=None):
    """Constructs a Task input/output config.

    Args:
      inputs: an OrderedDict mapping modality type to a tuple. Each tuple
        represents the configuration of an input, with first element being the
        type (a tf.DType) and the second element the shape (a list).
      output: a tuple representing the configuration of the output.
      query: a tuple representing the configuration of the query. If no query,
        then None.

    Raises:
      ValueError: if any of the configuration tuples is malformed.
    """
    # A configuration of a single input, output or query. Consists of the type,
    # which can be one of the three specified above, and a shape. The shape must
    # be consistent with the type, e.g. if type == 'image', then shape is a 3
    # valued list.
    io_config = collections.namedtuple('IOConfig', ['type', 'shape'])

    def assert_config(config):
      # Validates one (type, shape) tuple.
      if not isinstance(config, tuple):
        raise ValueError('config must be a tuple. Received {}'.format(
            type(config)))
      if len(config) != 2:
        raise ValueError('config must have 2 elements, has %d' % len(config))
      if not isinstance(config[0], tf.DType):
        raise ValueError('First element of config must be a tf.DType.')
      if not isinstance(config[1], list):
        raise ValueError('Second element of config must be a list.')

    assert isinstance(inputs, collections.OrderedDict)
    for modality_type in inputs:
      assert_config(inputs[modality_type])
    # .items() works on both Python 2 and 3; .iteritems() is Python 2 only.
    self._inputs = collections.OrderedDict(
        [(k, io_config(*value)) for k, value in inputs.items()])

    if query is not None:
      assert_config(query)
      self._query = io_config(*query)
    else:
      self._query = None

    assert_config(output)
    self._output = io_config(*output)

  # These accessors are properties: call sites in this file use attribute
  # access (e.g. `config.inputs`, `config.output.shape`), not method calls.
  @property
  def inputs(self):
    return self._inputs

  @property
  def output(self):
    return self._output

  @property
  def query(self):
    return self._query
class UnrolledTask(object):
  """An interface for a Task which can be unrolled during training.

  Each example is called episode and consists of inputs and target output, where
  the output can be considered as desired unrolled sequence of actions for the
  inputs. For the specified tasks, these action sequences are to be
  unambiguously definable.
  """
  # Python 2 style abstract base class declaration.
  __metaclass__ = abc.ABCMeta

  def __init__(self, config):
    assert isinstance(config, UnrolledTaskIOConfig)
    self._config = config
    # A dict of bookkeeping variables.
    self.info = {}
    # Tensorflow input is multithreaded and this lock is needed to prevent
    # race condition in the environment. Without the lock, non-thread safe
    # environments crash.
    self._lock = threading.Lock()

  @property
  def config(self):
    # A property, not a method: call sites in this file use attribute access
    # (e.g. `self.config.inputs`).
    return self._config

  def episode(self):
    """Returns data needed to train and test a single episode.

    Each episode consists of inputs, which define the context of the task, a
    query which defines the task, and a target output, which defines a
    sequence of actions to be executed for this query. This sequence should not
    require feedback, i.e. can be predicted purely from input and query.

    Returns:
      inputs, query, output, where inputs is a list of numpy arrays and query
      and output are numpy arrays. These arrays must be of shape and type as
      specified in the task configuration.
    """
    pass

  def reset(self, observation):
    """Called after the environment is reset."""
    pass

  def episode_batch(self, batch_size):
    """Returns a batch of episodes.

    Args:
      batch_size: size of batch.

    Returns:
      (inputs, query, output, masks) where inputs is list of numpy arrays and
      query, output, and mask are numpy arrays. These arrays must be of shape
      and type as specified in the task configuration with one additional
      preceding dimension corresponding to the batch.

    Raises:
      ValueError: if self.episode() returns illegal values.
    """
    batched_inputs = collections.OrderedDict(
        [[mtype, []] for mtype in self.config.inputs])
    batched_queries = []
    batched_outputs = []
    batched_masks = []
    for _ in range(int(batch_size)):
      with self._lock:
        # The episode function needs to be thread-safe. Since the current
        # implementation for the envs are not thread safe we need to have lock
        # the operations here.
        inputs, query, outputs = self.episode()
      if not isinstance(outputs, tuple):
        raise ValueError('Outputs return value must be tuple.')
      if len(outputs) != 2:
        raise ValueError('Output tuple must be of size 2.')
      if inputs is not None:
        for modality_type in batched_inputs:
          batched_inputs[modality_type].append(
              np.expand_dims(inputs[modality_type], axis=0))
      if query is not None:
        batched_queries.append(np.expand_dims(query, axis=0))
      batched_outputs.append(np.expand_dims(outputs[0], axis=0))
      if outputs[1] is not None:
        batched_masks.append(np.expand_dims(outputs[1], axis=0))
    # .items() works on both Python 2 and 3; .iteritems() is Python 2 only.
    batched_inputs = {
        k: np.concatenate(i, axis=0) for k, i in batched_inputs.items()
    }
    if batched_queries:
      batched_queries = np.concatenate(batched_queries, axis=0)
    batched_outputs = np.concatenate(batched_outputs, axis=0)
    if batched_masks:
      batched_masks = np.concatenate(batched_masks, axis=0).astype(np.float32)
    else:
      # When the array is empty, the default np.dtype is float64 which causes
      # py_func to crash in the tests.
      batched_masks = np.array([], dtype=np.float32)
    # Preserve the modality ordering declared in the config.
    batched_inputs = [batched_inputs[k] for k in self._config.inputs]
    return batched_inputs, batched_queries, batched_outputs, batched_masks

  def tf_episode_batch(self, batch_size):
    """A batch of episodes as TF Tensors.

    Same as episode_batch with the difference that the return values are TF
    Tensors.

    Args:
      batch_size: a python float for the batch size.

    Returns:
      inputs, query, output, mask where inputs is a dictionary of tf.Tensor
      where the keys are the modality types specified in the config.inputs.
      query, output, and mask are TF Tensors. These tensors must
      be of shape and type as specified in the task configuration with one
      additional preceding dimension corresponding to the batch. Both mask and
      output have the same shape as output.
    """
    # Define TF output types and shapes: one entry per input modality, then
    # optionally the query, then the output and its mask.
    touts = []
    shapes = []
    for _, i in self._config.inputs.items():
      touts.append(i.type)
      shapes.append(i.shape)
    if self._config.query is not None:
      touts.append(self._config.query.type)
      shapes.append(self._config.query.shape)
    # Shapes and types for batched_outputs.
    touts.append(self._config.output.type)
    shapes.append(self._config.output.shape)
    # Shapes and types for batched_masks (first dimension of the output only).
    touts.append(self._config.output.type)
    shapes.append(self._config.output.shape[0:1])

    def episode_batch_func():
      # py_func cannot return None, so the query slot is dropped when unused.
      if self.config.query is None:
        inp, _, output, masks = self.episode_batch(int(batch_size))
        return tuple(inp) + (output, masks)
      else:
        inp, query, output, masks = self.episode_batch(int(batch_size))
        return tuple(inp) + (query, output, masks)

    tf_episode_batch = tf.py_func(episode_batch_func, [], touts,
                                  stateful=True, name='taskdata')
    for episode, shape in zip(tf_episode_batch, shapes):
      episode.set_shape([batch_size] + shape)

    tf_episode_batch_dict = collections.OrderedDict([
        (mtype, episode)
        for mtype, episode in zip(self.config.inputs.keys(), tf_episode_batch)
    ])
    cur_index = len(self.config.inputs.keys())
    tf_query = None
    if self.config.query is not None:
      tf_query = tf_episode_batch[cur_index]
      cur_index += 1
    tf_outputs = tf_episode_batch[cur_index]
    tf_masks = tf_episode_batch[cur_index + 1]

    return tf_episode_batch_dict, tf_query, tf_outputs, tf_masks

  def target_loss(self, true_targets, targets, weights=None):
    """A loss for training a task model.

    This loss measures the discrepancy between the task outputs, the true and
    predicted ones.

    Args:
      true_targets: tf.Tensor of shape and type as defined in the task config
        containing the true outputs.
      targets: tf.Tensor of shape and type as defined in the task config
        containing the predicted outputs.
      weights: a bool tf.Tensor of shape as targets. Only true values are
        considered when formulating the loss.
    """
    pass

  def reward(self, obs, done, info):
    """Returns a reward.

    The tasks has to compute a reward based on the state of the environment. The
    reward computation, though, is task specific. The task is to use the
    environment interface, as defined in task_env.py, to compute the reward. If
    this interface does not expose enough information, it is to be updated.

    Args:
      obs: Observation from environment's step function.
      done: Done flag from environment's step function.
      info: Info dict from environment's step function.

    Returns:
      obs: Observation.
      reward: Floating point value.
      done: Done flag.
      info: Info dict.
    """
    # Default implementation does not do anything.
    return obs, 0.0, done, info
class RandomExplorationBasedTask(UnrolledTask):
  """A Task which starts with a random exploration of the environment."""

  def __init__(self,
               env,
               seed,
               add_query_noise=False,
               query_noise_var=0.0,
               *args,
               **kwargs):  # pylint: disable=keyword-arg-before-vararg
    """Initializes a Task using a random exploration runs.

    Args:
      env: an instance of type TaskEnv and gym.Env.
      seed: a random seed.
      add_query_noise: boolean, if True then whatever queries are generated,
        they are randomly perturbed. The semantics of the queries depends on the
        concrete task implementation.
      query_noise_var: float, the variance of Gaussian noise used for query
        perturbation. Used iff add_query_noise==True.
      *args: see super class.
      **kwargs: see super class.
    """
    super(RandomExplorationBasedTask, self).__init__(*args, **kwargs)
    assert isinstance(env, task_env.TaskEnv)
    self._env = env
    self._env.set_task(self)
    self._rng = np.random.RandomState(seed)
    self._add_query_noise = add_query_noise
    self._query_noise_var = query_noise_var

    # GoToStaticXTask can also take empty config but for the rest of the classes
    # the number of modality types is 1.
    if len(self.config.inputs.keys()) > 1:
      raise NotImplementedError('current implementation supports input '
                                'with only one modality type or less.')

  def _exploration(self):
    """Generates a random exploration run.

    The function uses the environment to generate a run.

    Returns:
      A dict of numpy arrays keyed by modality type. Each array contains
      observations of type and shape as specified in config.inputs.
      A list of states along the exploration path.
      A list of vertex indices corresponding to the path of the exploration.
    """
    # list() around dict views keeps this Python 3 compatible (views are not
    # indexable).
    in_seq_len = list(self._config.inputs.values())[0].shape[0]
    path, _, states, step_outputs = self._env.random_step_sequence(
        min_len=in_seq_len)
    obs = {modality_type: [] for modality_type in self._config.inputs}
    for o in step_outputs:
      step_obs, _, done, _ = o
      # It is expected that each value of step_obs is a dict of observations,
      # whose dimensions are consistent with the config.inputs sizes.
      for modality_type in self._config.inputs:
        assert modality_type in step_obs, '{}'.format(type(step_obs))
        o = step_obs[modality_type]
        i = self._config.inputs[modality_type]
        assert len(o.shape) == len(i.shape) - 1
        for dim_o, dim_i in zip(o.shape, i.shape[1:]):
          assert dim_o == dim_i, '{} != {}'.format(dim_o, dim_i)
        obs[modality_type].append(o)
      if done:
        break

    # Bail out when no observations were collected. The original `if not obs`
    # check could never trigger (obs always has one key per modality), which
    # allowed a ZeroDivisionError in the max_path_len computation below.
    if not obs or not list(obs.values())[0]:
      return obs, states, path
    max_path_len = int(
        round(in_seq_len * float(len(path)) /
              float(len(list(obs.values())[0]))))
    path = path[-max_path_len:]
    states = states[-in_seq_len:]

    # The above obs is a dict of lists of np.array. Re-format it as a dict of
    # np.array, each array containing all observations from all steps.
    def regroup(obs, i):
      """Regroups observations.

      Args:
        obs: a dict of lists of observations keyed by modality type. Each list
          entry is the observation from one step, as a numpy array.
        i: the modality type whose observations are to be grouped.

      Returns:
        A numpy array of shape config.inputs[i] which contains all i-th
        observations from all steps. These are concatenated along the first
        dimension. In addition, if the number of observations is different from
        the one specified in config.inputs[i].shape[0], then the array is either
        padded from front or clipped.
      """
      grouped_obs = np.concatenate(
          [np.expand_dims(o, axis=0) for o in obs[i]], axis=0)
      in_seq_len = self._config.inputs[i].shape[0]
      # pylint: disable=unbalanced-tuple-unpacking
      grouped_obs, _ = _pad_or_clip_array(
          grouped_obs, in_seq_len, is_front_clip=True)
      return grouped_obs

    all_obs = {i: regroup(obs, i) for i in self._config.inputs}

    return all_obs, states, path

  def _obs_to_state(self, path, states):
    """Computes mapping between path nodes and states.

    Args:
      path: a list of vertex indices.
      states: a list of states; the first two entries of each state are
        interpreted as the (x, y) location.

    Returns:
      path_to_obs: a dict mapping a path vertex to the list of observation
        indices that are closest to it.
      obs_to_state: a list mapping each observation index to its closest
        path vertex.
    """
    # Generate a numpy array of locations corresponding to the path vertices.
    # A list comprehension (rather than map()) keeps Python 3 compatibility.
    path_coordinates = [self._env.vertex_to_pose(p) for p in path]
    path_coordinates = np.concatenate(
        [np.reshape(p, [1, 2]) for p in path_coordinates])

    # The observations are taken along a smoothed trajectory following the
    # path. We compute a mapping between the observations and the map vertices.
    path_to_obs = collections.defaultdict(list)
    obs_to_state = []
    for i, s in enumerate(states):
      location = np.reshape(s[0:2], [1, 2])
      # Index of the path vertex closest (in squared euclidean distance) to
      # this state's location.
      index = np.argmin(
          np.reshape(
              np.sum(np.power(path_coordinates - location, 2), axis=1), [-1]))
      index = path[index]
      path_to_obs[index].append(i)
      obs_to_state.append(index)
    return path_to_obs, obs_to_state

  def _perturb_state(self, state, noise_var):
    """Perturbs the state.

    The location is perturbed using Gaussian noise truncated at two standard
    deviations. The orientation (if present) is uniformly resampled.
    NOTE(review): noise_var is used directly as the standard deviation of the
    Gaussian despite the name — confirm intended semantics.

    Args:
      state: a numpy array containing an env state (x, y locations).
      noise_var: float.

    Returns:
      The perturbed state (the input array is not modified).
    """

    def normal(v, std):
      if std > 0:
        n = self._rng.normal(0.0, std)
        # Truncate the noise to [-2*std, 2*std].
        n = min(n, 2.0 * std)
        n = max(n, -2.0 * std)
        return v + n
      else:
        return v

    state = state.copy()
    state[0] = normal(state[0], noise_var)
    state[1] = normal(state[1], noise_var)
    if state.size > 2:
      state[2] = self._rng.uniform(-math.pi, math.pi)
    return state

  def _sample_obs(self,
                  indices,
                  observations,
                  observation_states,
                  path_to_obs,
                  max_obs_index=None,
                  use_exploration_obs=True):
    """Samples one observation which corresponds to vertex_index in path.

    In addition, the sampled observation must have index in observations less
    than max_obs_index. If these two conditions cannot be satisfied the
    function returns None.

    Args:
      indices: a list of integers.
      observations: a list of numpy arrays containing all the observations.
      observation_states: a list of numpy arrays, each array representing the
        state of the observation.
      path_to_obs: a dict of path indices to lists of observation indices.
      max_obs_index: an integer.
      use_exploration_obs: if True, then the observation is sampled among the
        specified observations, otherwise it is obtained from the environment.

    Returns:
      A tuple of:
      -- A numpy array of size width x height x 3 representing the sampled
         observation.
      -- The index of the sampled observation among the input observations.
      -- The state at which the observation is captured.
    Raises:
      ValueError: if the observation and observation_states lists are of
        different lengths.
    """
    if len(observations) != len(observation_states):
      raise ValueError('observation and observation_states lists must have '
                       'equal lengths')
    if not indices:
      return None, None, None
    vertex_index = self._rng.choice(indices)
    if use_exploration_obs:
      obs_indices = path_to_obs[vertex_index]
      if max_obs_index is not None:
        obs_indices = [i for i in obs_indices if i < max_obs_index]
      if obs_indices:
        index = self._rng.choice(obs_indices)
        if self._add_query_noise:
          xytheta = self._perturb_state(observation_states[index],
                                        self._query_noise_var)
          return self._env.observation(xytheta), index, xytheta
        else:
          return observations[index], index, observation_states[index]
      else:
        return None, None, None
    else:
      xy = self._env.vertex_to_pose(vertex_index)
      xytheta = np.array([xy[0], xy[1], 0.0])
      xytheta = self._perturb_state(xytheta, self._query_noise_var)
      return self._env.observation(xytheta), None, xytheta
class AreNearbyTask(RandomExplorationBasedTask):
  """A task of identifying whether a query is nearby current location or not.

  The query is guaranteed to be in proximity of an already visited location,
  i.e. close to one of the observations. For each observation we have one
  query, which is either close or not to this observation.
  """

  def __init__(
      self,
      max_distance=0,
      *args,
      **kwargs):  # pylint: disable=keyword-arg-before-vararg
    super(AreNearbyTask, self).__init__(*args, **kwargs)
    self._max_distance = max_distance

    if len(self.config.inputs.keys()) != 1:
      raise NotImplementedError('current implementation supports input '
                                'with only one modality type')

  def episode(self):
    """Episode data.

    Returns:
      observations: a dict with one entry. This entry is a numpy array of
        size in_seq_len x observation_size x observation_size x 3 containing
        in_seq_len images.
      query: a numpy array of size
        in_seq_len x observation_size X observation_size x 3 containing query
        images.
      A tuple of size two. First element is a in_seq_len x 3 numpy array of
        one hot label vectors. The i-th element denotes whether the i-th query
        image is nearby (label 1), not nearby but visited (label 0), or not
        visited (label 2) w.r.t. the i-th observation.
        The second element in the tuple is a mask, a numpy array of size
        in_seq_len x 1 and values 1.0 or 0.0 denoting whether the query is
        valid or not (it can happen that the query is not valid, e.g. there are
        not enough observations to have a meaningful query).
    """
    observations, states, path = self._exploration()
    # list() around dict views keeps Python 3 compatibility.
    assert len(list(observations.values())[0]) == len(states)

    # The observations are taken along a smoothed trajectory following the
    # path. We compute a mapping between the observations and the map vertices.
    path_to_obs, obs_to_path = self._obs_to_state(path, states)

    # Go over all observations, and sample a query. With probability 0.5 this
    # query is a nearby observation (defined as belonging to the same vertex
    # in path).
    g = self._env.graph
    queries = []
    labels = []
    validity_masks = []
    query_index_in_observations = []
    for i, curr_o in enumerate(list(observations.values())[0]):
      p = obs_to_path[i]
      low = max(0, i - self._max_distance)

      # Nearby visited indices, label 1. list(g[p]) materializes the neighbor
      # keys (a dict view in Python 3 cannot be added to a list).
      nearby_visited = [
          ii for ii in path[low:i + 1] + list(g[p]) if ii in obs_to_path[:i]
      ]
      # Bug fix: filter the candidate list itself. The original filtered a
      # still-empty placeholder (`index_groups[1]`), which made this label
      # group always empty.
      nearby_visited = [ii for ii in nearby_visited if ii in path_to_obs]
      # NOT nearby visited indices, label 0.
      not_nearby_visited = [ii for ii in path[:low] if ii not in g[p]]
      not_nearby_visited = [
          ii for ii in not_nearby_visited if ii in path_to_obs
      ]
      # NOT visited indices, label 2.
      not_visited = [
          ii for ii in range(g.number_of_nodes()) if ii not in path[:i + 1]
      ]
      # A list of lists of vertex indices. Each list in this group corresponds
      # to one possible label.
      index_groups = [not_nearby_visited, nearby_visited, not_visited]

      # Consider only labels for which there are indices.
      allowed_labels = [ii for ii, group in enumerate(index_groups) if group]
      label = self._rng.choice(allowed_labels)

      indices = list(set(index_groups[label]))
      max_obs_index = None if label == 2 else i
      use_exploration_obs = False if label == 2 else True
      o, obs_index, _ = self._sample_obs(
          indices=indices,
          observations=list(observations.values())[0],
          observation_states=states,
          path_to_obs=path_to_obs,
          max_obs_index=max_obs_index,
          use_exploration_obs=use_exploration_obs)

      query_index_in_observations.append(obs_index)

      # If we cannot sample a valid query, we mark it as not valid in mask.
      if o is None:
        label = 0.0
        o = curr_o
        validity_masks.append(0)
      else:
        validity_masks.append(1)

      # _sample_obs may return either a raw observation array or a modality
      # dict from the environment (and curr_o is a raw array); normalize to
      # the raw array. The original unconditionally called o.values()[0],
      # which crashed on the array case.
      if isinstance(o, dict):
        o = list(o.values())[0]
      queries.append(o)
      labels.append(label)

    query = np.concatenate([np.expand_dims(q, axis=0) for q in queries], axis=0)

    def one_hot(label, num_labels=3):
      # np.float was removed in NumPy 1.24; the builtin float is equivalent.
      a = np.zeros((num_labels,), dtype=float)
      a[int(label)] = 1.0
      return a

    outputs = np.stack([one_hot(l) for l in labels], axis=0)
    validity_mask = np.reshape(
        np.array(validity_masks, dtype=np.int32), [-1, 1])

    self.info['query_index_in_observations'] = query_index_in_observations
    self.info['observation_states'] = states

    return observations, query, (outputs, validity_mask)

  def target_loss(self, truth, predicted, weights=None):
    pass
class NeighboringQueriesTask(RandomExplorationBasedTask):
  """A task of identifying whether two queries are closeby or not.

  The proximity between queries is defined by the length of the shortest path
  between them.
  """

  def __init__(
      self,
      max_distance=1,
      *args,
      **kwargs):  # pylint: disable=keyword-arg-before-vararg
    """Initializes a NeighboringQueriesTask.

    Args:
      max_distance: integer, the maximum distance in terms of number of vertices
        between the two queries, so that they are considered neighboring.
      *args: for super class.
      **kwargs: for super class.
    """
    super(NeighboringQueriesTask, self).__init__(*args, **kwargs)
    self._max_distance = max_distance
    if len(self.config.inputs.keys()) != 1:
      raise NotImplementedError('current implementation supports input '
                                'with only one modality type')

  def episode(self):
    """Episode data.

    Returns:
      observations: a dict with one entry. This entry is a numpy array of
        size in_seq_len x observation_size x observation_size x 3 containing
        in_seq_len images.
      queries: a numpy array of size
        2 x observation_size X observation_size x 3 containing a pair of query
        images.
      A tuple of size two. First element is a numpy array of size 2 containing
        a one hot vector of whether the two observations are neighboring.
        Second element is a boolean numpy value denoting whether this is a
        valid episode.
    """
    observations, states, path = self._exploration()
    # list() around dict views keeps Python 3 compatibility.
    assert len(list(observations.values())[0]) == len(states)
    path_to_obs, _ = self._obs_to_state(path, states)
    # Restrict path to ones for which observations have been generated.
    path = [p for p in path if p in path_to_obs]
    # Sample first query.
    query1_index = self._rng.choice(path)
    # Sample label: 1 means the second query must be nearby, 0 means not.
    label = self._rng.randint(2)
    # Vertices within self._max_distance hops of the first query.
    closest_indices = list(
        nx.single_source_shortest_path(
            self._env.graph, query1_index, self._max_distance).keys())
    if label == 0:
      # Path vertices which are NOT nearby the first query.
      indices = [p for p in path if p not in closest_indices]
    else:
      # Nearby vertices which lie on the path.
      indices = [p for p in closest_indices if p in path]
    # NOTE(review): assumes `indices` is non-empty; self._rng.choice raises
    # ValueError otherwise — confirm upstream guarantees.
    query2_index = self._rng.choice(indices)
    # Generate an observation for each query vertex.
    query1, query1_index, _ = self._sample_obs(
        [query1_index],
        list(observations.values())[0],
        states,
        path_to_obs,
        max_obs_index=None,
        use_exploration_obs=True)
    query2, query2_index, _ = self._sample_obs(
        [query2_index],
        list(observations.values())[0],
        states,
        path_to_obs,
        max_obs_index=None,
        use_exploration_obs=True)

    queries = np.concatenate(
        [np.expand_dims(q, axis=0) for q in [query1, query2]])
    labels = np.array([0, 0])
    labels[label] = 1
    is_valid = np.array([1])

    self.info['observation_states'] = states
    self.info['query_indices_in_observations'] = [query1_index, query2_index]

    return observations, queries, (labels, is_valid)

  def target_loss(self, truth, predicted, weights=None):
    pass
#@gin.configurable | |
class GotoStaticXTask(RandomExplorationBasedTask): | |
"""Task go to a static X. | |
If continuous reward is used only one goal is allowed so that the reward can | |
be computed as a delta-distance to that goal.. | |
""" | |
def __init__(self, | |
step_reward=0.0, | |
goal_reward=1.0, | |
hit_wall_reward=-1.0, | |
done_at_target=False, | |
use_continuous_reward=False, | |
*args, | |
**kwargs): # pylint: disable=keyword-arg-before-vararg | |
super(GotoStaticXTask, self).__init__(*args, **kwargs) | |
if len(self.config.inputs.keys()) > 1: | |
raise NotImplementedError('current implementation supports input ' | |
'with only one modality type or less.') | |
self._step_reward = step_reward | |
self._goal_reward = goal_reward | |
self._hit_wall_reward = hit_wall_reward | |
self._done_at_target = done_at_target | |
self._use_continuous_reward = use_continuous_reward | |
self._previous_path_length = None | |
def episode(self): | |
observations, _, path = self._exploration() | |
if len(path) < 2: | |
raise ValueError('The exploration path has only one node.') | |
g = self._env.graph | |
start = path[-1] | |
while True: | |
goal = self._rng.choice(path[:-1]) | |
if goal != start: | |
break | |
goal_path = nx.shortest_path(g, start, goal) | |
init_orientation = self._rng.uniform(0, np.pi, (1,)) | |
trajectory = np.array( | |
[list(self._env.vertex_to_pose(p)) for p in goal_path]) | |
init_xy = np.reshape(trajectory[0, :], [-1]) | |
init_state = np.concatenate([init_xy, init_orientation], 0) | |
trajectory = trajectory[1:, :] | |
deltas = envs_util.trajectory_to_deltas(trajectory, init_state) | |
output_seq_len = self._config.output.shape[0] | |
arr = _pad_or_clip_array(deltas, output_seq_len, output_mask=True) | |
# pylint: disable=unbalanced-tuple-unpacking | |
thetas, _, thetas_mask = arr | |
query = self._env.observation(self._env.vertex_to_pose(goal)).values()[0] | |
return observations, query, (thetas, thetas_mask) | |
def reward(self, obs, done, info): | |
if 'wall_collision' in info and info['wall_collision']: | |
return obs, self._hit_wall_reward, done, info | |
reward = 0.0 | |
current_vertex = self._env.pose_to_vertex(self._env.state) | |
if current_vertex in self._env.targets(): | |
if self._done_at_target: | |
done = True | |
else: | |
obs = self._env.reset() | |
reward = self._goal_reward | |
else: | |
if self._use_continuous_reward: | |
if len(self._env.targets()) != 1: | |
raise ValueError( | |
'FindX task with continuous reward is assuming only one target.') | |
goal_vertex = self._env.targets()[0] | |
path_length = self._compute_path_length(goal_vertex) | |
reward = self._previous_path_length - path_length | |
self._previous_path_length = path_length | |
else: | |
reward = self._step_reward | |
return obs, reward, done, info | |
  def _compute_path_length(self, goal_vertex):
    """Returns an approximate metric length of the path to the goal.

    The length is the number of full edges on the shortest graph path times
    the cell size, plus the Euclidean distance from the agent's continuous
    position to the next vertex on that path.

    Args:
      goal_vertex: integer index of the goal vertex in the environment graph.

    Returns:
      A float path length in the same units as cell_size_px (presumably
      pixels -- confirm against the environment implementation).
    """
    current_vertex = self._env.pose_to_vertex(self._env.state)
    path = nx.shortest_path(self._env.graph, current_vertex, goal_vertex)
    # The agent is never rewarded at the goal itself, so the path must have
    # at least a start and one further vertex.
    assert len(path) >= 2
    curr_xy = np.array(self._env.state[:2])
    next_xy = np.array(self._env.vertex_to_pose(path[1]))
    last_step_distance = np.linalg.norm(next_xy - curr_xy)
    return (len(path) - 2) * self._env.cell_size_px + last_step_distance
  def reset(self, observation):
    """Initializes per-episode state after the environment is reset.

    When continuous reward is enabled, caches the current path length to the
    single target so that the first reward() call can compute a delta.

    Args:
      observation: initial observation from the environment (unused here).

    Raises:
      ValueError: if continuous reward is enabled and there is not exactly
        one target.
    """
    if self._use_continuous_reward:
      if len(self._env.targets()) != 1:
        raise ValueError(
            'FindX task with continuous reward is assuming only one target.')
      goal_vertex = self._env.targets()[0]
      self._previous_path_length = self._compute_path_length(goal_vertex)
def target_loss(self, truth, predicted, weights=None): | |
"""Action classification loss. | |
Args: | |
truth: a batch_size x sequence length x number of labels float | |
Tensor containing a one hot vector for each label in each batch and | |
time. | |
predicted: a batch_size x sequence length x number of labels float | |
Tensor containing a predicted distribution over all actions. | |
weights: a batch_size x sequence_length float Tensor of bool | |
denoting which actions are valid. | |
Returns: | |
An average cross entropy over all batches and elements in sequence. | |
""" | |
return classification_loss( | |
truth=truth, predicted=predicted, weights=weights, is_one_hot=True) | |
class RelativeLocationTask(RandomExplorationBasedTask):
  """A task of estimating the relative location of a query w.r.t current.

  It is to be used for debugging. It is designed such that the output is a
  single value, out of a discrete set of values, so that it can be phrased as
  a classification problem.
  """

  def __init__(self, num_labels, *args, **kwargs):
    """Initializes a relative location task.

    Args:
      num_labels: integer, number of orientations to bin the relative
        orientation into.
      *args: see super class.
      **kwargs: see super class.

    Raises:
      NotImplementedError: if the input config specifies more than one
        modality type.
    """
    super(RelativeLocationTask, self).__init__(*args, **kwargs)
    self._num_labels = num_labels
    if len(self.config.inputs.keys()) != 1:
      raise NotImplementedError('current implementation supports input '
                                'with only one modality type')

  def episode(self):
    """Generates a query and its binned relative-orientation label.

    Returns:
      A tuple (observations, query, (output, None)) where output is a one-hot
      vector of size num_labels encoding the orientation bin of the query
      location relative to the agent's final pose.

    Raises:
      ValueError: if the configured output is not rank 1 of size num_labels.
    """
    observations, states, path = self._exploration()
    # Select a random element from history.
    path_to_obs, _ = self._obs_to_state(path, states)
    use_exploration_obs = not self._add_query_noise
    # Python 2: dict.values() is a list; uses the single input modality.
    query, _, query_state = self._sample_obs(
        path[:-1],
        observations.values()[0],
        states,
        path_to_obs,
        max_obs_index=None,
        use_exploration_obs=use_exploration_obs)
    # Agent's final pose and the translation from it to the query location.
    x, y, theta = tuple(states[-1])
    q_x, q_y, _ = tuple(query_state)
    t_x, t_y = q_x - x, q_y - y
    # Express the translation in a frame tied to the agent's heading.
    # NOTE(review): the sin/cos arrangement differs from a standard rotation
    # matrix -- verify the intended frame convention.
    (rt_x, rt_y) = (np.sin(theta) * t_x - np.cos(theta) * t_y,
                    np.cos(theta) * t_x + np.sin(theta) * t_y)
    # Bins are [a(i), a(i+1)] for a(i) = -pi + 0.5 * bin_size + i * bin_size.
    shift = np.pi * (1 - 1.0 / (2.0 * self._num_labels))
    orientation = np.arctan2(rt_y, rt_x) + shift
    if orientation < 0:
      orientation += 2 * np.pi
    label = int(np.floor(self._num_labels * orientation / (2 * np.pi)))
    out_shape = self._config.output.shape
    if len(out_shape) != 1:
      raise ValueError('Output shape should be of rank 1.')
    if out_shape[0] != self._num_labels:
      raise ValueError('Output shape must be of size %d' % self._num_labels)
    output = np.zeros(out_shape, dtype=np.float32)
    output[label] = 1
    return observations, query, (output, None)

  def target_loss(self, truth, predicted, weights=None):
    """One-hot cross entropy; see classification_loss."""
    return classification_loss(
        truth=truth, predicted=predicted, weights=weights, is_one_hot=True)
class LocationClassificationTask(UnrolledTask):
  """A task of classifying a location as one of several classes.

  The task does not have an input, but just a query and an output. The query
  is an observation of the current location, e.g. an image taken from the
  current state. The output is a label classifying this location in one of
  predefined set of locations (or landmarks).

  The current implementation classifies locations as intersections based on
  the number and directions of bifurcations. It is expected that a location
  can have at most 4 different directions, aligned with the axes. As each of
  these four directions might be present or not, the number of possible
  intersections are 2^4 = 16.
  """

  def __init__(self, env, seed, *args, **kwargs):
    """Initializes the task with an environment and a deterministic RNG seed.

    Args:
      env: environment exposing graph, vertex_to_pose, pose_to_vertex and
        observation.
      seed: integer seed for the location-sampling RNG.
      *args: see super class.
      **kwargs: see super class.

    Raises:
      NotImplementedError: if the input config specifies more than one
        modality type.
    """
    super(LocationClassificationTask, self).__init__(*args, **kwargs)
    self._env = env
    self._rng = np.random.RandomState(seed)
    # A location property which can be set. If not set, a random one is
    # generated.
    self._location = None
    if len(self.config.inputs.keys()) > 1:
      raise NotImplementedError('current implementation supports input '
                                'with only one modality type or less.')

  # BUGFIX: the getter and setter were both plain methods named `location`,
  # so the second definition silently shadowed the getter. Restored the
  # property/setter pair the "location property" comment above describes.
  @property
  def location(self):
    return self._location

  @location.setter
  def location(self, location):
    self._location = location

  def episode(self):
    """Generates a query observation and its intersection-type label.

    Returns:
      A tuple ([], query, (one_hot_label, None)): there are no input
      observations, query is the observation at the (possibly sampled)
      location, and one_hot_label encodes which of the 16 intersection types
      the location is.

    Raises:
      ValueError: if the configured output is not a rank-1 tensor of size 16.
    """
    # Get a location. If not set, sample one at a vertex with a random
    # orientation.
    location = self._location
    if location is None:
      num_nodes = self._env.graph.number_of_nodes()
      vertex = int(math.floor(self._rng.uniform(0, num_nodes)))
      xy = self._env.vertex_to_pose(vertex)
      theta = self._rng.uniform(0, 2 * math.pi)
      location = np.concatenate(
          [np.reshape(xy, [-1]), np.array([theta])], axis=0)
    else:
      vertex = self._env.pose_to_vertex(location)
      theta = location[2]
    neighbors = self._env.graph.neighbors(vertex)
    xy_s = [self._env.vertex_to_pose(n) for n in neighbors]

    def rotate(xy, theta):
      """Rotates a vector around the origin by angle theta.

      Args:
        xy: a numpy darray of shape (2, ) of floats containing the x and y
          coordinates of a vector.
        theta: a python float containing the rotation angle in radians.

      Returns:
        A numpy darray of floats of shape (2,) containing the x and y
        coordinates rotated xy.
      """
      rotated_x = np.cos(theta) * xy[0] - np.sin(theta) * xy[1]
      rotated_y = np.sin(theta) * xy[0] + np.cos(theta) * xy[1]
      return np.array([rotated_x, rotated_y])

    # Rotate all intersection bifurcations by the orientation of the agent as
    # the intersection label is defined in an agent centered fashion. The
    # extra -pi/4 presumably centers each axis-aligned direction inside its
    # quadrant bin below -- confirm.
    xy_s = [
        rotate(xy - location[0:2], -location[2] - math.pi / 4) for xy in xy_s
    ]
    th_s = [np.arctan2(xy[1], xy[0]) for xy in xy_s]
    out_shape = self._config.output.shape
    if len(out_shape) != 1:
      raise ValueError('Output shape should be of rank 1.')
    num_labels = out_shape[0]
    if num_labels != 16:
      raise ValueError('Currently only 16 labels are supported '
                       '(there are 16 different 4 way intersection types).')
    # Quantize each neighbor direction into one of 4 quadrant bins; the set
    # of present bins is encoded as a 4-bit number in [0, 16).
    # NOTE(review): th == pi maps to bin 4 and would overflow the 4-bit
    # label; assumed not to occur after the rotation above -- confirm.
    th_s = set([int(math.floor(4 * (th / (2 * np.pi) + 0.5))) for th in th_s])
    one_hot_label = np.zeros((num_labels,), dtype=np.float32)
    label = 0
    for th in th_s:
      label += pow(2, th)
    one_hot_label[int(label)] = 1.0
    # Python 2: dict.values() is a list; uses the single observation modality.
    query = self._env.observation(location).values()[0]
    return [], query, (one_hot_label, None)

  def reward(self, obs, done, info):
    """This task is purely supervised; there is no RL reward."""
    raise ValueError('Do not call.')

  def target_loss(self, truth, predicted, weights=None):
    """One-hot cross entropy; see classification_loss."""
    return classification_loss(
        truth=truth, predicted=predicted, weights=weights, is_one_hot=True)
class GotoStaticXNoExplorationTask(UnrolledTask):
  """An interface for findX tasks without exploration.

  The agent is initialized a random location in a random world and a random
  goal and the objective is for the agent to move toward the goal. This class
  generates episode for such task. Each generates a sequence of observations
  x and target outputs y. x is the observations and is an OrderedDict with
  keys provided from config.inputs.keys() and the shapes provided in the
  config.inputs. The output is a numpy arrays with the shape specified in the
  config.output. The shape of the array is (sequence_length x action_size)
  where action is the number of actions that can be done in the environment.
  Note that config.output.shape should be set according to the number of
  actions that can be done in the env.

  target outputs y are the groundtruth value of each action that is computed
  from the environment graph. The target output for each action is
  proportional to the progress that each action makes. Target value of 1
  means that the action takes the agent one step closer, -1 means the action
  takes the agent one step farther. Value of -2 means that action should not
  take place at all. This can be because the action leads to collision or it
  wants to terminate the episode prematurely.
  """

  def __init__(self, env, *args, **kwargs):
    """Initializes the task and validates the config.

    Args:
      env: environment exposing graph, targets(), pose_to_vertex,
        vertex_to_pose, action and random_step_sequence.
      *args: see UnrolledTask.
      **kwargs: see UnrolledTask.

    Raises:
      ValueError: if the config specifies a query, if the output shape is not
        rank 2, or if inputs and output disagree on sequence length.
    """
    super(GotoStaticXNoExplorationTask, self).__init__(*args, **kwargs)
    if self._config.query is not None:
      raise ValueError('query should be None.')
    # Previously this rank check appeared twice; kept once.
    if len(self._config.output.shape) != 2:
      raise ValueError('output should only have two dimensions:'
                       '(sequence_length x number_of_actions)')
    for input_config in self._config.inputs.values():
      if input_config.shape[0] != self._config.output.shape[0]:
        raise ValueError('the first dimension of the input and output should'
                         'be the same.')
    self._env = env

  def _compute_shortest_path_length(self, vertex, target_vertices):
    """Computes length of the shortest path from vertex to any target vertex.

    Note: the value is the node count of the shortest path (edge count plus
    one); callers only use differences of these values, so the off-by-one
    cancels.

    Args:
      vertex: integer, index of the vertex in the environment graph.
      target_vertices: list of the target vertexes

    Returns:
      integer, minimum distance from the vertex to any of the target_vertices.

    Raises:
      networkx.NetworkXNoPath: if there is no path between the vertex and at
        least one of the target_vertices.
    """
    # nx.shortest_path raises NetworkXNoPath for an unreachable target; the
    # exception is deliberately propagated to the caller. (The previous
    # try/except only re-raised, since its logging call was commented out.)
    return np.min([
        len(nx.shortest_path(self._env.graph, vertex, t))
        for t in target_vertices
    ])

  def _compute_gt_value(self, vertex, target_vertices):
    """Computes groundtruth value of all the actions at the vertex.

    The value of each action is the difference each action makes in the
    length of the shortest path to the goal. If an action takes the agent one
    step closer to the goal the value is 1. In case, it takes the agent one
    step away from the goal it would be -1. If it leads to collision or if
    the agent uses action stop before reaching to the goal it is -2. To avoid
    scale issues the gt_values are multipled by 0.5.

    Args:
      vertex: integer, the index of current vertex.
      target_vertices: list of the integer indexes of the target views.

    Returns:
      numpy array with shape (action_size,) and each element is the
      groundtruth value of each action based on the progress each action
      makes.
    """
    action_size = self._config.output.shape[1]
    # Default value -2 marks actions that do not correspond to any edge.
    output_value = np.ones((action_size), dtype=np.float32) * -2
    my_distance = self._compute_shortest_path_length(vertex, target_vertices)
    for adj in self._env.graph[vertex]:
      adj_distance = self._compute_shortest_path_length(adj, target_vertices)
      action_index = self._env.action(
          self._env.vertex_to_pose(vertex), self._env.vertex_to_pose(adj))
      assert action_index is not None, ('{} is not adjacent to {}. There might '
                                        'be a problem in environment graph '
                                        'connectivity because there is no '
                                        'direct edge between the given '
                                        'vertices').format(
                                            self._env.vertex_to_pose(vertex),
                                            self._env.vertex_to_pose(adj))
      output_value[action_index] = my_distance - adj_distance
    return output_value * 0.5

  def episode(self):
    """Returns data needed to train and test a single episode.

    Returns:
      (inputs, None, output) where inputs is a dictionary of modality types
      to numpy arrays. The second element is query but we assume that the
      goal is also given as part of observation so it should be None for this
      task, and the outputs is the tuple of ground truth action values with
      the shape of (sequence_length x action_size) that is coming from
      config.output.shape and a numpy array with the shape of
      (sequence_length,) that is 1 if the corresponding element of the input
      and output should be used in the training optimization.

    Raises:
      ValueError: If the output values for env.random_step_sequence is not
        valid.
      ValueError: If the shape of observations coming from the env is not
        consistent with the config.
      ValueError: If there is a modality type specified in the config but the
        environment does not return that.
    """
    # Sequence length is the first dimension of any of the input tensors.
    # Python 2: dict.values() is a list.
    sequence_length = self._config.inputs.values()[0].shape[0]
    modality_types = self._config.inputs.keys()
    path, _, _, step_outputs = self._env.random_step_sequence(
        max_len=sequence_length)
    target_vertices = [self._env.pose_to_vertex(x) for x in self._env.targets()]
    if len(path) != len(step_outputs):
      raise ValueError('path, and step_outputs should have equal length'
                       ' {}!={}'.format(len(path), len(step_outputs)))
    # Building up observations. observations will be a OrderedDict of
    # modality types. The values are numpy arrays that follow the given shape
    # in the input config for each modality type.
    observations = collections.OrderedDict([k, []] for k in modality_types)
    for step_output in step_outputs:
      obs_dict = step_output[0]
      # Only going over the modality types that are specified in the input
      # config.
      for modality_type in modality_types:
        if modality_type not in obs_dict:
          raise ValueError('modality type is not returned from the environment.'
                           '{} not in {}'.format(modality_type,
                                                 obs_dict.keys()))
        obs = obs_dict[modality_type]
        if np.any(
            obs.shape != tuple(self._config.inputs[modality_type].shape[1:])):
          raise ValueError(
              'The observations should have the same size as speicifed in'
              'config for modality type {}. {} != {}'.format(
                  modality_type, obs.shape,
                  self._config.inputs[modality_type].shape[1:]))
        observations[modality_type].append(obs)
    gt_value = [self._compute_gt_value(v, target_vertices) for v in path]
    # Pad or clip (from the back) to the configured sequence length.
    # pylint: disable=unbalanced-tuple-unpacking
    gt_value, _, value_mask = _pad_or_clip_array(
        np.array(gt_value),
        sequence_length,
        is_front_clip=False,
        output_mask=True,
    )
    for modality_type, obs in observations.iteritems():
      observations[modality_type], _, mask = _pad_or_clip_array(
          np.array(obs), sequence_length, is_front_clip=False, output_mask=True)
      # Inputs and outputs must be padded/clipped identically.
      assert np.all(mask == value_mask)
    return observations, None, (gt_value, value_mask)

  def reset(self, observation):
    """Called after the environment is reset."""
    pass

  def target_loss(self, true_targets, targets, weights=None):
    """A loss for training a task model.

    This loss measures the discrepancy between the task outputs, the true and
    predicted ones.

    Args:
      true_targets: tf.Tensor of tf.float32 with the shape of
        (batch_size x sequence_length x action_size).
      targets: tf.Tensor of tf.float32 with the shape of
        (batch_size x sequence_length x action_size).
      weights: tf.Tensor of tf.bool with the shape of
        (batch_size x sequence_length).

    Raises:
      ValueError: if the shapes of the input tensors are not consistent.

    Returns:
      L2 loss between the predicted action values and true action values.
    """
    targets_shape = targets.get_shape().as_list()
    true_targets_shape = true_targets.get_shape().as_list()
    if len(targets_shape) != 3 or len(true_targets_shape) != 3:
      raise ValueError('invalid shape for targets or true_targets_shape')
    if np.any(targets_shape != true_targets_shape):
      raise ValueError('the shape of targets and true_targets are not the same'
                       '{} != {}'.format(targets_shape, true_targets_shape))
    if weights is not None:
      # Filtering targets and true_targets using weights.
      weights_shape = weights.get_shape().as_list()
      if np.any(weights_shape != targets_shape[0:2]):
        raise ValueError('The first two elements of weights shape should match'
                         'target. {} != {}'.format(weights_shape,
                                                   targets_shape))
      true_targets = tf.boolean_mask(true_targets, weights)
      targets = tf.boolean_mask(targets, weights)
    return tf.losses.mean_squared_error(tf.reshape(targets, [-1]),
                                        tf.reshape(true_targets, [-1]))

  def reward(self, obs, done, info):
    raise NotImplementedError('reward is not implemented for this task')
################################################################################ | |
class NewTask(UnrolledTask):
  """A findX task without exploration.

  NOTE(review): this class duplicates GotoStaticXNoExplorationTask almost
  verbatim; consider subclassing it instead to remove the duplication. See
  that class for the full description of episode inputs/outputs.
  """

  def __init__(self, env, *args, **kwargs):
    """Initializes the task with the given environment.

    Args:
      env: environment exposing graph, targets(), pose_to_vertex,
        vertex_to_pose, action and random_step_sequence.
      *args: see UnrolledTask.
      **kwargs: see UnrolledTask.
    """
    super(NewTask, self).__init__(*args, **kwargs)
    self._env = env

  def _compute_shortest_path_length(self, vertex, target_vertices):
    """Computes length of the shortest path from vertex to any target vertex.

    Note: the value is the node count of the shortest path (edge count plus
    one); callers only use differences of these values, so the off-by-one
    cancels.

    Args:
      vertex: integer, index of the vertex in the environment graph.
      target_vertices: list of the target vertexes

    Returns:
      integer, minimum distance from the vertex to any of the target_vertices.

    Raises:
      networkx.NetworkXNoPath: if there is no path between the vertex and at
        least one of the target_vertices.
    """
    # BUGFIX: the previous except handler called logging.error, but the
    # logging import is commented out at the top of the file, so any no-path
    # error was masked by a NameError. The NetworkXNoPath exception is now
    # simply propagated, matching GotoStaticXNoExplorationTask.
    return np.min([
        len(nx.shortest_path(self._env.graph, vertex, t))
        for t in target_vertices
    ])

  def _compute_gt_value(self, vertex, target_vertices):
    """Computes groundtruth value of all the actions at the vertex.

    The value of each action is the difference each action makes in the
    length of the shortest path to the goal. If an action takes the agent one
    step closer to the goal the value is 1. In case, it takes the agent one
    step away from the goal it would be -1. If it leads to collision or if
    the agent uses action stop before reaching to the goal it is -2. To avoid
    scale issues the gt_values are multipled by 0.5.

    Args:
      vertex: integer, the index of current vertex.
      target_vertices: list of the integer indexes of the target views.

    Returns:
      numpy array with shape (action_size,) and each element is the
      groundtruth value of each action based on the progress each action
      makes.
    """
    action_size = self._config.output.shape[1]
    # Default value -2 marks actions that do not correspond to any edge.
    output_value = np.ones((action_size), dtype=np.float32) * -2
    my_distance = self._compute_shortest_path_length(vertex, target_vertices)
    for adj in self._env.graph[vertex]:
      adj_distance = self._compute_shortest_path_length(adj, target_vertices)
      action_index = self._env.action(
          self._env.vertex_to_pose(vertex), self._env.vertex_to_pose(adj))
      assert action_index is not None, ('{} is not adjacent to {}. There might '
                                        'be a problem in environment graph '
                                        'connectivity because there is no '
                                        'direct edge between the given '
                                        'vertices').format(
                                            self._env.vertex_to_pose(vertex),
                                            self._env.vertex_to_pose(adj))
      output_value[action_index] = my_distance - adj_distance
    return output_value * 0.5

  def episode(self):
    """Returns data needed to train and test a single episode.

    Returns:
      (inputs, None, output) where inputs is a dictionary of modality types
      to numpy arrays. The second element is query but we assume that the
      goal is also given as part of observation so it should be None for this
      task, and the outputs is the tuple of ground truth action values with
      the shape of (sequence_length x action_size) that is coming from
      config.output.shape and a numpy array with the shape of
      (sequence_length,) that is 1 if the corresponding element of the input
      and output should be used in the training optimization.

    Raises:
      ValueError: If the output values for env.random_step_sequence is not
        valid.
      ValueError: If the shape of observations coming from the env is not
        consistent with the config.
      ValueError: If there is a modality type specified in the config but the
        environment does not return that.
    """
    # Sequence length is the first dimension of any of the input tensors.
    # Python 2: dict.values() is a list.
    sequence_length = self._config.inputs.values()[0].shape[0]
    modality_types = self._config.inputs.keys()
    path, _, _, step_outputs = self._env.random_step_sequence(
        max_len=sequence_length)
    target_vertices = [self._env.pose_to_vertex(x) for x in self._env.targets()]
    if len(path) != len(step_outputs):
      raise ValueError('path, and step_outputs should have equal length'
                       ' {}!={}'.format(len(path), len(step_outputs)))
    # Building up observations. observations will be a OrderedDict of
    # modality types. The values are numpy arrays that follow the given shape
    # in the input config for each modality type.
    observations = collections.OrderedDict([k, []] for k in modality_types)
    for step_output in step_outputs:
      obs_dict = step_output[0]
      # Only going over the modality types that are specified in the input
      # config.
      for modality_type in modality_types:
        if modality_type not in obs_dict:
          raise ValueError('modality type is not returned from the environment.'
                           '{} not in {}'.format(modality_type,
                                                 obs_dict.keys()))
        obs = obs_dict[modality_type]
        if np.any(
            obs.shape != tuple(self._config.inputs[modality_type].shape[1:])):
          raise ValueError(
              'The observations should have the same size as speicifed in'
              'config for modality type {}. {} != {}'.format(
                  modality_type, obs.shape,
                  self._config.inputs[modality_type].shape[1:]))
        observations[modality_type].append(obs)
    gt_value = [self._compute_gt_value(v, target_vertices) for v in path]
    # Pad or clip (from the back) to the configured sequence length.
    # pylint: disable=unbalanced-tuple-unpacking
    gt_value, _, value_mask = _pad_or_clip_array(
        np.array(gt_value),
        sequence_length,
        is_front_clip=False,
        output_mask=True,
    )
    for modality_type, obs in observations.iteritems():
      observations[modality_type], _, mask = _pad_or_clip_array(
          np.array(obs), sequence_length, is_front_clip=False, output_mask=True)
      # Inputs and outputs must be padded/clipped identically.
      assert np.all(mask == value_mask)
    return observations, None, (gt_value, value_mask)

  def reset(self, observation):
    """Called after the environment is reset."""
    pass

  def target_loss(self, true_targets, targets, weights=None):
    """A loss for training a task model.

    This loss measures the discrepancy between the task outputs, the true and
    predicted ones.

    Args:
      true_targets: tf.Tensor of tf.float32 with the shape of
        (batch_size x sequence_length x action_size).
      targets: tf.Tensor of tf.float32 with the shape of
        (batch_size x sequence_length x action_size).
      weights: tf.Tensor of tf.bool with the shape of
        (batch_size x sequence_length).

    Raises:
      ValueError: if the shapes of the input tensors are not consistent.

    Returns:
      L2 loss between the predicted action values and true action values.
    """
    targets_shape = targets.get_shape().as_list()
    true_targets_shape = true_targets.get_shape().as_list()
    if len(targets_shape) != 3 or len(true_targets_shape) != 3:
      raise ValueError('invalid shape for targets or true_targets_shape')
    if np.any(targets_shape != true_targets_shape):
      raise ValueError('the shape of targets and true_targets are not the same'
                       '{} != {}'.format(targets_shape, true_targets_shape))
    if weights is not None:
      # Filtering targets and true_targets using weights.
      weights_shape = weights.get_shape().as_list()
      if np.any(weights_shape != targets_shape[0:2]):
        raise ValueError('The first two elements of weights shape should match'
                         'target. {} != {}'.format(weights_shape,
                                                   targets_shape))
      true_targets = tf.boolean_mask(true_targets, weights)
      targets = tf.boolean_mask(targets, weights)
    return tf.losses.mean_squared_error(tf.reshape(targets, [-1]),
                                        tf.reshape(true_targets, [-1]))

  def reward(self, obs, done, info):
    raise NotImplementedError('reward is not implemented for this task')