import tensorflow as tf
from baselines.common.models import get_network_builder
|
|
|
|
class Model:
    """Base class for a named network whose variables are looked up by scope.

    The instance stores a scope ``name`` and a network-building callable.
    The properties below query the TensorFlow (v1-compat) graph collections
    for the variables registered under that scope.
    """

    def __init__(self, name, network='mlp', **network_kwargs):
        """Store the scope name and build the network-constructing callable.

        Args:
            name: Variable-scope name used to locate this model's variables.
            network: Identifier passed to ``get_network_builder``.
            **network_kwargs: Forwarded to the builder returned by
                ``get_network_builder(network)``.
        """
        self.name = name
        self.network_builder = get_network_builder(network)(**network_kwargs)

    @property
    def _collection_scope(self):
        """Return the scope filter that selects only this model's variables."""
        # ``get_collection`` filters with ``re.match``, i.e. by *prefix*.
        # Passing the bare name would make a model called 'actor' also pick up
        # variables from sibling scopes such as 'actor_1/' or 'actor_target/'.
        # Anchoring on the trailing '/' restricts the match to this scope.
        scope = self.name
        if not scope or scope.endswith('/'):
            return scope
        return scope + '/'

    @property
    def vars(self):
        """All global variables registered under this model's scope."""
        return tf.compat.v1.get_collection(
            tf.compat.v1.GraphKeys.GLOBAL_VARIABLES,
            scope=self._collection_scope,
        )

    @property
    def trainable_vars(self):
        """Trainable variables registered under this model's scope."""
        return tf.compat.v1.get_collection(
            tf.compat.v1.GraphKeys.TRAINABLE_VARIABLES,
            scope=self._collection_scope,
        )

    @property
    def perturbable_vars(self):
        """Trainable variables whose name does not contain ``'LayerNorm'``."""
        return [var for var in self.trainable_vars if 'LayerNorm' not in var.name]
|
|
|
|
class Actor(Model):
    """Network mapping observations to ``nb_actions`` tanh-squashed outputs."""

    def __init__(self, nb_actions, name='actor', network='mlp', **network_kwargs):
        """Set up the base model and record the size of the output layer.

        Args:
            nb_actions: Number of units in the final dense layer.
            name: Variable-scope name for this network.
            network: Identifier of the network builder to use.
            **network_kwargs: Forwarded to the network builder.
        """
        super().__init__(name=name, network=network, **network_kwargs)
        self.nb_actions = nb_actions

    def __call__(self, obs, reuse=False):
        """Build the actor graph on ``obs`` and return its output tensor.

        Args:
            obs: Input passed to the network builder.
            reuse: Accepted but not used; the variable scope is always
                opened with ``AUTO_REUSE``.

        Returns:
            The tanh of a dense layer with ``nb_actions`` units applied to
            the builder's output.
        """
        scope_name = self.name
        with tf.compat.v1.variable_scope(scope_name, reuse=tf.compat.v1.AUTO_REUSE):
            features = self.network_builder(obs)
            # Final layer kernel starts uniformly in [-3e-3, 3e-3].
            small_uniform = tf.compat.v1.random_uniform_initializer(minval=-3e-3, maxval=3e-3)
            pre_activation = tf.compat.v1.layers.dense(
                features,
                self.nb_actions,
                kernel_initializer=small_uniform,
            )
            return tf.nn.tanh(pre_activation)
|
|
|
|
class Critic(Model):
    """Network mapping an (observation, action) pair to a single output unit."""

    def __init__(self, name='critic', network='mlp', **network_kwargs):
        """Set up the base model and set the ``layer_norm`` flag.

        Args:
            name: Variable-scope name for this network.
            network: Identifier of the network builder to use.
            **network_kwargs: Forwarded to the network builder.
        """
        super().__init__(name=name, network=network, **network_kwargs)
        # Always set to True here; nothing in this class reads it.
        self.layer_norm = True

    def __call__(self, obs, action, reuse=False):
        """Build the critic graph and return its output tensor.

        Args:
            obs: Observation tensor.
            action: Action tensor, concatenated to ``obs`` on the last axis.
            reuse: Accepted but not used; the variable scope is always
                opened with ``AUTO_REUSE``.

        Returns:
            Output of a one-unit dense layer named ``'output'``.
        """
        scope_name = self.name
        with tf.compat.v1.variable_scope(scope_name, reuse=tf.compat.v1.AUTO_REUSE):
            joint_input = tf.concat([obs, action], axis=-1)
            features = self.network_builder(joint_input)
            # Final layer kernel starts uniformly in [-3e-3, 3e-3].
            small_uniform = tf.compat.v1.random_uniform_initializer(minval=-3e-3, maxval=3e-3)
            return tf.compat.v1.layers.dense(
                features,
                1,
                kernel_initializer=small_uniform,
                name='output',
            )

    @property
    def output_vars(self):
        """Trainable variables whose name contains ``'output'``."""
        selected = []
        for candidate in self.trainable_vars:
            if 'output' in candidate.name:
                selected.append(candidate)
        return selected
|
|