sgoodfriend's picture
PPO playing QbertNoFrameskip-v4 from https://github.com/sgoodfriend/rl-algo-impls/tree/983cb75e43e51cf4ef57f177194ab9a4a1a8808b
a861765
from typing import Sequence, Type
import numpy as np
import torch
import torch.nn as nn
from rl_algo_impls.shared.encoder import EncoderOutDim
from rl_algo_impls.shared.module.utils import mlp
class CriticHead(nn.Module):
def __init__(
self,
in_dim: EncoderOutDim,
hidden_sizes: Sequence[int] = (),
activation: Type[nn.Module] = nn.Tanh,
init_layers_orthogonal: bool = True,
) -> None:
super().__init__()
seq = []
if isinstance(in_dim, tuple):
seq.append(nn.Flatten())
in_channels = int(np.prod(in_dim))
else:
in_channels = in_dim
layer_sizes = (in_channels,) + tuple(hidden_sizes) + (1,)
seq.append(
mlp(
layer_sizes,
activation,
init_layers_orthogonal=init_layers_orthogonal,
final_layer_gain=1.0,
hidden_layer_gain=1.0,
)
)
self._fc = nn.Sequential(*seq)
def forward(self, obs: torch.Tensor) -> torch.Tensor:
v = self._fc(obs)
return v.squeeze(-1)