import torch


class ParameterisedPolicy(torch.nn.Module):
    """Gaussian policy network for a REINFORCE agent.

    A shared two-layer ReLU trunk feeds two linear heads that produce, for
    each action dimension, the mean and the standard deviation of a normal
    distribution. Use ``act(observation)`` to obtain an action for a single
    observation (e.g. a gym state vector).

    Args:
        obs_len: Length of the observation (state) vector.
        act_space_len: Length of the action vector.
    """

    # ELU is bounded below by -1, so adding this offset keeps every sigma
    # strictly positive (> 1e-6), as required by the Normal distribution.
    _SIGMA_OFFSET = 1.000001

    def __init__(self, obs_len: int = 8, act_space_len: int = 2) -> None:
        super().__init__()
        # When True, act() returns the distribution mean instead of sampling.
        self.deterministic = False
        # Not read anywhere in this class; kept for external consumers.
        self.continuous = True
        self.obs_len = obs_len
        self.act_space_len = act_space_len

        # Attribute names and creation order are preserved so existing
        # state_dict checkpoints and seeded initialisation stay compatible.
        self.lin_1 = torch.nn.Linear(self.obs_len, 256)
        self.rel_1 = torch.nn.ReLU()
        self.lin_2 = torch.nn.Linear(256, 128)
        self.rel_2 = torch.nn.ReLU()
        self.lin_3 = torch.nn.Linear(128, self.act_space_len)  # mean head
        self.lin_4 = torch.nn.Linear(128, self.act_space_len)  # sigma head
        self.elu = torch.nn.ELU()

    def forward(self, x: torch.Tensor) -> tuple:
        """Compute the parameters of the action distribution.

        Args:
            x: Observation tensor whose last dimension is ``obs_len``.

        Returns:
            A ``(mu, sigma)`` tuple of tensors whose last dimension is
            ``act_space_len``; ``sigma`` is strictly positive.
        """
        hidden = self.rel_1(self.lin_1(x))
        hidden = self.rel_2(self.lin_2(hidden))
        mu = self.lin_3(hidden)
        sigma = self.elu(self.lin_4(hidden)) + self._SIGMA_OFFSET
        return mu, sigma

    def act(self, observation):
        """Choose an action for ``observation``.

        Args:
            observation: Array-like or tensor holding the observation; it is
                converted to a float32 tensor on the module's device.

        Returns:
            A NumPy array with the action. It is sampled from
            ``Normal(mu, sigma)`` unless ``self.deterministic`` is true, in
            which case the mean ``mu`` is returned.
        """
        obs = torch.as_tensor(
            observation,
            dtype=torch.float32,
            device=self.lin_1.weight.device,
        )
        # Acting never needs gradients; skip building the autograd graph.
        with torch.no_grad():
            mus, sigmas = self.forward(obs)
            if self.deterministic:
                action = mus
            else:
                action = torch.distributions.normal.Normal(mus, sigmas).sample()
        # .cpu() is a no-op on CPU and makes .numpy() valid on other devices.
        return action.detach().cpu().numpy()