File size: 1,458 Bytes
3207519
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
import torch

class ParameterisedPolicy(torch.nn.Module):
    """
    REINFORCE RL agent with a Gaussian policy head.

    A shared two-layer ReLU trunk (obs_len -> 256 -> 128) feeds two linear
    heads of width ``act_space_len``: one produces the mean and the other,
    after an ELU shift, the strictly positive standard deviation of a
    ``torch.distributions.Normal``.

    Use ``ParameterisedPolicy.act(observation)`` to obtain an action, where
    ``observation`` is a gym state vector.

    obs_len - length of the state vector
    act_space_len - length of the action vector

    Attributes:
        deterministic: when True, ``act`` returns the distribution mean
            instead of drawing a sample. Defaults to False.
        continuous: flag set to True at construction; it is not read
            anywhere inside this class.
    """

    # ELU is bounded below by -1, so adding slightly more than 1 keeps the
    # standard deviation strictly positive, as Normal requires.
    _SIGMA_OFFSET = 1.000001

    def __init__(self, obs_len=8, act_space_len=2):
        super().__init__()
        self.deterministic = False
        self.continuous = True
        self.obs_len = obs_len
        self.act_space_len = act_space_len

        # Shared feature trunk.
        self.lin_1 = torch.nn.Linear(self.obs_len, 256)
        self.rel_1 = torch.nn.ReLU()

        self.lin_2 = torch.nn.Linear(256, 128)
        self.rel_2 = torch.nn.ReLU()

        # Mean head.
        self.lin_3 = torch.nn.Linear(128, self.act_space_len)

        # Standard-deviation head (passed through ELU + offset in forward).
        self.lin_4 = torch.nn.Linear(128, self.act_space_len)
        self.elu = torch.nn.ELU()

    def forward(self, x):
        """
        Compute the parameters of the action distribution.

        x - float tensor whose last dimension has length ``obs_len``

        Returns a ``(mu, sigma)`` tuple of tensors whose last dimension has
        length ``act_space_len``; ``sigma`` is strictly positive.
        """
        x = self.lin_1(x)
        x = self.rel_1(x)

        x = self.lin_2(x)
        x = self.rel_2(x)

        mu = self.lin_3(x)

        x = self.lin_4(x)
        sigma = self.elu(x) + self._SIGMA_OFFSET

        return mu, sigma

    def act(self, observation):
        """
        Choose an action for ``observation``.

        observation - state vector (sequence, numpy array or tensor)

        Returns a numpy array: a sample from Normal(mu, sigma), or ``mu``
        itself when ``self.deterministic`` is True. The result carries no
        gradient information.
        """
        # Build the input on the same device as the network so that act()
        # keeps working after the module has been moved with .to(device).
        device = self.lin_1.weight.device
        # as_tensor accepts lists, numpy arrays and tensors alike and does
        # not trigger the copy-construct warning torch.tensor() emits when
        # it is handed an existing tensor.
        obs = torch.as_tensor(observation, dtype=torch.float32, device=device)

        # The action is returned detached, so there is no point in
        # recording an autograd graph for this forward pass.
        with torch.no_grad():
            mus, sigmas = self.forward(obs)
            if self.deterministic:
                action = mus
            else:
                action = torch.distributions.normal.Normal(mus, sigmas).sample()

        # .cpu() is a no-op for CPU tensors and is required before .numpy()
        # on any other device.
        return action.detach().cpu().numpy()