# rl-fin / test_return.py
# bonadio's picture
# A2C trading results
# da4e4fb
from collections import deque
import numpy as np
# Scratch script: computes the discounted return of every step of a short
# reward sequence, printing each intermediate value along the way.

# Discount factor applied to the return of the following step.
DISCOUNT = 0.95

# Discounted returns, ordered from the earliest step to the latest.
# NOTE: maxlen=20 bounds the deque; once it is full, every appendleft discards
# the entry at the right end, so only 20 returns are kept for longer inputs.
returns = deque(maxlen=20)
rewards = [1, 1, 1, 1, 1]
n_steps = len(rewards)

# Walk the rewards from the last step back to the first so that each return
# can be built from the one after it: G_t = r_t + DISCOUNT * G_{t+1}.
for t in reversed(range(n_steps)):
    print("Step=======", t)
    # The front of the deque holds the return of step t + 1; on the very
    # first iteration (the last step) the deque is empty, so use 0.
    disc_return_t = returns[0] if returns else 0
    print("return", disc_return_t)
    print("reward", rewards[t])
    returns.appendleft(DISCOUNT * disc_return_t + rewards[t])
    print("appended ret", returns)

# Convert the deque into a NumPy array and show the final result.
returns = np.array(returns)
print(returns)