bonadio committed on
Commit
174772e
1 Parent(s): 4fc7953

Using my own version of anytrading

Browse files
__pycache__/trading_env.cpython-38.pyc ADDED
Binary file (6.43 kB). View file
 
fin_rl_PPO_v1.ipynb CHANGED
The diff for this file is too large to render. See raw diff
 
trading_env.py ADDED
@@ -0,0 +1,261 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gym
2
+ from gym import spaces
3
+ from gym.utils import seeding
4
+ import numpy as np
5
+ from enum import Enum
6
+ import matplotlib.pyplot as plt
7
+
8
+
9
class Actions(Enum):
    """Discrete actions an agent can submit to the trading environment.

    The integer values are what the environment compares incoming actions
    against (via ``Actions.<member>.value``).
    """

    Sell = 0        # open a short when flat, or close an open long
    Buy = 1         # open a long when flat, or close an open short
    Do_nothing = 2  # leave the current position untouched
13
+
14
+
15
+
16
class TradingEnv(gym.Env):
    """Single-asset trading environment with long, short and flat positions.

    The position is stored as a signed integer in ``self._position``:
    ``1`` (long), ``-1`` (short) or ``0`` (flat).  Every call to
    :meth:`step` appends one event code to ``self._position_history``;
    :meth:`render` uses those codes to place markers on the price curve.

    Args:
        df: two-dimensional table with a ``'Close'`` column, accessed via
            ``df.loc[:, 'Close'].to_numpy()`` (e.g. a pandas DataFrame).
        window_size: number of consecutive rows in one observation.
        frame_bound: ``(start, end)`` pair; the prices used are
            ``Close[start - window_size:end]``.
    """

    metadata = {'render.modes': ['human']}

    # Event codes stored in ``_position_history`` (one per step).
    _EVT_HOLD = 0          # Do_nothing, whatever the position
    _EVT_OPEN_BUY = 1      # Buy while flat
    _EVT_CLOSE_BUY = 2     # Sell while long
    _EVT_OPEN_SELL = 3     # Sell while flat
    _EVT_CLOSE_SELL = 4    # Buy while short
    _EVT_REDUNDANT = -1    # Buy while long / Sell while short (not rendered)

    def __init__(self, df, window_size, frame_bound):
        assert df.ndim == 2
        assert len(frame_bound) == 2

        self.frame_bound = frame_bound
        self.seed()
        self.df = df
        self.window_size = window_size
        self.prices, self.signal_features = self._process_data()
        self.shape = (window_size, self.signal_features.shape[1])

        # spaces
        self.action_space = spaces.Discrete(len(Actions))
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf, shape=self.shape, dtype=np.float64
        )

        # episode state; populated by reset()
        self._start_tick = self.window_size
        self._end_tick = len(self.prices) - 1
        self._done = None
        self._current_tick = None
        self._last_trade_tick = None
        self._position = None
        self._position_history = None
        self._total_reward = None
        self._total_profit = None
        self._first_rendering = None
        self.history = None

        # fees (only read by _update_profit)
        self.trade_fee_bid_percent = 0.0005  # unit
        self.trade_fee_ask_percent = 0.0005  # unit

    def seed(self, seed=None):
        """Create ``self.np_random`` from *seed* and return ``[seed]``."""
        self.np_random, seed = seeding.np_random(seed)
        return [seed]

    def reset(self):
        """Start a new episode and return the first observation."""
        self._done = False
        self._current_tick = self._start_tick
        self._last_trade_tick = self._current_tick - 1
        self._position = 0
        # The first ``window_size`` ticks carry no action, hence the padding.
        self._position_history = self.window_size * [None]
        self._total_reward = 0.
        self._total_profit = 0.
        self.history = {}
        return self._get_observation()

    def _calculate_reward(self, action):
        """Apply *action* to the current position and return the step reward.

        Side effects: may change ``_position``, ``_last_trade_tick`` and
        ``_total_profit``, and appends one event code to
        ``_position_history`` for every recognised action.

        The trade price is the price at ``_current_tick - 1`` because
        :meth:`step` advances the tick before calling this method.
        """
        step_reward = 0

        price_diff = (self.prices[self._current_tick]
                      - self.prices[self._current_tick - 1])
        trade_tick = self._current_tick - 1

        if action == Actions.Buy.value:
            if self._position == 0:
                # OPEN BUY: go long, rewarded by the next price move.
                self._position = 1
                step_reward += price_diff
                self._last_trade_tick = trade_tick
                self._position_history.append(self._EVT_OPEN_BUY)
            elif self._position > 0:
                # Already long: nothing happens.
                step_reward += 0
                self._position_history.append(self._EVT_REDUNDANT)
            elif self._position < 0:
                # CLOSE SELL: realise the short's profit or loss.
                self._position = 0
                step_reward += -1 * (self.prices[trade_tick]
                                     - self.prices[self._last_trade_tick])
                self._total_profit += step_reward
                self._position_history.append(self._EVT_CLOSE_SELL)

        elif action == Actions.Sell.value:
            if self._position == 0:
                # OPEN SELL: go short, rewarded by the inverse price move.
                self._position = -1
                step_reward += -1 * price_diff
                self._last_trade_tick = trade_tick
                self._position_history.append(self._EVT_OPEN_SELL)
            elif self._position > 0:
                # CLOSE BUY: realise the long's profit or loss.
                self._position = 0
                step_reward += (self.prices[trade_tick]
                                - self.prices[self._last_trade_tick])
                self._total_profit += step_reward
                self._position_history.append(self._EVT_CLOSE_BUY)
            elif self._position < 0:
                # Already short: nothing happens.
                step_reward += 0
                self._position_history.append(self._EVT_REDUNDANT)

        elif action == Actions.Do_nothing.value:
            if self._position > 0:
                step_reward += price_diff
            elif self._position < 0:
                step_reward += -1 * price_diff
            elif self._position == 0:
                # Staying flat is penalised by the size of the missed move.
                step_reward += -1 * abs(price_diff)
            else:
                # Position is not comparable to 0 (e.g. NaN): record nothing.
                return step_reward
            self._position_history.append(self._EVT_HOLD)

        return step_reward

    def step(self, action):
        """Advance one tick and apply *action*.

        Returns:
            ``(observation, step_reward, done, info)`` where ``info`` holds
            ``total_reward``, ``total_profit`` and ``position``.
        """
        self._done = False
        self._current_tick += 1

        if self._current_tick == self._end_tick:
            self._done = True

        step_reward = self._calculate_reward(action)
        self._total_reward += step_reward

        observation = self._get_observation()
        info = dict(
            total_reward=self._total_reward,
            total_profit=self._total_profit,
            position=self._position,
        )
        self._update_history(info)

        return observation, step_reward, self._done, info

    def _get_observation(self):
        """Return the ``window_size`` feature rows ending at the current tick."""
        first_row = self._current_tick - self.window_size + 1
        return self.signal_features[first_row:self._current_tick + 1]

    def _update_history(self, info):
        """Append each value of *info* to the per-key lists in ``history``."""
        if not self.history:
            self.history = {key: [] for key in info}

        for key, value in info.items():
            self.history[key].append(value)

    def render(self, mode='human'):
        """Plot the price curve with one marker per recorded trade event."""
        plt.plot(self.prices)

        ticks_by_event = {
            self._EVT_OPEN_BUY: [],
            self._EVT_CLOSE_BUY: [],
            self._EVT_OPEN_SELL: [],
            self._EVT_CLOSE_SELL: [],
            self._EVT_HOLD: [],
        }
        for tick, event in enumerate(self._position_history):
            # ``None`` padding and redundant actions have no bucket.
            if event is not None and event in ticks_by_event:
                ticks_by_event[event].append(tick)

        # One fmt string per series: passing a marker in the fmt string AND
        # in ``marker=`` defines it twice and makes matplotlib warn.
        for event, fmt in (
            (self._EVT_OPEN_BUY, 'g^'),
            (self._EVT_CLOSE_BUY, 'gv'),
            (self._EVT_OPEN_SELL, 'rv'),
            (self._EVT_CLOSE_SELL, 'r^'),
            (self._EVT_HOLD, 'yo'),
        ):
            ticks = ticks_by_event[event]
            plt.plot(ticks, self.prices[ticks], fmt)

        plt.suptitle(
            "Total Reward: %.6f" % self._total_reward + ' ~ ' +
            "Total Profit: %.6f" % self._total_profit
        )

    def close(self):
        """Close the current matplotlib figure."""
        plt.close()

    def save_rendering(self, filepath):
        """Save the current matplotlib figure to *filepath*."""
        plt.savefig(filepath)

    def pause_rendering(self):
        """Display the current matplotlib figure."""
        plt.show()

    def _process_data(self):
        """Slice the ``'Close'`` prices and build the feature matrix.

        Returns:
            ``(prices, signal_features)``: the sliced price array and a
            two-column array of ``(price, price - previous price)``; the
            first row's difference is 0.

        Raises:
            IndexError: if ``frame_bound[0] - window_size`` is not a valid
                non-negative index into the ``'Close'`` column.
        """
        prices = self.df.loc[:, 'Close'].to_numpy()

        start = self.frame_bound[0] - self.window_size
        # A negative start would silently wrap around to the end of the
        # array and yield a wrong (often empty) window, so reject it.
        if not 0 <= start < len(prices):
            raise IndexError(
                "frame_bound[0] - window_size = %d is out of bounds for %d prices"
                % (start, len(prices))
            )
        prices = prices[start:self.frame_bound[1]]

        diff = np.insert(np.diff(prices), 0, 0)
        signal_features = np.column_stack((prices, diff))

        return prices, signal_features

    def _update_profit(self, action):
        """Rescale ``_total_profit`` when a long position is being closed.

        Not called by :meth:`step` in this class.  Positions are the signed
        integers used everywhere else in the class (``> 0`` long,
        ``< 0`` short).
        """
        closes_short = action == Actions.Buy.value and self._position < 0
        closes_long = action == Actions.Sell.value and self._position > 0

        if closes_short or closes_long or self._done:
            current_price = self.prices[self._current_tick]
            last_trade_price = self.prices[self._last_trade_tick]

            if self._position is not None and self._position > 0:
                shares = (self._total_profit * (1 - self.trade_fee_ask_percent)) / last_trade_price
                self._total_profit = (shares * (1 - self.trade_fee_bid_percent)) * current_price

    def max_possible_profit(self):
        """Return the profit multiplier of holding long through every rising run.

        Starts from ``1.0`` and, for each maximal run of non-decreasing
        prices, multiplies by ``price at run end / price at run start``.
        Falling runs leave the multiplier unchanged.  Fees are ignored.
        """
        current_tick = self._start_tick
        last_trade_tick = current_tick - 1
        profit = 1.

        while current_tick <= self._end_tick:
            falling = self.prices[current_tick] < self.prices[current_tick - 1]

            # The first tick of a run is consumed unconditionally so the loop
            # always advances, even when a NaN price makes both comparisons
            # False.  For non-NaN data this matches checking it in the loop.
            current_tick += 1
            if falling:
                while (current_tick <= self._end_tick and
                       self.prices[current_tick] < self.prices[current_tick - 1]):
                    current_tick += 1
            else:
                while (current_tick <= self._end_tick and
                       self.prices[current_tick] >= self.prices[current_tick - 1]):
                    current_tick += 1

                # Long run: buy at the previous turning point, sell at the top.
                current_price = self.prices[current_tick - 1]
                last_trade_price = self.prices[last_trade_tick]
                shares = profit / last_trade_price
                profit = shares * current_price

            last_trade_tick = current_tick - 1

        return profit