boomsss committed on
Commit
a213ac9
•
1 Parent(s): 38de6e8

big changes, shorter code

Browse files
.streamlit/config.toml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [theme]
2
+ base="dark"
3
+ primaryColor="#3399cc"
app.py CHANGED
@@ -5,22 +5,27 @@ from sklearn.metrics import roc_auc_score, precision_score, recall_score
5
  from pandas.tseries.offsets import BDay
6
 
7
  st.set_page_config(
8
- page_title="Gameday Model for $SPX",
9
  page_icon="🎮"
10
  )
11
 
12
  st.title('🎮 Gameday Model for $SPX')
13
  st.markdown('**PLEASE NOTE:** Model should be run at or after market open. Documentation on the model and its features [can be found here.](https://huggingface.co/spaces/boomsss/gamedayspx/blob/main/README.md)')
14
  with st.form("choose_model"):
 
 
 
 
15
 
16
- option = st.selectbox(
17
- 'Select a model, then run.',
18
- ('', '๐ŸŒž At Open', 'โŒš 30 Mins', 'โณ 60 Mins', '๐Ÿ•ฐ 90 Mins'))
19
  col1, col2 = st.columns(2)
20
- with col1:
21
- submitted = st.form_submit_button('๐Ÿƒ๐Ÿฝโ€โ™‚๏ธ Run',use_container_width=True)
22
 
 
 
 
 
 
23
  with col2:
 
24
  cleared = st.form_submit_button('🧹 Clear All',use_container_width=True)
25
 
26
  if cleared:
@@ -31,10 +36,13 @@ with st.form("choose_model"):
31
 
32
  if submitted:
33
 
34
- if option == '๐ŸŒž At Open':
35
  # runday = st.button('๐Ÿƒ๐Ÿฝโ€โ™‚๏ธ Run')
36
  # if runday:
37
  from model_day import *
 
 
 
38
  with st.spinner('Loading data...'):
39
  data, df_final, final_row = get_data()
40
  # st.success("โœ… Historical data")
@@ -83,226 +91,15 @@ with st.form("choose_model"):
83
  new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
84
  new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
85
 
86
- st.success("โœ… All done!")
87
- tab1, tab2, tab3, tab4 = st.tabs(["๐Ÿ”ฎ Prediction", "โœจ New Data", "๐Ÿ—„ Historical", "๐Ÿ“Š Performance"])
88
-
89
- seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
90
-
91
- green_proba = seq_proba[0]
92
- red_proba = 1 - green_proba
93
- do_not_play = (seq_proba[0] > 0.4) and (seq_proba[0] <= 0.6)
94
- stdev = 0.01
95
- score = None
96
- num_obs = None
97
- cond = None
98
- historical_proba = None
99
- text_cond = None
100
- operator = None
101
-
102
- if do_not_play:
103
- text_cond = '๐ŸŸจ'
104
- operator = ''
105
- score = seq_proba[0]
106
- cond = (res1['Predicted'] > 0.4) & (res1['Predicted'] <= 0.6)
107
- num_obs = len(res1.loc[cond])
108
- historical_proba = res1.loc[cond, 'True'].mean()
109
-
110
-
111
- elif green_proba > red_proba:
112
- # If the day is predicted to be green, say so
113
- text_cond = '๐ŸŸฉ'
114
- operator = '>='
115
- score = green_proba
116
- # How many with this score?
117
- cond = (res1['Predicted'] >= green_proba)
118
- num_obs = len(res1.loc[cond])
119
- # How often green?
120
- historical_proba = res1.loc[cond, 'True'].mean()
121
- # print(cond)
122
-
123
- elif green_proba <= red_proba:
124
- # If the day is predicted to be green, say so
125
- text_cond = '๐ŸŸฅ'
126
- operator = '<='
127
- score = red_proba
128
- # How many with this score?
129
- cond = (res1['Predicted'] <= seq_proba[0])
130
- num_obs = len(res1.loc[cond])
131
- # How often green?
132
- historical_proba = 1 - res1.loc[cond, 'True'].mean()
133
- # print(cond)
134
-
135
- score_fmt = f'{score:.1%}'
136
-
137
- results = pd.DataFrame(index=[
138
- 'PrevClose',
139
- 'Confidence Score',
140
- 'Success Rate',
141
- f'NumObs {operator} {"" if do_not_play else score_fmt}',
142
- ], data = [
143
- f"{data.loc[final_row,'Close']:.2f}",
144
- f'{text_cond} {score:.1%}',
145
- f'{historical_proba:.1%}',
146
- num_obs,
147
- ])
148
-
149
- results.columns = ['Outputs']
150
-
151
- # st.subheader('New Prediction')
152
-
153
- int_labels = ['(-โˆž, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, โˆž]']
154
- # df_probas = res1.groupby(pd.qcut(res1['Predicted'],5)).agg({'True':[np.mean,len,np.sum]})
155
-
156
- data['ClosePct'] = (data['Close'] / data['PrevClose']) - 1
157
- data['ClosePct'] = data['ClosePct'].shift(-1)
158
- res1 = res1.merge(data['ClosePct'], left_index=True,right_index=True)
159
- df_probas = res1.groupby(pd.cut(res1['Predicted'], bins = [-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf], labels = int_labels)).agg({'True':[np.mean,len,np.sum],'ClosePct':[np.mean]})
160
- df_probas.columns = ['PctGreen','NumObs','NumGreen','AvgPerf']
161
- df_probas['AvgPerf'] = df_probas['AvgPerf'].apply(lambda x: f'{x:.2%}')
162
-
163
- roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)
164
- precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
165
- recall_score_all = recall_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
166
- len_all = len(res1)
167
-
168
- res2_filtered = res1.loc[(res1['Predicted'] > 0.6) | (res1['Predicted'] <= 0.4)]
169
-
170
- roc_auc_score_hi = roc_auc_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'].values)
171
- precision_score_hi = precision_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
172
- recall_score_hi = recall_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
173
- len_hi = len(res2_filtered)
174
-
175
- df_performance = pd.DataFrame(
176
- index=[
177
- 'N',
178
- 'ROC AUC',
179
- 'Precision',
180
- 'Recall'
181
- ],
182
- columns = [
183
- 'All',
184
- 'High Confidence'
185
- ],
186
- data = [
187
- [len_all, len_hi],
188
- [roc_auc_score_all, roc_auc_score_hi],
189
- [precision_score_all, precision_score_hi],
190
- [recall_score_all, recall_score_hi]
191
- ]
192
- ).round(2)
193
-
194
- def get_acc(t, p):
195
- if t == False and p <= 0.4:
196
- return 'โœ…'
197
- elif t == True and p > 0.6:
198
- return 'โœ…'
199
- elif t == False and p > 0.6:
200
- return 'โŒ'
201
- elif t == True and p <= 0.4:
202
- return 'โŒ'
203
- else:
204
- return '๐ŸŸจ'
205
-
206
- def get_acc_text(t, p):
207
- if t == False and p <= 0.4:
208
- return 'Correct'
209
- elif t == True and p > 0.6:
210
- return 'Correct'
211
- elif t == False and p > 0.6:
212
- return 'Incorrect'
213
- elif t == True and p <= 0.4:
214
- return 'Incorrect'
215
- else:
216
- return 'No Action'
217
-
218
- perf_daily = res1.copy()
219
- perf_daily['TargetDate'] = perf_daily.index + BDay(1)
220
- perf_daily['Accuracy'] = [get_acc(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
221
- perf_daily['AccuracyText'] = [get_acc_text(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
222
- perf_daily['ConfidenceScore'] = [x if x > 0.6 else 1-x if x <= 0.4 else x for x in perf_daily['Predicted']]
223
- perf_daily = perf_daily[['TargetDate','Predicted','True','Accuracy','AccuracyText','ConfidenceScore']]
224
-
225
- def convert_df(df):
226
- # IMPORTANT: Cache the conversion to prevent computation on every rerun
227
- return df.to_csv()
228
-
229
- csv = convert_df(perf_daily)
230
-
231
- check = data.tail(1)
232
-
233
- data['VIX_EM'] = data['Close'] * (data['Close_VIX']/100) * (np.sqrt( 1 ) / np.sqrt(252))
234
- data['VIX_EM_High'] = data['Close'] + data['VIX_EM']
235
- data['VIX_EM_Low'] = data['Close'] - data['VIX_EM']
236
-
237
- data['VIX_EM_125'] = data['VIX_EM'] * 1.25
238
- data['VIX_EM_125_High'] = data['Close'] + data['VIX_EM_125']
239
- data['VIX_EM_125_Low'] = data['Close'] - data['VIX_EM_125']
240
-
241
- data['VIX_EM_15'] = data['VIX_EM'] * 1.5
242
- data['VIX_EM_15_High'] = data['Close'] + data['VIX_EM_15']
243
- data['VIX_EM_15_Low'] = data['Close'] - data['VIX_EM_15']
244
-
245
- data['VIX_EM'] = data['VIX_EM'].shift(1)
246
- data['VIX_EM_High'] = data['VIX_EM_High'].shift(1)
247
- data['VIX_EM_Low'] = data['VIX_EM_Low'].shift(1)
248
-
249
- data['VIX_EM_15'] = data['VIX_EM_15'].shift(1)
250
- data['VIX_EM_15_High'] = data['VIX_EM_15_High'].shift(1)
251
- data['VIX_EM_15_Low'] = data['VIX_EM_15_Low'].shift(1)
252
-
253
- data['VIX_EM_125'] = data['VIX_EM_125'].shift(1)
254
- data['VIX_EM_125_High'] = data['VIX_EM_125_High'].shift(1)
255
- data['VIX_EM_125_Low'] = data['VIX_EM_125_Low'].shift(1)
256
-
257
- df_em = pd.DataFrame(columns=['EM','Low','High','WithinRange','Tested'])
258
- df_em.loc['EM 1X'] = [
259
- data['VIX_EM'].iloc[-1].round(2),
260
- data['VIX_EM_Low'].iloc[-1].round(2),
261
- data['VIX_EM_High'].iloc[-1].round(2),
262
- f"{len(data.query('Close <= VIX_EM_High & Close >= VIX_EM_Low')) / len(data):.1%}",
263
- f"{len(data.query('High > VIX_EM_High | Low < VIX_EM_Low')) / len(data):.1%}"
264
- ]
265
- df_em.loc['EM 1.25X'] = [
266
- data['VIX_EM_125'].iloc[-1].round(2),
267
- data['VIX_EM_125_Low'].iloc[-1].round(2),
268
- data['VIX_EM_125_High'].iloc[-1].round(2),
269
- f"{len(data.query('Close <= VIX_EM_125_High & Close >= VIX_EM_125_Low')) / len(data):.1%}",
270
- f"{len(data.query('High > VIX_EM_125_High | Low < VIX_EM_125_Low')) / len(data):.1%}"
271
- ]
272
- df_em.loc[f"EM 1.5X"] = [
273
- data['VIX_EM_15'].iloc[-1].round(2),
274
- data['VIX_EM_15_Low'].iloc[-1].round(2),
275
- data['VIX_EM_15_High'].iloc[-1].round(2),
276
- f"{len(data.query('Close <= VIX_EM_15_High & Close >= VIX_EM_15_Low')) / len(data):.1%}",
277
- f"{len(data.query('High > VIX_EM_15_High | Low < VIX_EM_15_Low')) / len(data):.1%}"
278
- ]
279
-
280
- with tab1:
281
- st.subheader(f'Pred for {curr_date} as of 6:30AM PST')
282
- st.write(results)
283
- st.write(df_probas)
284
- st.text('VIX EM')
285
- st.write(df_em)
286
- with tab2:
287
- st.subheader('Latest Data for Pred')
288
- st.write(new_pred)
289
- with tab3:
290
- st.subheader('Historical Data')
291
- st.write(df_final)
292
- with tab4:
293
- st.subheader('Performance')
294
- st.write(df_performance)
295
- st.write(perf_daily[['TargetDate','Predicted','True','Accuracy']])
296
- # st.download_button(
297
- # label="Download Historical Performance",
298
- # data=csv,
299
- fname='performance_for_at_open_model.csv'
300
- # )
301
-
302
- elif option == 'โŒš 30 Mins':
303
  # run30 = st.button('๐Ÿƒ๐Ÿฝโ€โ™‚๏ธ Run')
304
  # if run30:
305
  from model_30m import *
 
 
 
306
  with st.spinner('Loading data...'):
307
  data, df_final, final_row = get_data()
308
  # st.success("โœ… Historical data")
@@ -361,226 +158,15 @@ with st.form("choose_model"):
361
  new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
362
  new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
363
 
364
- st.success("โœ… All done!")
365
- tab1, tab2, tab3, tab4 = st.tabs(["๐Ÿ”ฎ Prediction", "โœจ New Data", "๐Ÿ—„ Historical", "๐Ÿ“Š Performance"])
366
-
367
- seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
368
-
369
- green_proba = seq_proba[0]
370
- red_proba = 1 - green_proba
371
- do_not_play = (seq_proba[0] > 0.4) and (seq_proba[0] <= 0.6)
372
- stdev = 0.01
373
- score = None
374
- num_obs = None
375
- cond = None
376
- historical_proba = None
377
- text_cond = None
378
- operator = None
379
-
380
- if do_not_play:
381
- text_cond = '๐ŸŸจ'
382
- operator = ''
383
- score = seq_proba[0]
384
- cond = (res1['Predicted'] > 0.4) & (res1['Predicted'] <= 0.6)
385
- num_obs = len(res1.loc[cond])
386
- historical_proba = res1.loc[cond, 'True'].mean()
387
-
388
-
389
- elif green_proba > red_proba:
390
- # If the day is predicted to be green, say so
391
- text_cond = '๐ŸŸฉ'
392
- operator = '>='
393
- score = green_proba
394
- # How many with this score?
395
- cond = (res1['Predicted'] >= green_proba)
396
- num_obs = len(res1.loc[cond])
397
- # How often green?
398
- historical_proba = res1.loc[cond, 'True'].mean()
399
- # print(cond)
400
-
401
- elif green_proba <= red_proba:
402
- # If the day is predicted to be green, say so
403
- text_cond = '๐ŸŸฅ'
404
- operator = '<='
405
- score = red_proba
406
- # How many with this score?
407
- cond = (res1['Predicted'] <= seq_proba[0])
408
- num_obs = len(res1.loc[cond])
409
- # How often green?
410
- historical_proba = 1 - res1.loc[cond, 'True'].mean()
411
- # print(cond)
412
-
413
- score_fmt = f'{score:.1%}'
414
-
415
- results = pd.DataFrame(index=[
416
- 'PrevClose',
417
- 'Confidence Score',
418
- 'Success Rate',
419
- f'NumObs {operator} {"" if do_not_play else score_fmt}',
420
- ], data = [
421
- f"{data.loc[final_row,'Close']:.2f}",
422
- f'{text_cond} {score:.1%}',
423
- f'{historical_proba:.1%}',
424
- num_obs,
425
- ])
426
-
427
- results.columns = ['Outputs']
428
-
429
- # st.subheader('New Prediction')
430
-
431
- int_labels = ['(-โˆž, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, โˆž]']
432
- # df_probas = res1.groupby(pd.qcut(res1['Predicted'],5)).agg({'True':[np.mean,len,np.sum]})
433
-
434
- data['ClosePct'] = (data['Close'] / data['PrevClose']) - 1
435
- data['ClosePct'] = data['ClosePct'].shift(-1)
436
- res1 = res1.merge(data['ClosePct'], left_index=True,right_index=True)
437
- df_probas = res1.groupby(pd.cut(res1['Predicted'], bins = [-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf], labels = int_labels)).agg({'True':[np.mean,len,np.sum],'ClosePct':[np.mean]})
438
- df_probas.columns = ['PctGreen','NumObs','NumGreen','AvgPerf']
439
- df_probas['AvgPerf'] = df_probas['AvgPerf'].apply(lambda x: f'{x:.2%}')
440
-
441
- roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)
442
- precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
443
- recall_score_all = recall_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
444
- len_all = len(res1)
445
-
446
- res2_filtered = res1.loc[(res1['Predicted'] > 0.6) | (res1['Predicted'] <= 0.4)]
447
-
448
- roc_auc_score_hi = roc_auc_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'].values)
449
- precision_score_hi = precision_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
450
- recall_score_hi = recall_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
451
- len_hi = len(res2_filtered)
452
-
453
- df_performance = pd.DataFrame(
454
- index=[
455
- 'N',
456
- 'ROC AUC',
457
- 'Precision',
458
- 'Recall'
459
- ],
460
- columns = [
461
- 'All',
462
- 'High Confidence'
463
- ],
464
- data = [
465
- [len_all, len_hi],
466
- [roc_auc_score_all, roc_auc_score_hi],
467
- [precision_score_all, precision_score_hi],
468
- [recall_score_all, recall_score_hi]
469
- ]
470
- ).round(2)
471
-
472
- def get_acc(t, p):
473
- if t == False and p <= 0.4:
474
- return 'โœ…'
475
- elif t == True and p > 0.6:
476
- return 'โœ…'
477
- elif t == False and p > 0.6:
478
- return 'โŒ'
479
- elif t == True and p <= 0.4:
480
- return 'โŒ'
481
- else:
482
- return '๐ŸŸจ'
483
-
484
- def get_acc_text(t, p):
485
- if t == False and p <= 0.4:
486
- return 'Correct'
487
- elif t == True and p > 0.6:
488
- return 'Correct'
489
- elif t == False and p > 0.6:
490
- return 'Incorrect'
491
- elif t == True and p <= 0.4:
492
- return 'Incorrect'
493
- else:
494
- return 'No Action'
495
-
496
- perf_daily = res1.copy()
497
- perf_daily['TargetDate'] = perf_daily.index + BDay(1)
498
- perf_daily['Accuracy'] = [get_acc(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
499
- perf_daily['AccuracyText'] = [get_acc_text(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
500
- perf_daily['ConfidenceScore'] = [x if x > 0.6 else 1-x if x <= 0.4 else x for x in perf_daily['Predicted']]
501
- perf_daily = perf_daily[['TargetDate','Predicted','True','Accuracy','AccuracyText','ConfidenceScore']]
502
-
503
- def convert_df(df):
504
- # IMPORTANT: Cache the conversion to prevent computation on every rerun
505
- return df.to_csv()
506
-
507
- csv = convert_df(perf_daily)
508
-
509
- check = data.tail(1)
510
-
511
- data['VIX_EM'] = data['Close'] * (data['Close_VIX']/100) * (np.sqrt( 1 ) / np.sqrt(252))
512
- data['VIX_EM_High'] = data['Close'] + data['VIX_EM']
513
- data['VIX_EM_Low'] = data['Close'] - data['VIX_EM']
514
-
515
- data['VIX_EM_125'] = data['VIX_EM'] * 1.25
516
- data['VIX_EM_125_High'] = data['Close'] + data['VIX_EM_125']
517
- data['VIX_EM_125_Low'] = data['Close'] - data['VIX_EM_125']
518
-
519
- data['VIX_EM_15'] = data['VIX_EM'] * 1.5
520
- data['VIX_EM_15_High'] = data['Close'] + data['VIX_EM_15']
521
- data['VIX_EM_15_Low'] = data['Close'] - data['VIX_EM_15']
522
-
523
- data['VIX_EM'] = data['VIX_EM'].shift(1)
524
- data['VIX_EM_High'] = data['VIX_EM_High'].shift(1)
525
- data['VIX_EM_Low'] = data['VIX_EM_Low'].shift(1)
526
-
527
- data['VIX_EM_15'] = data['VIX_EM_15'].shift(1)
528
- data['VIX_EM_15_High'] = data['VIX_EM_15_High'].shift(1)
529
- data['VIX_EM_15_Low'] = data['VIX_EM_15_Low'].shift(1)
530
-
531
- data['VIX_EM_125'] = data['VIX_EM_125'].shift(1)
532
- data['VIX_EM_125_High'] = data['VIX_EM_125_High'].shift(1)
533
- data['VIX_EM_125_Low'] = data['VIX_EM_125_Low'].shift(1)
534
-
535
- df_em = pd.DataFrame(columns=['EM','Low','High','WithinRange','Tested'])
536
- df_em.loc['EM 1X'] = [
537
- data['VIX_EM'].iloc[-1].round(2),
538
- data['VIX_EM_Low'].iloc[-1].round(2),
539
- data['VIX_EM_High'].iloc[-1].round(2),
540
- f"{len(data.query('Close <= VIX_EM_High & Close >= VIX_EM_Low')) / len(data):.1%}",
541
- f"{len(data.query('High > VIX_EM_High | Low < VIX_EM_Low')) / len(data):.1%}"
542
- ]
543
- df_em.loc['EM 1.25X'] = [
544
- data['VIX_EM_125'].iloc[-1].round(2),
545
- data['VIX_EM_125_Low'].iloc[-1].round(2),
546
- data['VIX_EM_125_High'].iloc[-1].round(2),
547
- f"{len(data.query('Close <= VIX_EM_125_High & Close >= VIX_EM_125_Low')) / len(data):.1%}",
548
- f"{len(data.query('High > VIX_EM_125_High | Low < VIX_EM_125_Low')) / len(data):.1%}"
549
- ]
550
- df_em.loc[f"EM 1.5X"] = [
551
- data['VIX_EM_15'].iloc[-1].round(2),
552
- data['VIX_EM_15_Low'].iloc[-1].round(2),
553
- data['VIX_EM_15_High'].iloc[-1].round(2),
554
- f"{len(data.query('Close <= VIX_EM_15_High & Close >= VIX_EM_15_Low')) / len(data):.1%}",
555
- f"{len(data.query('High > VIX_EM_15_High | Low < VIX_EM_15_Low')) / len(data):.1%}"
556
- ]
557
-
558
- with tab1:
559
- st.subheader(f'Pred for {curr_date} as of 7AM PST')
560
- st.write(results)
561
- st.write(df_probas)
562
- st.text('VIX EM')
563
- st.write(df_em)
564
- with tab2:
565
- st.subheader('Latest Data for Pred')
566
- st.write(new_pred)
567
- with tab3:
568
- st.subheader('Historical Data')
569
- st.write(df_final)
570
- with tab4:
571
- st.subheader('Performance')
572
- st.write(df_performance)
573
- st.write(perf_daily[['TargetDate','Predicted','True','Accuracy']])
574
- # st.download_button(
575
- # label="Download Historical Performance",
576
- # data=csv,
577
- fname='performance_for_30m_model.csv'
578
- # )
579
-
580
- elif option == 'โณ 60 Mins':
581
  # run60 = st.button('๐Ÿƒ๐Ÿฝโ€โ™‚๏ธ Run')
582
  # if run60:
583
  from model_1h import *
 
 
 
584
  with st.spinner('Loading data...'):
585
  data, df_final, final_row = get_data()
586
  # st.success("โœ… Historical data")
@@ -639,225 +225,15 @@ with st.form("choose_model"):
639
  new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
640
  new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
641
 
642
- st.success("โœ… All done!")
643
- tab1, tab2, tab3, tab4 = st.tabs(["๐Ÿ”ฎ Prediction", "โœจ New Data", "๐Ÿ—„ Historical", "๐Ÿ“Š Performance"])
644
-
645
- seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
646
-
647
- green_proba = seq_proba[0]
648
- red_proba = 1 - green_proba
649
- do_not_play = (seq_proba[0] > 0.4) and (seq_proba[0] <= 0.6)
650
- stdev = 0.01
651
- score = None
652
- num_obs = None
653
- cond = None
654
- historical_proba = None
655
- text_cond = None
656
- operator = None
657
-
658
- if do_not_play:
659
- text_cond = '๐ŸŸจ'
660
- operator = ''
661
- score = seq_proba[0]
662
- cond = (res1['Predicted'] > 0.4) & (res1['Predicted'] <= 0.6)
663
- num_obs = len(res1.loc[cond])
664
- historical_proba = res1.loc[cond, 'True'].mean()
665
-
666
-
667
- elif green_proba > red_proba:
668
- # If the day is predicted to be green, say so
669
- text_cond = '๐ŸŸฉ'
670
- operator = '>='
671
- score = green_proba
672
- # How many with this score?
673
- cond = (res1['Predicted'] >= green_proba)
674
- num_obs = len(res1.loc[cond])
675
- # How often green?
676
- historical_proba = res1.loc[cond, 'True'].mean()
677
- # print(cond)
678
-
679
- elif green_proba <= red_proba:
680
- # If the day is predicted to be green, say so
681
- text_cond = '๐ŸŸฅ'
682
- operator = '<='
683
- score = red_proba
684
- # How many with this score?
685
- cond = (res1['Predicted'] <= seq_proba[0])
686
- num_obs = len(res1.loc[cond])
687
- # How often green?
688
- historical_proba = 1 - res1.loc[cond, 'True'].mean()
689
- # print(cond)
690
-
691
- score_fmt = f'{score:.1%}'
692
-
693
- results = pd.DataFrame(index=[
694
- 'PrevClose',
695
- 'Confidence Score',
696
- 'Success Rate',
697
- f'NumObs {operator} {"" if do_not_play else score_fmt}',
698
- ], data = [
699
- f"{data.loc[final_row,'Close']:.2f}",
700
- f'{text_cond} {score:.1%}',
701
- f'{historical_proba:.1%}',
702
- num_obs,
703
- ])
704
-
705
- results.columns = ['Outputs']
706
-
707
- # st.subheader('New Prediction')
708
- int_labels = ['(-โˆž, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, โˆž]']
709
- # df_probas = res1.groupby(pd.qcut(res1['Predicted'],5)).agg({'True':[np.mean,len,np.sum]})
710
-
711
- data['ClosePct'] = (data['Close'] / data['PrevClose']) - 1
712
- data['ClosePct'] = data['ClosePct'].shift(-1)
713
- res1 = res1.merge(data['ClosePct'], left_index=True,right_index=True)
714
- df_probas = res1.groupby(pd.cut(res1['Predicted'], bins = [-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf], labels = int_labels)).agg({'True':[np.mean,len,np.sum],'ClosePct':[np.mean]})
715
- df_probas.columns = ['PctGreen','NumObs','NumGreen','AvgPerf']
716
- df_probas['AvgPerf'] = df_probas['AvgPerf'].apply(lambda x: f'{x:.2%}')
717
-
718
- roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)
719
- precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
720
- recall_score_all = recall_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
721
- len_all = len(res1)
722
-
723
- res2_filtered = res1.loc[(res1['Predicted'] > 0.6) | (res1['Predicted'] <= 0.4)]
724
-
725
- roc_auc_score_hi = roc_auc_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'].values)
726
- precision_score_hi = precision_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
727
- recall_score_hi = recall_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
728
- len_hi = len(res2_filtered)
729
-
730
- df_performance = pd.DataFrame(
731
- index=[
732
- 'N',
733
- 'ROC AUC',
734
- 'Precision',
735
- 'Recall'
736
- ],
737
- columns = [
738
- 'All',
739
- 'High Confidence'
740
- ],
741
- data = [
742
- [len_all, len_hi],
743
- [roc_auc_score_all, roc_auc_score_hi],
744
- [precision_score_all, precision_score_hi],
745
- [recall_score_all, recall_score_hi]
746
- ]
747
- ).round(2)
748
-
749
- def get_acc(t, p):
750
- if t == False and p <= 0.4:
751
- return 'โœ…'
752
- elif t == True and p > 0.6:
753
- return 'โœ…'
754
- elif t == False and p > 0.6:
755
- return 'โŒ'
756
- elif t == True and p <= 0.4:
757
- return 'โŒ'
758
- else:
759
- return '๐ŸŸจ'
760
-
761
- def get_acc_text(t, p):
762
- if t == False and p <= 0.4:
763
- return 'Correct'
764
- elif t == True and p > 0.6:
765
- return 'Correct'
766
- elif t == False and p > 0.6:
767
- return 'Incorrect'
768
- elif t == True and p <= 0.4:
769
- return 'Incorrect'
770
- else:
771
- return 'No Action'
772
-
773
- perf_daily = res1.copy()
774
- perf_daily['TargetDate'] = perf_daily.index + BDay(1)
775
- perf_daily['Accuracy'] = [get_acc(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
776
- perf_daily['AccuracyText'] = [get_acc_text(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
777
- perf_daily['ConfidenceScore'] = [x if x > 0.6 else 1-x if x <= 0.4 else x for x in perf_daily['Predicted']]
778
- perf_daily = perf_daily[['TargetDate','Predicted','True','Accuracy','AccuracyText','ConfidenceScore']]
779
-
780
- def convert_df(df):
781
- # IMPORTANT: Cache the conversion to prevent computation on every rerun
782
- return df.to_csv()
783
-
784
- csv = convert_df(perf_daily)
785
-
786
- check = data.tail(1)
787
-
788
- data['VIX_EM'] = data['Close'] * (data['Close_VIX']/100) * (np.sqrt( 1 ) / np.sqrt(252))
789
- data['VIX_EM_High'] = data['Close'] + data['VIX_EM']
790
- data['VIX_EM_Low'] = data['Close'] - data['VIX_EM']
791
-
792
- data['VIX_EM_125'] = data['VIX_EM'] * 1.25
793
- data['VIX_EM_125_High'] = data['Close'] + data['VIX_EM_125']
794
- data['VIX_EM_125_Low'] = data['Close'] - data['VIX_EM_125']
795
-
796
- data['VIX_EM_15'] = data['VIX_EM'] * 1.5
797
- data['VIX_EM_15_High'] = data['Close'] + data['VIX_EM_15']
798
- data['VIX_EM_15_Low'] = data['Close'] - data['VIX_EM_15']
799
-
800
- data['VIX_EM'] = data['VIX_EM'].shift(1)
801
- data['VIX_EM_High'] = data['VIX_EM_High'].shift(1)
802
- data['VIX_EM_Low'] = data['VIX_EM_Low'].shift(1)
803
-
804
- data['VIX_EM_15'] = data['VIX_EM_15'].shift(1)
805
- data['VIX_EM_15_High'] = data['VIX_EM_15_High'].shift(1)
806
- data['VIX_EM_15_Low'] = data['VIX_EM_15_Low'].shift(1)
807
-
808
- data['VIX_EM_125'] = data['VIX_EM_125'].shift(1)
809
- data['VIX_EM_125_High'] = data['VIX_EM_125_High'].shift(1)
810
- data['VIX_EM_125_Low'] = data['VIX_EM_125_Low'].shift(1)
811
-
812
- df_em = pd.DataFrame(columns=['EM','Low','High','WithinRange','Tested'])
813
- df_em.loc['EM 1X'] = [
814
- data['VIX_EM'].iloc[-1].round(2),
815
- data['VIX_EM_Low'].iloc[-1].round(2),
816
- data['VIX_EM_High'].iloc[-1].round(2),
817
- f"{len(data.query('Close <= VIX_EM_High & Close >= VIX_EM_Low')) / len(data):.1%}",
818
- f"{len(data.query('High > VIX_EM_High | Low < VIX_EM_Low')) / len(data):.1%}"
819
- ]
820
- df_em.loc['EM 1.25X'] = [
821
- data['VIX_EM_125'].iloc[-1].round(2),
822
- data['VIX_EM_125_Low'].iloc[-1].round(2),
823
- data['VIX_EM_125_High'].iloc[-1].round(2),
824
- f"{len(data.query('Close <= VIX_EM_125_High & Close >= VIX_EM_125_Low')) / len(data):.1%}",
825
- f"{len(data.query('High > VIX_EM_125_High | Low < VIX_EM_125_Low')) / len(data):.1%}"
826
- ]
827
- df_em.loc[f"EM 1.5X"] = [
828
- data['VIX_EM_15'].iloc[-1].round(2),
829
- data['VIX_EM_15_Low'].iloc[-1].round(2),
830
- data['VIX_EM_15_High'].iloc[-1].round(2),
831
- f"{len(data.query('Close <= VIX_EM_15_High & Close >= VIX_EM_15_Low')) / len(data):.1%}",
832
- f"{len(data.query('High > VIX_EM_15_High | Low < VIX_EM_15_Low')) / len(data):.1%}"
833
- ]
834
-
835
- with tab1:
836
- st.subheader(f'Pred for {curr_date} as of 7:30AM PST')
837
- st.write(results)
838
- st.write(df_probas)
839
- st.text('VIX EM')
840
- st.write(df_em)
841
- with tab2:
842
- st.subheader('Latest Data for Pred')
843
- st.write(new_pred)
844
- with tab3:
845
- st.subheader('Historical Data')
846
- st.write(df_final)
847
- with tab4:
848
- st.subheader('Performance')
849
- st.write(df_performance)
850
- st.write(perf_daily[['TargetDate','Predicted','True','Accuracy']])
851
- # st.download_button(
852
- # label="Download Historical Performance",
853
- # data=csv,
854
- fname='performance_for_60m_model.csv'
855
- # )
856
-
857
- elif option == '๐Ÿ•ฐ 90 Mins':
858
  # run60 = st.button('๐Ÿƒ๐Ÿฝโ€โ™‚๏ธ Run')
859
  # if run60:
860
  from model_90m import *
 
 
 
861
  with st.spinner('Loading data...'):
862
  data, df_final, final_row = get_data()
863
  # st.success("โœ… Historical data")
@@ -916,220 +292,225 @@ with st.form("choose_model"):
916
  new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
917
  new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
918
 
919
- st.success("✅ All done!")
920
- tab1, tab2, tab3, tab4 = st.tabs(["🔮 Prediction", "✨ New Data", "🗄 Historical", "📊 Performance"])
921
-
922
- seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
923
-
924
- green_proba = seq_proba[0]
925
- red_proba = 1 - green_proba
926
- do_not_play = (seq_proba[0] > 0.4) and (seq_proba[0] <= 0.6)
927
- stdev = 0.01
928
- score = None
929
- num_obs = None
930
- cond = None
931
- historical_proba = None
932
- text_cond = None
933
- operator = None
934
-
935
- if do_not_play:
936
- text_cond = '🟨'
937
- operator = ''
938
- score = seq_proba[0]
939
- cond = (res1['Predicted'] > 0.4) & (res1['Predicted'] <= 0.6)
940
- num_obs = len(res1.loc[cond])
941
- historical_proba = res1.loc[cond, 'True'].mean()
942
-
943
-
944
- elif green_proba > red_proba:
945
- # If the day is predicted to be green, say so
946
- text_cond = '🟩'
947
- operator = '>='
948
- score = green_proba
949
- # How many with this score?
950
- cond = (res1['Predicted'] >= green_proba)
951
- num_obs = len(res1.loc[cond])
952
- # How often green?
953
- historical_proba = res1.loc[cond, 'True'].mean()
954
- # print(cond)
955
-
956
- elif green_proba <= red_proba:
957
- # If the day is predicted to be green, say so
958
- text_cond = '🟥'
959
- operator = '<='
960
- score = red_proba
961
- # How many with this score?
962
- cond = (res1['Predicted'] <= seq_proba[0])
963
- num_obs = len(res1.loc[cond])
964
- # How often green?
965
- historical_proba = 1 - res1.loc[cond, 'True'].mean()
966
- # print(cond)
967
-
968
- score_fmt = f'{score:.1%}'
969
-
970
- results = pd.DataFrame(index=[
971
- 'PrevClose',
972
- 'Confidence Score',
973
- 'Success Rate',
974
- f'NumObs {operator} {"" if do_not_play else score_fmt}',
975
- ], data = [
976
- f"{data.loc[final_row,'Close']:.2f}",
977
- f'{text_cond} {score:.1%}',
978
- f'{historical_proba:.1%}',
979
- num_obs,
980
- ])
981
-
982
- results.columns = ['Outputs']
983
-
984
- # st.subheader('New Prediction')
985
- int_labels = ['(-∞, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, ∞]']
986
- # df_probas = res1.groupby(pd.qcut(res1['Predicted'],5)).agg({'True':[np.mean,len,np.sum]})
987
 
988
- data['ClosePct'] = (data['Close'] / data['PrevClose']) - 1
989
- data['ClosePct'] = data['ClosePct'].shift(-1)
990
- res1 = res1.merge(data['ClosePct'], left_index=True,right_index=True)
991
- df_probas = res1.groupby(pd.cut(res1['Predicted'], bins = [-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf], labels = int_labels)).agg({'True':[np.mean,len,np.sum],'ClosePct':[np.mean]})
992
- df_probas.columns = ['PctGreen','NumObs','NumGreen','AvgPerf']
993
- df_probas['AvgPerf'] = df_probas['AvgPerf'].apply(lambda x: f'{x:.2%}')
994
-
995
- roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)
996
- precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
997
- recall_score_all = recall_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
998
- len_all = len(res1)
999
-
1000
- res2_filtered = res1.loc[(res1['Predicted'] > 0.6) | (res1['Predicted'] <= 0.4)]
1001
-
1002
- roc_auc_score_hi = roc_auc_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'].values)
1003
- precision_score_hi = precision_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
1004
- recall_score_hi = recall_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
1005
- len_hi = len(res2_filtered)
1006
-
1007
- df_performance = pd.DataFrame(
1008
- index=[
1009
- 'N',
1010
- 'ROC AUC',
1011
- 'Precision',
1012
- 'Recall'
1013
- ],
1014
- columns = [
1015
- 'All',
1016
- 'High Confidence'
1017
- ],
1018
- data = [
1019
- [len_all, len_hi],
1020
- [roc_auc_score_all, roc_auc_score_hi],
1021
- [precision_score_all, precision_score_hi],
1022
- [recall_score_all, recall_score_hi]
1023
- ]
1024
- ).round(2)
1025
-
1026
- def get_acc(t, p):
1027
- if t == False and p <= 0.4:
1028
- return '✅'
1029
- elif t == True and p > 0.6:
1030
- return '✅'
1031
- elif t == False and p > 0.6:
1032
- return '❌'
1033
- elif t == True and p <= 0.4:
1034
- return '❌'
1035
- else:
1036
- return '🟨'
1037
-
1038
- def get_acc_text(t, p):
1039
- if t == False and p <= 0.4:
1040
- return 'Correct'
1041
- elif t == True and p > 0.6:
1042
- return 'Correct'
1043
- elif t == False and p > 0.6:
1044
- return 'Incorrect'
1045
- elif t == True and p <= 0.4:
1046
- return 'Incorrect'
1047
- else:
1048
- return 'No Action'
1049
-
1050
- perf_daily = res1.copy()
1051
- perf_daily['TargetDate'] = perf_daily.index + BDay(1)
1052
- perf_daily['Accuracy'] = [get_acc(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
1053
- perf_daily['AccuracyText'] = [get_acc_text(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
1054
- perf_daily['ConfidenceScore'] = [x if x > 0.6 else 1-x if x <= 0.4 else x for x in perf_daily['Predicted']]
1055
- perf_daily = perf_daily[['TargetDate','Predicted','True','Accuracy','AccuracyText','ConfidenceScore']]
1056
-
1057
- def convert_df(df):
1058
- # IMPORTANT: Cache the conversion to prevent computation on every rerun
1059
- return df.to_csv()
1060
-
1061
- csv = convert_df(perf_daily)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1062
 
1063
- check = data.tail(1)
1064
-
1065
- data['VIX_EM'] = data['Close'] * (data['Close_VIX']/100) * (np.sqrt( 1 ) / np.sqrt(252))
1066
- data['VIX_EM_High'] = data['Close'] + data['VIX_EM']
1067
- data['VIX_EM_Low'] = data['Close'] - data['VIX_EM']
1068
-
1069
- data['VIX_EM_125'] = data['VIX_EM'] * 1.25
1070
- data['VIX_EM_125_High'] = data['Close'] + data['VIX_EM_125']
1071
- data['VIX_EM_125_Low'] = data['Close'] - data['VIX_EM_125']
1072
-
1073
- data['VIX_EM_15'] = data['VIX_EM'] * 1.5
1074
- data['VIX_EM_15_High'] = data['Close'] + data['VIX_EM_15']
1075
- data['VIX_EM_15_Low'] = data['Close'] - data['VIX_EM_15']
1076
-
1077
- data['VIX_EM'] = data['VIX_EM'].shift(1)
1078
- data['VIX_EM_High'] = data['VIX_EM_High'].shift(1)
1079
- data['VIX_EM_Low'] = data['VIX_EM_Low'].shift(1)
1080
-
1081
- data['VIX_EM_15'] = data['VIX_EM_15'].shift(1)
1082
- data['VIX_EM_15_High'] = data['VIX_EM_15_High'].shift(1)
1083
- data['VIX_EM_15_Low'] = data['VIX_EM_15_Low'].shift(1)
1084
-
1085
- data['VIX_EM_125'] = data['VIX_EM_125'].shift(1)
1086
- data['VIX_EM_125_High'] = data['VIX_EM_125_High'].shift(1)
1087
- data['VIX_EM_125_Low'] = data['VIX_EM_125_Low'].shift(1)
1088
-
1089
- df_em = pd.DataFrame(columns=['EM','Low','High','WithinRange','Tested'])
1090
- df_em.loc['EM 1X'] = [
1091
- data['VIX_EM'].iloc[-1].round(2),
1092
- data['VIX_EM_Low'].iloc[-1].round(2),
1093
- data['VIX_EM_High'].iloc[-1].round(2),
1094
- f"{len(data.query('Close <= VIX_EM_High & Close >= VIX_EM_Low')) / len(data):.1%}",
1095
- f"{len(data.query('High > VIX_EM_High | Low < VIX_EM_Low')) / len(data):.1%}"
1096
- ]
1097
- df_em.loc['EM 1.25X'] = [
1098
- data['VIX_EM_125'].iloc[-1].round(2),
1099
- data['VIX_EM_125_Low'].iloc[-1].round(2),
1100
- data['VIX_EM_125_High'].iloc[-1].round(2),
1101
- f"{len(data.query('Close <= VIX_EM_125_High & Close >= VIX_EM_125_Low')) / len(data):.1%}",
1102
- f"{len(data.query('High > VIX_EM_125_High | Low < VIX_EM_125_Low')) / len(data):.1%}"
1103
- ]
1104
- df_em.loc[f"EM 1.5X"] = [
1105
- data['VIX_EM_15'].iloc[-1].round(2),
1106
- data['VIX_EM_15_Low'].iloc[-1].round(2),
1107
- data['VIX_EM_15_High'].iloc[-1].round(2),
1108
- f"{len(data.query('Close <= VIX_EM_15_High & Close >= VIX_EM_15_Low')) / len(data):.1%}",
1109
- f"{len(data.query('High > VIX_EM_15_High | Low < VIX_EM_15_Low')) / len(data):.1%}"
1110
- ]
1111
 
1112
- with tab1:
1113
- st.subheader(f'Pred for {curr_date} as of 8AM PST')
1114
- st.write(results)
1115
- st.write(df_probas)
1116
- st.text('VIX EM')
1117
- st.write(df_em)
1118
- with tab2:
1119
- st.subheader('Latest Data for Pred')
1120
- st.write(new_pred)
1121
- with tab3:
1122
- st.subheader('Historical Data')
1123
- st.write(df_final)
1124
- with tab4:
1125
- st.subheader('Performance')
1126
- st.write(df_performance)
1127
- st.write(perf_daily[['TargetDate','Predicted','True','Accuracy']])
1128
- # st.download_button(
1129
- # label="Download Historical Performance",
1130
- # data=csv,
1131
- fname='performance_for_90m_model.csv'
1132
- # )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1133
 
1134
  if submitted:
1135
  st.download_button(
 
5
  from pandas.tseries.offsets import BDay
6
 
7
  st.set_page_config(
8
+ page_title="Gameday $SPX",
9
  page_icon="๐ŸŽฎ"
10
  )
11
 
12
  st.title('๐ŸŽฎ Gameday Model for $SPX')
13
  st.markdown('**PLEASE NOTE:** Model should be run at or after market open. Documentation on the model and its features [can be found here.](https://huggingface.co/spaces/boomsss/gamedayspx/blob/main/README.md)')
14
  with st.form("choose_model"):
15
+ # option = st.selectbox(
16
+ # 'Select a model, then run.',
17
+ # ('', '🌞 At Open', '⌚ 30 Mins', '⏳ 60 Mins', '🕰 90 Mins'))
18
+
19
 
 
 
 
20
  col1, col2 = st.columns(2)
 
 
21
 
22
+ with col1:
23
+ option = st.select_slider(
24
+ 'Slide the scale based on PST, then run.',
25
+ ['06:30', '07:00', '07:30', '08:00']
26
+ )
27
  with col2:
28
+ submitted = st.form_submit_button('🏃🏽‍♂️ Run',use_container_width=True)
29
  cleared = st.form_submit_button('๐Ÿงน Clear All',use_container_width=True)
30
 
31
  if cleared:
 
36
 
37
  if submitted:
38
 
39
+ if option == '06:30':
40
  # runday = st.button('๐Ÿƒ๐Ÿฝโ€โ™‚๏ธ Run')
41
  # if runday:
42
  from model_day import *
43
+
44
+ fname='performance_for_open_model.csv'
45
+
46
  with st.spinner('Loading data...'):
47
  data, df_final, final_row = get_data()
48
  # st.success("โœ… Historical data")
 
91
  new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
92
  new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
93
 
94
+ seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
95
+
96
+ elif option == '07:00':
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
97
  # run30 = st.button('๐Ÿƒ๐Ÿฝโ€โ™‚๏ธ Run')
98
  # if run30:
99
  from model_30m import *
100
+
101
+ fname='performance_for_30m_model.csv'
102
+
103
  with st.spinner('Loading data...'):
104
  data, df_final, final_row = get_data()
105
  # st.success("โœ… Historical data")
 
158
  new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
159
  new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
160
 
161
+ seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
162
+
163
+ elif option == '07:30':
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
164
  # run60 = st.button('๐Ÿƒ๐Ÿฝโ€โ™‚๏ธ Run')
165
  # if run60:
166
  from model_1h import *
167
+
168
+ fname='performance_for_1h_model.csv'
169
+
170
  with st.spinner('Loading data...'):
171
  data, df_final, final_row = get_data()
172
  # st.success("โœ… Historical data")
 
225
  new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
226
  new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
227
 
228
+ seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
229
+
230
+ elif option == '08:00':
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
  # run60 = st.button('๐Ÿƒ๐Ÿฝโ€โ™‚๏ธ Run')
232
  # if run60:
233
  from model_90m import *
234
+
235
+ fname='performance_for_90m_model.csv'
236
+
237
  with st.spinner('Loading data...'):
238
  data, df_final, final_row = get_data()
239
  # st.success("โœ… Historical data")
 
292
  new_pred['OHLC4_VIX_n1'] = new_pred['OHLC4_VIX_n1'].astype(float)
293
  new_pred['OHLC4_VIX_n2'] = new_pred['OHLC4_VIX_n2'].astype(float)
294
 
295
+ seq_proba = seq_predict_proba(new_pred, xgbr, seq2)
296
+
297
+ st.success(f"All done for {option}!", icon="✅")
298
+
299
+ green_proba = seq_proba[0]
300
+ red_proba = 1 - green_proba
301
+ do_not_play = (seq_proba[0] > 0.4) and (seq_proba[0] <= 0.6)
302
+ stdev = 0.01
303
+ score = None
304
+ num_obs = None
305
+ cond = None
306
+ historical_proba = None
307
+ text_cond = None
308
+ operator = None
309
+
310
+ if do_not_play:
311
+ text_cond = '🟨'
312
+ operator = ''
313
+ score = seq_proba[0]
314
+ cond = (res1['Predicted'] > 0.4) & (res1['Predicted'] <= 0.6)
315
+ num_obs = len(res1.loc[cond])
316
+ historical_proba = res1.loc[cond, 'True'].mean()
317
+
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
318
 
319
+ elif green_proba > red_proba:
320
+ # If the day is predicted to be green, say so
321
+ text_cond = '🟩'
322
+ operator = '>='
323
+ score = green_proba
324
+ # How many with this score?
325
+ cond = (res1['Predicted'] >= green_proba)
326
+ num_obs = len(res1.loc[cond])
327
+ # How often green?
328
+ historical_proba = res1.loc[cond, 'True'].mean()
329
+ # print(cond)
330
+
331
+ elif green_proba <= red_proba:
332
+ # If the day is predicted to be red, say so
333
+ text_cond = '🟥'
334
+ operator = '<='
335
+ score = red_proba
336
+ # How many with this score?
337
+ cond = (res1['Predicted'] <= seq_proba[0])
338
+ num_obs = len(res1.loc[cond])
339
+ # How often red?
340
+ historical_proba = 1 - res1.loc[cond, 'True'].mean()
341
+ # print(cond)
342
+
343
+ score_fmt = f'{score:.1%}'
344
+
345
+ results = pd.DataFrame(index=[
346
+ 'PrevClose',
347
+ 'Confidence Score',
348
+ 'Success Rate',
349
+ f'NumObs {operator} {"" if do_not_play else score_fmt}',
350
+ ], data = [
351
+ f"{data.loc[final_row,'Close']:.2f}",
352
+ f'{text_cond} {score:.1%}',
353
+ f'{historical_proba:.1%}',
354
+ num_obs,
355
+ ])
356
+
357
+ results.columns = ['Outputs']
358
+
359
+ # st.subheader('New Prediction')
360
+
361
+ int_labels = ['(-∞, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, ∞]']
362
+ # df_probas = res1.groupby(pd.qcut(res1['Predicted'],5)).agg({'True':[np.mean,len,np.sum]})
363
+
364
+ data['ClosePct'] = (data['Close'] / data['PrevClose']) - 1
365
+ data['ClosePct'] = data['ClosePct'].shift(-1)
366
+ res1 = res1.merge(data['ClosePct'], left_index=True,right_index=True)
367
+ df_probas = res1.groupby(pd.cut(res1['Predicted'], bins = [-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf], labels = int_labels)).agg({'True':[np.mean,len,np.sum],'ClosePct':[np.mean]})
368
+ df_probas.columns = ['PctGreen','NumObs','NumGreen','AvgPerf']
369
+ df_probas['AvgPerf'] = df_probas['AvgPerf'].apply(lambda x: f'{x:.2%}')
370
+
371
+ roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)
372
+ precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
373
+ recall_score_all = recall_score(res1['True'].astype(int), res1['Predicted'] > 0.5)
374
+ len_all = len(res1)
375
+
376
+ res2_filtered = res1.loc[(res1['Predicted'] > 0.6) | (res1['Predicted'] <= 0.4)]
377
+
378
+ roc_auc_score_hi = roc_auc_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'].values)
379
+ precision_score_hi = precision_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
380
+ recall_score_hi = recall_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)
381
+ len_hi = len(res2_filtered)
382
+
383
+ df_performance = pd.DataFrame(
384
+ index=[
385
+ 'N',
386
+ 'ROC AUC',
387
+ 'Precision',
388
+ 'Recall'
389
+ ],
390
+ columns = [
391
+ 'All',
392
+ 'High Confidence'
393
+ ],
394
+ data = [
395
+ [len_all, len_hi],
396
+ [roc_auc_score_all, roc_auc_score_hi],
397
+ [precision_score_all, precision_score_hi],
398
+ [recall_score_all, recall_score_hi]
399
+ ]
400
+ ).round(2)
401
+
402
+ def get_acc(t, p):
403
+ if t == False and p <= 0.4:
404
+ return '✅'
405
+ elif t == True and p > 0.6:
406
+ return '✅'
407
+ elif t == False and p > 0.6:
408
+ return '❌'
409
+ elif t == True and p <= 0.4:
410
+ return '❌'
411
+ else:
412
+ return '🟨'
413
 
414
+ def get_acc_text(t, p):
415
+ if t == False and p <= 0.4:
416
+ return 'Correct'
417
+ elif t == True and p > 0.6:
418
+ return 'Correct'
419
+ elif t == False and p > 0.6:
420
+ return 'Incorrect'
421
+ elif t == True and p <= 0.4:
422
+ return 'Incorrect'
423
+ else:
424
+ return 'No Action'
425
+
426
+ perf_daily = res1.copy()
427
+ perf_daily['TargetDate'] = perf_daily.index + BDay(1)
428
+ perf_daily['Accuracy'] = [get_acc(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
429
+ perf_daily['AccuracyText'] = [get_acc_text(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]
430
+ perf_daily['ConfidenceScore'] = [x if x > 0.6 else 1-x if x <= 0.4 else x for x in perf_daily['Predicted']]
431
+ perf_daily = perf_daily[['TargetDate','Predicted','True','Accuracy','AccuracyText','ConfidenceScore']]
432
+
433
+ def convert_df(df):
434
+ # IMPORTANT: Cache the conversion to prevent computation on every rerun
435
+ return df.to_csv()
436
+
437
+ csv = convert_df(perf_daily)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
438
 
439
+ tab1, tab2, tab3, tab4 = st.tabs(["🔮 Prediction", "✨ New Data", "🗄 Historical", "📊 Performance"])
440
+
441
+ check = data.tail(1)
442
+
443
+ data['VIX_EM'] = data['Close'] * (data['Close_VIX']/100) * (np.sqrt( 1 ) / np.sqrt(252))
444
+ data['VIX_EM_High'] = data['Close'] + data['VIX_EM']
445
+ data['VIX_EM_Low'] = data['Close'] - data['VIX_EM']
446
+
447
+ # Tomorrow's EM and Today's EM
448
+ fwd_em, curr_em = data['VIX_EM'].iloc[-1], data['VIX_EM'].iloc[-2]
449
+
450
+ data['VIX_EM_125'] = data['VIX_EM'] * 1.25
451
+ data['VIX_EM_125_High'] = data['Close'] + data['VIX_EM_125']
452
+ data['VIX_EM_125_Low'] = data['Close'] - data['VIX_EM_125']
453
+
454
+ data['VIX_EM_15'] = data['VIX_EM'] * 1.5
455
+ data['VIX_EM_15_High'] = data['Close'] + data['VIX_EM_15']
456
+ data['VIX_EM_15_Low'] = data['Close'] - data['VIX_EM_15']
457
+
458
+ data['VIX_EM'] = data['VIX_EM'].shift(1)
459
+ data['VIX_EM_High'] = data['VIX_EM_High'].shift(1)
460
+ data['VIX_EM_Low'] = data['VIX_EM_Low'].shift(1)
461
+
462
+ data['VIX_EM_15'] = data['VIX_EM_15'].shift(1)
463
+ data['VIX_EM_15_High'] = data['VIX_EM_15_High'].shift(1)
464
+ data['VIX_EM_15_Low'] = data['VIX_EM_15_Low'].shift(1)
465
+
466
+ data['VIX_EM_125'] = data['VIX_EM_125'].shift(1)
467
+ data['VIX_EM_125_High'] = data['VIX_EM_125_High'].shift(1)
468
+ data['VIX_EM_125_Low'] = data['VIX_EM_125_Low'].shift(1)
469
+
470
+ df_em = pd.DataFrame(columns=['EM','Low','High','WithinRange','Tested'])
471
+ df_em.loc['EM 1X'] = [
472
+ data['VIX_EM'].iloc[-1].round(2),
473
+ data['VIX_EM_Low'].iloc[-1].round(2),
474
+ data['VIX_EM_High'].iloc[-1].round(2),
475
+ f"{len(data.query('Close <= VIX_EM_High & Close >= VIX_EM_Low')) / len(data):.1%}",
476
+ f"{len(data.query('High > VIX_EM_High | Low < VIX_EM_Low')) / len(data):.1%}"
477
+ ]
478
+ df_em.loc['EM 1.25X'] = [
479
+ data['VIX_EM_125'].iloc[-1].round(2),
480
+ data['VIX_EM_125_Low'].iloc[-1].round(2),
481
+ data['VIX_EM_125_High'].iloc[-1].round(2),
482
+ f"{len(data.query('Close <= VIX_EM_125_High & Close >= VIX_EM_125_Low')) / len(data):.1%}",
483
+ f"{len(data.query('High > VIX_EM_125_High | Low < VIX_EM_125_Low')) / len(data):.1%}"
484
+ ]
485
+ df_em.loc[f"EM 1.5X"] = [
486
+ data['VIX_EM_15'].iloc[-1].round(2),
487
+ data['VIX_EM_15_Low'].iloc[-1].round(2),
488
+ data['VIX_EM_15_High'].iloc[-1].round(2),
489
+ f"{len(data.query('Close <= VIX_EM_15_High & Close >= VIX_EM_15_Low')) / len(data):.1%}",
490
+ f"{len(data.query('High > VIX_EM_15_High | Low < VIX_EM_15_Low')) / len(data):.1%}"
491
+ ]
492
+
493
+ with tab1:
494
+ st.subheader(f'{option} on {curr_date}')
495
+ st.write(results)
496
+ st.write(df_probas)
497
+ st.text(f'VIX EM ({curr_em:.2f} / {fwd_em:.2f})')
498
+ st.write(df_em)
499
+ with tab2:
500
+ st.subheader('Latest Data for Pred')
501
+ st.write(new_pred)
502
+ with tab3:
503
+ st.subheader('Historical Data')
504
+ st.write(df_final)
505
+ with tab4:
506
+ st.subheader('Performance')
507
+ st.write(df_performance)
508
+ st.text('Performance last 10 days (download for all)')
509
+ st.write(perf_daily[['TargetDate','Predicted','True','Accuracy']].iloc[-10:])
510
+ # st.download_button(
511
+ # label="Download Historical Performance",
512
+ # data=csv,
513
+ # )
514
 
515
  if submitted:
516
  st.download_button(
mas_analysis.ipynb ADDED
@@ -0,0 +1,954 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "Found cached dataset text (C:/Users/WINSTON-ITX/.cache/huggingface/datasets/boomsss___text/boomsss--SPX_full_30min-37ae67efd8a1cc91/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2)\n"
13
+ ]
14
+ }
15
+ ],
16
+ "source": [
17
+ "import pandas as pd\n",
18
+ "import numpy as np\n",
19
+ "import model_day\n",
20
+ "import model_30m\n",
21
+ "import model_1h\n",
22
+ "import model_90m"
23
+ ]
24
+ },
25
+ {
26
+ "cell_type": "code",
27
+ "execution_count": 2,
28
+ "metadata": {},
29
+ "outputs": [
30
+ {
31
+ "name": "stderr",
32
+ "output_type": "stream",
33
+ "text": [
34
+ "getting econ tickers: 100%|██████████| 3/3 [00:00<00:00, 3.22it/s]\n",
35
+ "Getting release dates: 100%|██████████| 8/8 [00:02<00:00, 3.78it/s]\n",
36
+ "Making indicators: 100%|██████████| 8/8 [00:00<00:00, 3996.48it/s]\n",
37
+ "Merging econ data: 100%|██████████| 8/8 [00:00<00:00, 888.11it/s]\n",
38
+ "getting econ tickers: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 3/3 [00:00<00:00, 4.14it/s]\n",
39
+ "Getting release dates: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 8/8 [00:01<00:00, 4.32it/s]\n",
40
+ "Making indicators: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 8/8 [00:00<00:00, 7985.35it/s]\n",
41
+ "Found cached dataset text (C:/Users/WINSTON-ITX/.cache/huggingface/datasets/boomsss___text/boomsss--SPX_full_30min-37ae67efd8a1cc91/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2)\n",
42
+ "Merging econ data: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 8/8 [00:00<00:00, 999.03it/s]\n",
43
+ "getting econ tickers: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 3/3 [00:00<00:00, 4.55it/s]\n",
44
+ "Getting release dates: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 8/8 [00:02<00:00, 3.26it/s]\n",
45
+ "Making indicators: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 8/8 [00:00<00:00, 3995.05it/s]\n",
46
+ "Found cached dataset text (C:/Users/WINSTON-ITX/.cache/huggingface/datasets/boomsss___text/boomsss--SPX_full_30min-37ae67efd8a1cc91/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2)\n",
47
+ "Merging econ data: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 8/8 [00:00<00:00, 930.93it/s]\n",
48
+ "getting econ tickers: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 3/3 [00:00<00:00, 5.78it/s]\n",
49
+ "Getting release dates: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 8/8 [00:01<00:00, 5.24it/s]\n",
50
+ "Making indicators: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 8/8 [00:00<00:00, 3996.00it/s]\n",
51
+ "Found cached dataset text (C:/Users/WINSTON-ITX/.cache/huggingface/datasets/boomsss___text/boomsss--SPX_full_30min-37ae67efd8a1cc91/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2)\n",
52
+ "Merging econ data: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 8/8 [00:00<00:00, 999.18it/s]\n"
53
+ ]
54
+ }
55
+ ],
56
+ "source": [
57
+ "_, df_final_day, _ = model_day.get_data()\n",
58
+ "_, df_final_30m, _ = model_30m.get_data()\n",
59
+ "_, df_final_1h, _ = model_1h.get_data()\n",
60
+ "_, df_final_90m, _ = model_90m.get_data()"
61
+ ]
62
+ },
63
+ {
64
+ "cell_type": "code",
65
+ "execution_count": 3,
66
+ "metadata": {},
67
+ "outputs": [
68
+ {
69
+ "name": "stderr",
70
+ "output_type": "stream",
71
+ "text": [
72
+ "LR Model: 100%|██████████| 1177/1177 [00:03<00:00, 391.99it/s]\n",
73
+ "d:\\Projects\\gamedayspx\\model_day.py:63: SettingWithCopyWarning: \n",
74
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
75
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
76
+ "\n",
77
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
78
+ " for_merge['RegrModelOut'] = for_merge['RegrModelOut'] > 0\n",
79
+ "CLF Model: 100%|██████████| 1077/1077 [00:08<00:00, 120.80it/s]\n",
80
+ "LR Model: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 1177/1177 [00:03<00:00, 367.13it/s]\n",
81
+ "d:\\Projects\\gamedayspx\\model_30m.py:61: SettingWithCopyWarning: \n",
82
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
83
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
84
+ "\n",
85
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
86
+ " for_merge['RegrModelOut'] = for_merge['RegrModelOut'] > 0\n",
87
+ "CLF Model: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 1077/1077 [00:10<00:00, 105.72it/s]\n",
88
+ "LR Model: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 1177/1177 [00:03<00:00, 351.68it/s]\n",
89
+ "d:\\Projects\\gamedayspx\\model_1h.py:60: SettingWithCopyWarning: \n",
90
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
91
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
92
+ "\n",
93
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
94
+ " for_merge['RegrModelOut'] = for_merge['RegrModelOut'] > 0\n",
95
+ "CLF Model: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 1077/1077 [00:10<00:00, 102.81it/s]\n",
96
+ "LR Model: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 1177/1177 [00:03<00:00, 368.34it/s]\n",
97
+ "d:\\Projects\\gamedayspx\\model_90m.py:60: SettingWithCopyWarning: \n",
98
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
99
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
100
+ "\n",
101
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
102
+ " for_merge['RegrModelOut'] = for_merge['RegrModelOut'] > 0\n",
103
+ "CLF Model: 100%|โ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆโ–ˆ| 1077/1077 [00:10<00:00, 106.84it/s]\n"
104
+ ]
105
+ }
106
+ ],
107
+ "source": [
108
+ "res_day, _, _ = model_day.walk_forward_validation_seq(df_final_day.dropna(axis=0), 'Target_clf', 'Target', 100, 1)\n",
109
+ "res_30m, _, _ = model_30m.walk_forward_validation_seq(df_final_30m.dropna(axis=0), 'Target_clf', 'Target', 100, 1)\n",
110
+ "res_1h, _, _ = model_1h.walk_forward_validation_seq(df_final_1h.dropna(axis=0), 'Target_clf', 'Target', 100, 1)\n",
111
+ "res_90m, _, _ = model_90m.walk_forward_validation_seq(df_final_90m.dropna(axis=0), 'Target_clf', 'Target', 100, 1)"
112
+ ]
113
+ },
114
+ {
115
+ "cell_type": "code",
116
+ "execution_count": 6,
117
+ "metadata": {},
118
+ "outputs": [
119
+ {
120
+ "data": {
121
+ "text/html": [
122
+ "<div>\n",
123
+ "<style scoped>\n",
124
+ " .dataframe tbody tr th:only-of-type {\n",
125
+ " vertical-align: middle;\n",
126
+ " }\n",
127
+ "\n",
128
+ " .dataframe tbody tr th {\n",
129
+ " vertical-align: top;\n",
130
+ " }\n",
131
+ "\n",
132
+ " .dataframe thead th {\n",
133
+ " text-align: right;\n",
134
+ " }\n",
135
+ "</style>\n",
136
+ "<table border=\"1\" class=\"dataframe\">\n",
137
+ " <thead>\n",
138
+ " <tr style=\"text-align: right;\">\n",
139
+ " <th></th>\n",
140
+ " <th>True</th>\n",
141
+ " <th>Predicted</th>\n",
142
+ " </tr>\n",
143
+ " <tr>\n",
144
+ " <th>index</th>\n",
145
+ " <th></th>\n",
146
+ " <th></th>\n",
147
+ " </tr>\n",
148
+ " </thead>\n",
149
+ " <tbody>\n",
150
+ " <tr>\n",
151
+ " <th>2019-04-23</th>\n",
152
+ " <td>False</td>\n",
153
+ " <td>0.798423</td>\n",
154
+ " </tr>\n",
155
+ " <tr>\n",
156
+ " <th>2019-04-24</th>\n",
157
+ " <td>False</td>\n",
158
+ " <td>0.235411</td>\n",
159
+ " </tr>\n",
160
+ " <tr>\n",
161
+ " <th>2019-04-25</th>\n",
162
+ " <td>True</td>\n",
163
+ " <td>0.479671</td>\n",
164
+ " </tr>\n",
165
+ " <tr>\n",
166
+ " <th>2019-04-26</th>\n",
167
+ " <td>True</td>\n",
168
+ " <td>0.180924</td>\n",
169
+ " </tr>\n",
170
+ " <tr>\n",
171
+ " <th>2019-04-29</th>\n",
172
+ " <td>True</td>\n",
173
+ " <td>0.457531</td>\n",
174
+ " </tr>\n",
175
+ " <tr>\n",
176
+ " <th>...</th>\n",
177
+ " <td>...</td>\n",
178
+ " <td>...</td>\n",
179
+ " </tr>\n",
180
+ " <tr>\n",
181
+ " <th>2023-07-26</th>\n",
182
+ " <td>False</td>\n",
183
+ " <td>0.863622</td>\n",
184
+ " </tr>\n",
185
+ " <tr>\n",
186
+ " <th>2023-07-27</th>\n",
187
+ " <td>True</td>\n",
188
+ " <td>0.875761</td>\n",
189
+ " </tr>\n",
190
+ " <tr>\n",
191
+ " <th>2023-07-28</th>\n",
192
+ " <td>True</td>\n",
193
+ " <td>0.506219</td>\n",
194
+ " </tr>\n",
195
+ " <tr>\n",
196
+ " <th>2023-07-31</th>\n",
197
+ " <td>False</td>\n",
198
+ " <td>0.273154</td>\n",
199
+ " </tr>\n",
200
+ " <tr>\n",
201
+ " <th>2023-08-01</th>\n",
202
+ " <td>False</td>\n",
203
+ " <td>0.238163</td>\n",
204
+ " </tr>\n",
205
+ " </tbody>\n",
206
+ "</table>\n",
207
+ "<p>1077 rows × 2 columns</p>\n",
208
+ "</div>"
209
+ ],
210
+ "text/plain": [
211
+ " True Predicted\n",
212
+ "index \n",
213
+ "2019-04-23 False 0.798423\n",
214
+ "2019-04-24 False 0.235411\n",
215
+ "2019-04-25 True 0.479671\n",
216
+ "2019-04-26 True 0.180924\n",
217
+ "2019-04-29 True 0.457531\n",
218
+ "... ... ...\n",
219
+ "2023-07-26 False 0.863622\n",
220
+ "2023-07-27 True 0.875761\n",
221
+ "2023-07-28 True 0.506219\n",
222
+ "2023-07-31 False 0.273154\n",
223
+ "2023-08-01 False 0.238163\n",
224
+ "\n",
225
+ "[1077 rows x 2 columns]"
226
+ ]
227
+ },
228
+ "execution_count": 6,
229
+ "metadata": {},
230
+ "output_type": "execute_result"
231
+ }
232
+ ],
233
+ "source": [
234
+ "res_day"
235
+ ]
236
+ },
237
+ {
238
+ "cell_type": "code",
239
+ "execution_count": 18,
240
+ "metadata": {},
241
+ "outputs": [],
242
+ "source": [
243
+ "for df in [res_day, res_30m, res_1h, res_90m]:\n",
244
+ " df['HighConfidence'] = [True if x > 0.6 or x <= 0.4 else False for x in df['Predicted']]\n",
245
+ " df['PredDirection'] = df['Predicted'] > 0.5\n",
246
+ " df['Correct'] = df['PredDirection'] == df['True']\n",
247
+ " df['RedDays'] = df['True'] == False\n",
248
+ " df['GreenDays'] = df['True'] == True"
249
+ ]
250
+ },
251
+ {
252
+ "cell_type": "code",
253
+ "execution_count": 20,
254
+ "metadata": {},
255
+ "outputs": [
256
+ {
257
+ "data": {
258
+ "text/html": [
259
+ "<div>\n",
260
+ "<style scoped>\n",
261
+ " .dataframe tbody tr th:only-of-type {\n",
262
+ " vertical-align: middle;\n",
263
+ " }\n",
264
+ "\n",
265
+ " .dataframe tbody tr th {\n",
266
+ " vertical-align: top;\n",
267
+ " }\n",
268
+ "\n",
269
+ " .dataframe thead th {\n",
270
+ " text-align: right;\n",
271
+ " }\n",
272
+ "</style>\n",
273
+ "<table border=\"1\" class=\"dataframe\">\n",
274
+ " <thead>\n",
275
+ " <tr style=\"text-align: right;\">\n",
276
+ " <th></th>\n",
277
+ " <th>True</th>\n",
278
+ " <th>Predicted</th>\n",
279
+ " <th>HighConfidence</th>\n",
280
+ " <th>PredDirection</th>\n",
281
+ " <th>RedDays</th>\n",
282
+ " <th>GreenDays</th>\n",
283
+ " <th>Correct</th>\n",
284
+ " </tr>\n",
285
+ " <tr>\n",
286
+ " <th>index</th>\n",
287
+ " <th></th>\n",
288
+ " <th></th>\n",
289
+ " <th></th>\n",
290
+ " <th></th>\n",
291
+ " <th></th>\n",
292
+ " <th></th>\n",
293
+ " <th></th>\n",
294
+ " </tr>\n",
295
+ " </thead>\n",
296
+ " <tbody>\n",
297
+ " <tr>\n",
298
+ " <th>2019-04-23</th>\n",
299
+ " <td>False</td>\n",
300
+ " <td>0.798423</td>\n",
301
+ " <td>True</td>\n",
302
+ " <td>True</td>\n",
303
+ " <td>True</td>\n",
304
+ " <td>False</td>\n",
305
+ " <td>False</td>\n",
306
+ " </tr>\n",
307
+ " <tr>\n",
308
+ " <th>2019-04-24</th>\n",
309
+ " <td>False</td>\n",
310
+ " <td>0.235411</td>\n",
311
+ " <td>True</td>\n",
312
+ " <td>False</td>\n",
313
+ " <td>True</td>\n",
314
+ " <td>False</td>\n",
315
+ " <td>True</td>\n",
316
+ " </tr>\n",
317
+ " <tr>\n",
318
+ " <th>2019-04-25</th>\n",
319
+ " <td>True</td>\n",
320
+ " <td>0.479671</td>\n",
321
+ " <td>False</td>\n",
322
+ " <td>False</td>\n",
323
+ " <td>False</td>\n",
324
+ " <td>True</td>\n",
325
+ " <td>False</td>\n",
326
+ " </tr>\n",
327
+ " <tr>\n",
328
+ " <th>2019-04-26</th>\n",
329
+ " <td>True</td>\n",
330
+ " <td>0.180924</td>\n",
331
+ " <td>True</td>\n",
332
+ " <td>False</td>\n",
333
+ " <td>False</td>\n",
334
+ " <td>True</td>\n",
335
+ " <td>False</td>\n",
336
+ " </tr>\n",
337
+ " <tr>\n",
338
+ " <th>2019-04-29</th>\n",
339
+ " <td>True</td>\n",
340
+ " <td>0.457531</td>\n",
341
+ " <td>False</td>\n",
342
+ " <td>False</td>\n",
343
+ " <td>False</td>\n",
344
+ " <td>True</td>\n",
345
+ " <td>False</td>\n",
346
+ " </tr>\n",
347
+ " <tr>\n",
348
+ " <th>...</th>\n",
349
+ " <td>...</td>\n",
350
+ " <td>...</td>\n",
351
+ " <td>...</td>\n",
352
+ " <td>...</td>\n",
353
+ " <td>...</td>\n",
354
+ " <td>...</td>\n",
355
+ " <td>...</td>\n",
356
+ " </tr>\n",
357
+ " <tr>\n",
358
+ " <th>2023-07-26</th>\n",
359
+ " <td>False</td>\n",
360
+ " <td>0.863622</td>\n",
361
+ " <td>True</td>\n",
362
+ " <td>True</td>\n",
363
+ " <td>True</td>\n",
364
+ " <td>False</td>\n",
365
+ " <td>False</td>\n",
366
+ " </tr>\n",
367
+ " <tr>\n",
368
+ " <th>2023-07-27</th>\n",
369
+ " <td>True</td>\n",
370
+ " <td>0.875761</td>\n",
371
+ " <td>True</td>\n",
372
+ " <td>True</td>\n",
373
+ " <td>False</td>\n",
374
+ " <td>True</td>\n",
375
+ " <td>True</td>\n",
376
+ " </tr>\n",
377
+ " <tr>\n",
378
+ " <th>2023-07-28</th>\n",
379
+ " <td>True</td>\n",
380
+ " <td>0.506219</td>\n",
381
+ " <td>False</td>\n",
382
+ " <td>True</td>\n",
383
+ " <td>False</td>\n",
384
+ " <td>True</td>\n",
385
+ " <td>True</td>\n",
386
+ " </tr>\n",
387
+ " <tr>\n",
388
+ " <th>2023-07-31</th>\n",
389
+ " <td>False</td>\n",
390
+ " <td>0.273154</td>\n",
391
+ " <td>True</td>\n",
392
+ " <td>False</td>\n",
393
+ " <td>True</td>\n",
394
+ " <td>False</td>\n",
395
+ " <td>True</td>\n",
396
+ " </tr>\n",
397
+ " <tr>\n",
398
+ " <th>2023-08-01</th>\n",
399
+ " <td>False</td>\n",
400
+ " <td>0.238163</td>\n",
401
+ " <td>True</td>\n",
402
+ " <td>False</td>\n",
403
+ " <td>True</td>\n",
404
+ " <td>False</td>\n",
405
+ " <td>True</td>\n",
406
+ " </tr>\n",
407
+ " </tbody>\n",
408
+ "</table>\n",
409
+ "<p>1077 rows × 7 columns</p>\n",
410
+ "</div>"
411
+ ],
412
+ "text/plain": [
413
+ " True Predicted HighConfidence PredDirection RedDays \\\n",
414
+ "index \n",
415
+ "2019-04-23 False 0.798423 True True True \n",
416
+ "2019-04-24 False 0.235411 True False True \n",
417
+ "2019-04-25 True 0.479671 False False False \n",
418
+ "2019-04-26 True 0.180924 True False False \n",
419
+ "2019-04-29 True 0.457531 False False False \n",
420
+ "... ... ... ... ... ... \n",
421
+ "2023-07-26 False 0.863622 True True True \n",
422
+ "2023-07-27 True 0.875761 True True False \n",
423
+ "2023-07-28 True 0.506219 False True False \n",
424
+ "2023-07-31 False 0.273154 True False True \n",
425
+ "2023-08-01 False 0.238163 True False True \n",
426
+ "\n",
427
+ " GreenDays Correct \n",
428
+ "index \n",
429
+ "2019-04-23 False False \n",
430
+ "2019-04-24 False True \n",
431
+ "2019-04-25 True False \n",
432
+ "2019-04-26 True False \n",
433
+ "2019-04-29 True False \n",
434
+ "... ... ... \n",
435
+ "2023-07-26 False False \n",
436
+ "2023-07-27 True True \n",
437
+ "2023-07-28 True True \n",
438
+ "2023-07-31 False True \n",
439
+ "2023-08-01 False True \n",
440
+ "\n",
441
+ "[1077 rows x 7 columns]"
442
+ ]
443
+ },
444
+ "execution_count": 20,
445
+ "metadata": {},
446
+ "output_type": "execute_result"
447
+ }
448
+ ],
449
+ "source": [
450
+ "res_day"
451
+ ]
452
+ },
453
+ {
454
+ "cell_type": "code",
455
+ "execution_count": 35,
456
+ "metadata": {},
457
+ "outputs": [],
458
+ "source": [
459
+ "all_res = pd.concat([res_day.drop(columns=['GreenDays','RedDays']), res_30m.drop(columns=['GreenDays','RedDays']), res_90m.drop(columns=['GreenDays','RedDays']), res_1h.drop(columns=['GreenDays','RedDays']), res_day[['GreenDays','RedDays']]], axis=1)"
460
+ ]
461
+ },
462
+ {
463
+ "cell_type": "code",
464
+ "execution_count": 36,
465
+ "metadata": {},
466
+ "outputs": [],
467
+ "source": [
468
+ "all_res1 = all_res[['HighConfidence','PredDirection','Correct','GreenDays','RedDays']]"
469
+ ]
470
+ },
471
+ {
472
+ "cell_type": "code",
473
+ "execution_count": 37,
474
+ "metadata": {},
475
+ "outputs": [
476
+ {
477
+ "data": {
478
+ "text/html": [
479
+ "<div>\n",
480
+ "<style scoped>\n",
481
+ " .dataframe tbody tr th:only-of-type {\n",
482
+ " vertical-align: middle;\n",
483
+ " }\n",
484
+ "\n",
485
+ " .dataframe tbody tr th {\n",
486
+ " vertical-align: top;\n",
487
+ " }\n",
488
+ "\n",
489
+ " .dataframe thead th {\n",
490
+ " text-align: right;\n",
491
+ " }\n",
492
+ "</style>\n",
493
+ "<table border=\"1\" class=\"dataframe\">\n",
494
+ " <thead>\n",
495
+ " <tr style=\"text-align: right;\">\n",
496
+ " <th></th>\n",
497
+ " <th>HighConfidence</th>\n",
498
+ " <th>HighConfidence</th>\n",
499
+ " <th>HighConfidence</th>\n",
500
+ " <th>HighConfidence</th>\n",
501
+ " <th>PredDirection</th>\n",
502
+ " <th>PredDirection</th>\n",
503
+ " <th>PredDirection</th>\n",
504
+ " <th>PredDirection</th>\n",
505
+ " <th>Correct</th>\n",
506
+ " <th>Correct</th>\n",
507
+ " <th>Correct</th>\n",
508
+ " <th>Correct</th>\n",
509
+ " <th>GreenDays</th>\n",
510
+ " <th>RedDays</th>\n",
511
+ " </tr>\n",
512
+ " </thead>\n",
513
+ " <tbody>\n",
514
+ " <tr>\n",
515
+ " <th>2019-04-23</th>\n",
516
+ " <td>True</td>\n",
517
+ " <td>True</td>\n",
518
+ " <td>True</td>\n",
519
+ " <td>True</td>\n",
520
+ " <td>True</td>\n",
521
+ " <td>False</td>\n",
522
+ " <td>True</td>\n",
523
+ " <td>True</td>\n",
524
+ " <td>False</td>\n",
525
+ " <td>True</td>\n",
526
+ " <td>False</td>\n",
527
+ " <td>False</td>\n",
528
+ " <td>False</td>\n",
529
+ " <td>True</td>\n",
530
+ " </tr>\n",
531
+ " <tr>\n",
532
+ " <th>2019-04-24</th>\n",
533
+ " <td>True</td>\n",
534
+ " <td>True</td>\n",
535
+ " <td>True</td>\n",
536
+ " <td>True</td>\n",
537
+ " <td>False</td>\n",
538
+ " <td>False</td>\n",
539
+ " <td>False</td>\n",
540
+ " <td>False</td>\n",
541
+ " <td>True</td>\n",
542
+ " <td>True</td>\n",
543
+ " <td>True</td>\n",
544
+ " <td>True</td>\n",
545
+ " <td>False</td>\n",
546
+ " <td>True</td>\n",
547
+ " </tr>\n",
548
+ " <tr>\n",
549
+ " <th>2019-04-25</th>\n",
550
+ " <td>False</td>\n",
551
+ " <td>True</td>\n",
552
+ " <td>True</td>\n",
553
+ " <td>False</td>\n",
554
+ " <td>False</td>\n",
555
+ " <td>False</td>\n",
556
+ " <td>True</td>\n",
557
+ " <td>True</td>\n",
558
+ " <td>False</td>\n",
559
+ " <td>False</td>\n",
560
+ " <td>True</td>\n",
561
+ " <td>True</td>\n",
562
+ " <td>True</td>\n",
563
+ " <td>False</td>\n",
564
+ " </tr>\n",
565
+ " <tr>\n",
566
+ " <th>2019-04-26</th>\n",
567
+ " <td>True</td>\n",
568
+ " <td>True</td>\n",
569
+ " <td>True</td>\n",
570
+ " <td>True</td>\n",
571
+ " <td>False</td>\n",
572
+ " <td>True</td>\n",
573
+ " <td>True</td>\n",
574
+ " <td>True</td>\n",
575
+ " <td>False</td>\n",
576
+ " <td>True</td>\n",
577
+ " <td>True</td>\n",
578
+ " <td>True</td>\n",
579
+ " <td>True</td>\n",
580
+ " <td>False</td>\n",
581
+ " </tr>\n",
582
+ " <tr>\n",
583
+ " <th>2019-04-29</th>\n",
584
+ " <td>False</td>\n",
585
+ " <td>True</td>\n",
586
+ " <td>True</td>\n",
587
+ " <td>True</td>\n",
588
+ " <td>False</td>\n",
589
+ " <td>False</td>\n",
590
+ " <td>False</td>\n",
591
+ " <td>False</td>\n",
592
+ " <td>False</td>\n",
593
+ " <td>False</td>\n",
594
+ " <td>False</td>\n",
595
+ " <td>False</td>\n",
596
+ " <td>True</td>\n",
597
+ " <td>False</td>\n",
598
+ " </tr>\n",
599
+ " <tr>\n",
600
+ " <th>...</th>\n",
601
+ " <td>...</td>\n",
602
+ " <td>...</td>\n",
603
+ " <td>...</td>\n",
604
+ " <td>...</td>\n",
605
+ " <td>...</td>\n",
606
+ " <td>...</td>\n",
607
+ " <td>...</td>\n",
608
+ " <td>...</td>\n",
609
+ " <td>...</td>\n",
610
+ " <td>...</td>\n",
611
+ " <td>...</td>\n",
612
+ " <td>...</td>\n",
613
+ " <td>...</td>\n",
614
+ " <td>...</td>\n",
615
+ " </tr>\n",
616
+ " <tr>\n",
617
+ " <th>2023-07-26</th>\n",
618
+ " <td>True</td>\n",
619
+ " <td>True</td>\n",
620
+ " <td>True</td>\n",
621
+ " <td>True</td>\n",
622
+ " <td>True</td>\n",
623
+ " <td>True</td>\n",
624
+ " <td>True</td>\n",
625
+ " <td>True</td>\n",
626
+ " <td>False</td>\n",
627
+ " <td>False</td>\n",
628
+ " <td>False</td>\n",
629
+ " <td>False</td>\n",
630
+ " <td>False</td>\n",
631
+ " <td>True</td>\n",
632
+ " </tr>\n",
633
+ " <tr>\n",
634
+ " <th>2023-07-27</th>\n",
635
+ " <td>True</td>\n",
636
+ " <td>True</td>\n",
637
+ " <td>True</td>\n",
638
+ " <td>True</td>\n",
639
+ " <td>True</td>\n",
640
+ " <td>True</td>\n",
641
+ " <td>True</td>\n",
642
+ " <td>True</td>\n",
643
+ " <td>True</td>\n",
644
+ " <td>True</td>\n",
645
+ " <td>True</td>\n",
646
+ " <td>True</td>\n",
647
+ " <td>True</td>\n",
648
+ " <td>False</td>\n",
649
+ " </tr>\n",
650
+ " <tr>\n",
651
+ " <th>2023-07-28</th>\n",
652
+ " <td>False</td>\n",
653
+ " <td>False</td>\n",
654
+ " <td>True</td>\n",
655
+ " <td>True</td>\n",
656
+ " <td>True</td>\n",
657
+ " <td>True</td>\n",
658
+ " <td>True</td>\n",
659
+ " <td>True</td>\n",
660
+ " <td>True</td>\n",
661
+ " <td>True</td>\n",
662
+ " <td>True</td>\n",
663
+ " <td>True</td>\n",
664
+ " <td>True</td>\n",
665
+ " <td>False</td>\n",
666
+ " </tr>\n",
667
+ " <tr>\n",
668
+ " <th>2023-07-31</th>\n",
669
+ " <td>True</td>\n",
670
+ " <td>True</td>\n",
671
+ " <td>True</td>\n",
672
+ " <td>True</td>\n",
673
+ " <td>False</td>\n",
674
+ " <td>False</td>\n",
675
+ " <td>False</td>\n",
676
+ " <td>False</td>\n",
677
+ " <td>True</td>\n",
678
+ " <td>True</td>\n",
679
+ " <td>True</td>\n",
680
+ " <td>True</td>\n",
681
+ " <td>False</td>\n",
682
+ " <td>True</td>\n",
683
+ " </tr>\n",
684
+ " <tr>\n",
685
+ " <th>2023-08-01</th>\n",
686
+ " <td>True</td>\n",
687
+ " <td>True</td>\n",
688
+ " <td>True</td>\n",
689
+ " <td>True</td>\n",
690
+ " <td>False</td>\n",
691
+ " <td>False</td>\n",
692
+ " <td>False</td>\n",
693
+ " <td>False</td>\n",
694
+ " <td>True</td>\n",
695
+ " <td>True</td>\n",
696
+ " <td>True</td>\n",
697
+ " <td>True</td>\n",
698
+ " <td>False</td>\n",
699
+ " <td>True</td>\n",
700
+ " </tr>\n",
701
+ " </tbody>\n",
702
+ "</table>\n",
703
+ "<p>1077 rows × 14 columns</p>\n",
704
+ "</div>"
705
+ ],
706
+ "text/plain": [
707
+ " HighConfidence HighConfidence HighConfidence HighConfidence \\\n",
708
+ "2019-04-23 True True True True \n",
709
+ "2019-04-24 True True True True \n",
710
+ "2019-04-25 False True True False \n",
711
+ "2019-04-26 True True True True \n",
712
+ "2019-04-29 False True True True \n",
713
+ "... ... ... ... ... \n",
714
+ "2023-07-26 True True True True \n",
715
+ "2023-07-27 True True True True \n",
716
+ "2023-07-28 False False True True \n",
717
+ "2023-07-31 True True True True \n",
718
+ "2023-08-01 True True True True \n",
719
+ "\n",
720
+ " PredDirection PredDirection PredDirection PredDirection \\\n",
721
+ "2019-04-23 True False True True \n",
722
+ "2019-04-24 False False False False \n",
723
+ "2019-04-25 False False True True \n",
724
+ "2019-04-26 False True True True \n",
725
+ "2019-04-29 False False False False \n",
726
+ "... ... ... ... ... \n",
727
+ "2023-07-26 True True True True \n",
728
+ "2023-07-27 True True True True \n",
729
+ "2023-07-28 True True True True \n",
730
+ "2023-07-31 False False False False \n",
731
+ "2023-08-01 False False False False \n",
732
+ "\n",
733
+ " Correct Correct Correct Correct GreenDays RedDays \n",
734
+ "2019-04-23 False True False False False True \n",
735
+ "2019-04-24 True True True True False True \n",
736
+ "2019-04-25 False False True True True False \n",
737
+ "2019-04-26 False True True True True False \n",
738
+ "2019-04-29 False False False False True False \n",
739
+ "... ... ... ... ... ... ... \n",
740
+ "2023-07-26 False False False False False True \n",
741
+ "2023-07-27 True True True True True False \n",
742
+ "2023-07-28 True True True True True False \n",
743
+ "2023-07-31 True True True True False True \n",
744
+ "2023-08-01 True True True True False True \n",
745
+ "\n",
746
+ "[1077 rows x 14 columns]"
747
+ ]
748
+ },
749
+ "execution_count": 37,
750
+ "metadata": {},
751
+ "output_type": "execute_result"
752
+ }
753
+ ],
754
+ "source": [
755
+ "all_res1"
756
+ ]
757
+ },
758
+ {
759
+ "cell_type": "code",
760
+ "execution_count": 38,
761
+ "metadata": {},
762
+ "outputs": [],
763
+ "source": [
764
+ "all_res1.columns = [\n",
765
+ " 'HighConfidence_day',\n",
766
+ " 'HighConfidence_30m',\n",
767
+ " 'HighConfidence_1h',\n",
768
+ " 'HighConfidence_90m',\n",
769
+ " 'PredDirection_day',\n",
770
+ " 'PredDirection_30m',\n",
771
+ " 'PredDirection_1h',\n",
772
+ " 'PredDirection_90m',\n",
773
+ " 'Correct_day',\n",
774
+ " 'Correct_30m',\n",
775
+ " 'Correct_1h',\n",
776
+ " 'Correct_90m',\n",
777
+ " 'GreenDays',\n",
778
+ " 'RedDays'\n",
779
+ "]"
780
+ ]
781
+ },
782
+ {
783
+ "cell_type": "code",
784
+ "execution_count": 41,
785
+ "metadata": {},
786
+ "outputs": [
787
+ {
788
+ "data": {
789
+ "text/plain": [
790
+ "0.8133333333333334"
791
+ ]
792
+ },
793
+ "execution_count": 41,
794
+ "metadata": {},
795
+ "output_type": "execute_result"
796
+ }
797
+ ],
798
+ "source": [
799
+ "# When all models pred green, how often was it green?\n",
800
+ "all_res1.query('''\n",
801
+ " PredDirection_day == True & PredDirection_30m == True & PredDirection_1h == True & PredDirection_90m == True\n",
802
+ "''')['GreenDays'].sum() / len(all_res1.query('''\n",
803
+ " PredDirection_day == True & PredDirection_30m == True & PredDirection_1h == True & PredDirection_90m == True\n",
804
+ "'''))"
805
+ ]
806
+ },
807
+ {
808
+ "cell_type": "code",
809
+ "execution_count": 42,
810
+ "metadata": {},
811
+ "outputs": [
812
+ {
813
+ "data": {
814
+ "text/plain": [
815
+ "0.8638297872340426"
816
+ ]
817
+ },
818
+ "execution_count": 42,
819
+ "metadata": {},
820
+ "output_type": "execute_result"
821
+ }
822
+ ],
823
+ "source": [
824
+ "# When all models pred red, how often was it red?\n",
825
+ "all_res1.query('''\n",
826
+ " PredDirection_day == False & PredDirection_30m == False & PredDirection_1h == False & PredDirection_90m == False\n",
827
+ "''')['RedDays'].sum() / len(all_res1.query('''\n",
828
+ " PredDirection_day == False & PredDirection_30m == False & PredDirection_1h == False & PredDirection_90m == False\n",
829
+ "'''))"
830
+ ]
831
+ },
832
+ {
833
+ "cell_type": "code",
834
+ "execution_count": 57,
835
+ "metadata": {},
836
+ "outputs": [
837
+ {
838
+ "name": "stdout",
839
+ "output_type": "stream",
840
+ "text": [
841
+ "0.8508474576271187\n",
842
+ "251\n",
843
+ "295\n"
844
+ ]
845
+ }
846
+ ],
847
+ "source": [
848
+ "# When all models predict green with high confidence, how often was it green?\n",
849
+ "print(all_res1.query('''\n",
850
+ " HighConfidence_day == True & HighConfidence_30m == True & HighConfidence_1h == True & HighConfidence_90m == True & \\\n",
851
+ " PredDirection_day == True & PredDirection_30m == True & PredDirection_1h == True & PredDirection_90m == True\n",
852
+ "''')['GreenDays'].sum() / len(all_res1.query('''\n",
853
+ " HighConfidence_day == True & HighConfidence_30m == True & HighConfidence_1h == True & HighConfidence_90m == True & \\\n",
854
+ " PredDirection_day == True & PredDirection_30m == True & PredDirection_1h == True & PredDirection_90m == True\n",
855
+ " ''')))\n",
856
+ "\n",
857
+ "print(all_res1.query('''\n",
858
+ " HighConfidence_day == True & HighConfidence_30m == True & HighConfidence_1h == True & HighConfidence_90m == True & \\\n",
859
+ " PredDirection_day == True & PredDirection_30m == True & PredDirection_1h == True & PredDirection_90m == True\n",
860
+ "''')['GreenDays'].sum())\n",
861
+ "\n",
862
+ "print(len(all_res1.query('''\n",
863
+ " HighConfidence_day == True & HighConfidence_30m == True & HighConfidence_1h == True & HighConfidence_90m == True & \\\n",
864
+ " PredDirection_day == True & PredDirection_30m == True & PredDirection_1h == True & PredDirection_90m == True\n",
865
+ " ''')))\n"
866
+ ]
867
+ },
868
+ {
869
+ "cell_type": "code",
870
+ "execution_count": 56,
871
+ "metadata": {},
872
+ "outputs": [
873
+ {
874
+ "name": "stdout",
875
+ "output_type": "stream",
876
+ "text": [
877
+ "0.9090909090909091\n",
878
+ "150\n",
879
+ "165\n"
880
+ ]
881
+ }
882
+ ],
883
+ "source": [
884
+ "# When all models predict red with high confidence, how often was it red?\n",
885
+ "print(all_res1.query('''\n",
886
+ " HighConfidence_day == True & HighConfidence_30m == True & HighConfidence_1h == True & HighConfidence_90m == True & \\\n",
887
+ " PredDirection_day == False & PredDirection_30m == False & PredDirection_1h == False & PredDirection_90m == False\n",
888
+ "''')['RedDays'].sum() / len(all_res1.query('''\n",
889
+ " HighConfidence_day == True & HighConfidence_30m == True & HighConfidence_1h == True & HighConfidence_90m == True & \\\n",
890
+ " PredDirection_day == False & PredDirection_30m == False & PredDirection_1h == False & PredDirection_90m == False\n",
891
+ " ''')))\n",
892
+ "\n",
893
+ "print(all_res1.query('''\n",
894
+ " HighConfidence_day == True & HighConfidence_30m == True & HighConfidence_1h == True & HighConfidence_90m == True & \\\n",
895
+ " PredDirection_day == False & PredDirection_30m == False & PredDirection_1h == False & PredDirection_90m == False\n",
896
+ " ''')['RedDays'].sum())\n",
897
+ "\n",
898
+ "print(len(all_res1.query('''\n",
899
+ " HighConfidence_day == True & HighConfidence_30m == True & HighConfidence_1h == True & HighConfidence_90m == True & \\\n",
900
+ " PredDirection_day == False & PredDirection_30m == False & PredDirection_1h == False & PredDirection_90m == False\n",
901
+ " ''')))"
902
+ ]
903
+ },
904
+ {
905
+ "cell_type": "code",
906
+ "execution_count": 59,
907
+ "metadata": {},
908
+ "outputs": [
909
+ {
910
+ "data": {
911
+ "text/plain": [
912
+ "0.4271123491179202"
913
+ ]
914
+ },
915
+ "execution_count": 59,
916
+ "metadata": {},
917
+ "output_type": "execute_result"
918
+ }
919
+ ],
920
+ "source": [
921
+ "(165 + 295) / 1077"
922
+ ]
923
+ },
924
+ {
925
+ "cell_type": "code",
926
+ "execution_count": null,
927
+ "metadata": {},
928
+ "outputs": [],
929
+ "source": []
930
+ }
931
+ ],
932
+ "metadata": {
933
+ "kernelspec": {
934
+ "display_name": "py39",
935
+ "language": "python",
936
+ "name": "python3"
937
+ },
938
+ "language_info": {
939
+ "codemirror_mode": {
940
+ "name": "ipython",
941
+ "version": 3
942
+ },
943
+ "file_extension": ".py",
944
+ "mimetype": "text/x-python",
945
+ "name": "python",
946
+ "nbconvert_exporter": "python",
947
+ "pygments_lexer": "ipython3",
948
+ "version": "3.9.12"
949
+ },
950
+ "orig_nbformat": 4
951
+ },
952
+ "nbformat": 4,
953
+ "nbformat_minor": 2
954
+ }
research_hod_lod.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
troubleshoot_day_model.ipynb ADDED
@@ -0,0 +1,707 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "cells": [
3
+ {
4
+ "cell_type": "code",
5
+ "execution_count": 1,
6
+ "metadata": {},
7
+ "outputs": [
8
+ {
9
+ "name": "stderr",
10
+ "output_type": "stream",
11
+ "text": [
12
+ "Found cached dataset text (C:/Users/WINSTON-ITX/.cache/huggingface/datasets/boomsss___text/boomsss--SPX_full_30min-37ae67efd8a1cc91/0.0.0/cb1e9bd71a82ad27976be3b12b407850fe2837d80c22c5e03a28949843a8ace2)\n"
13
+ ]
14
+ }
15
+ ],
16
+ "source": [
17
+ "import pandas as pd\n",
18
+ "import numpy as np\n",
19
+ "from model_day import get_data, walk_forward_validation_seq\n",
20
+ "import xgboost as xgb"
21
+ ]
22
+ },
23
+ {
24
+ "cell_type": "code",
25
+ "execution_count": 2,
26
+ "metadata": {},
27
+ "outputs": [
28
+ {
29
+ "name": "stderr",
30
+ "output_type": "stream",
31
+ "text": [
32
+ "getting econ tickers: 100%|██████████| 3/3 [00:01<00:00, 2.62it/s]\n",
33
+ "Getting release dates: 100%|██████████| 8/8 [00:02<00:00, 3.85it/s]\n",
34
+ "Making indicators: 100%|██████████| 8/8 [00:00<00:00, 2664.95it/s]\n",
35
+ "Merging econ data: 100%|██████████| 8/8 [00:00<00:00, 999.15it/s]\n"
36
+ ]
37
+ }
38
+ ],
39
+ "source": [
40
+ "data, df_final, final_row = get_data()"
41
+ ]
42
+ },
43
+ {
44
+ "cell_type": "code",
45
+ "execution_count": 3,
46
+ "metadata": {},
47
+ "outputs": [],
48
+ "source": [
49
+ "data['ClosePct'] = (data['Close'] / data['PrevClose']) - 1\n",
50
+ "data['HighPct'] = (data['High'] / data['PrevClose']) - 1\n",
51
+ "data['LowPct'] = (data['Low'] / data['PrevClose']) - 1\n",
52
+ "data['ClosePct'] = data['ClosePct'].shift(-1)"
53
+ ]
54
+ },
55
+ {
56
+ "cell_type": "code",
57
+ "execution_count": 4,
58
+ "metadata": {},
59
+ "outputs": [
60
+ {
61
+ "name": "stderr",
62
+ "output_type": "stream",
63
+ "text": [
64
+ "LR Model: 100%|██████████| 1178/1178 [00:03<00:00, 385.55it/s]\n",
65
+ "d:\\Projects\\gamedayspx\\model_day.py:63: SettingWithCopyWarning: \n",
66
+ "A value is trying to be set on a copy of a slice from a DataFrame.\n",
67
+ "Try using .loc[row_indexer,col_indexer] = value instead\n",
68
+ "\n",
69
+ "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n",
70
+ " for_merge['RegrModelOut'] = for_merge['RegrModelOut'] > 0\n",
71
+ "CLF Model: 100%|██████████| 1078/1078 [00:09<00:00, 119.55it/s]\n"
72
+ ]
73
+ }
74
+ ],
75
+ "source": [
76
+ "res1, model1, model2 = walk_forward_validation_seq(df_final.dropna(axis=0), 'Target_clf', 'Target', 100, 1)"
77
+ ]
78
+ },
79
+ {
80
+ "cell_type": "code",
81
+ "execution_count": 5,
82
+ "metadata": {},
83
+ "outputs": [
84
+ {
85
+ "data": {
86
+ "text/plain": [
87
+ "<AxesSubplot:title={'center':'Feature importance'}, xlabel='F score', ylabel='Features'>"
88
+ ]
89
+ },
90
+ "execution_count": 5,
91
+ "metadata": {},
92
+ "output_type": "execute_result"
93
+ },
94
+ {
95
+ "data": {
96
+ "image/png": "iVBORw0KGgoAAAANSUhEUgAAAikAAAEWCAYAAACjVwf7AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMiwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8qNh9FAAAACXBIWXMAAAsTAAALEwEAmpwYAAAn6klEQVR4nO3deZhU1b3u8e/LKIgIiBIVjaIQmQniQFTSERVxwCHEKTnilERjYjTOiSJmMsY4BnO8SriiJqARVI56DF6xnY0iAg5RYgQDaGQQUFCxG373j727U912Qyl010Lez/P0Q+21p9+uaq2311q7ShGBmZmZWWqalLoAMzMzs7o4pJiZmVmSHFLMzMwsSQ4pZmZmliSHFDMzM0uSQ4qZmZklySHFzJD0U0ljSl2HmVkh+XNSzNaPpLlAJ2B1QXO3iHh7PY95WkT8v/WrbuMjaRSwa0R8p9S1mFlpuSfFbMM4PCLaFPx87oCyIUhqVsrzf14ba91m1jAcUswaiKQtJf1R0juSFkj6paSm+bpdJE2VtETSYkl/ktQuX3c7sCPwP5JWSLpAUpmk+bWOP1fSAfnjUZLulnSHpPeBk9Z2/jpqHSXpjvzxTpJC0smS5klaKul0SXtImiVpmaTRBfueJOkpSaMlLZf0mqTBBeu3kzRZ0nuS3pD03VrnLaz7dOCnwLH5tc/MtztZ0t8lfSDpTUnfLzhGmaT5ks6VtDC/3pML1reSdLWkt/L6npTUKl+3t6Sn82uaKansc7zUZtZAHFLMGs6tQCWwK/BV4CDgtHydgCuA7YDuwA7AKICI+C/gX/ynd+a3RZ7vCOBuoB3wp3Wcvxh7AV2BY4HrgJ8BBwA9gWMkfb3Wtv8EOgKXAZMkdcjXTQDm59c6HPi1pP3rqfuPwK+BO/Nr75tvsxA4DGgLnAxcK6l/wTG+BGwJbA+cCtwoqX2+7nfA7sDXgA7ABcAaSdsDDwC/zNvPAyZK2vozPEdm1oAcUsw2jHvzv8aXSbpXUifgEODsiFgZEQuBa4HjACLijYh4OCJWRcQi4Brg6/UfvijPRMS9EbGG7M283vMX6RcR8XFETAFWAuMjYmFELACeIAs+VRYC10VERUTcCbwOHCppB2Af4ML8WDOAMcCJddUdER/VVUhEPBAR/4zMY8AUYL+CTSqAn+fnfxBYAXxFUhPgFODHEbEgIlZHxNMRsQr4DvBgRDyYn/thYFr+vJlZAjz+a7ZhHFk4yVXSnkBz4B1JVc1NgHn5+k7A9WRvtFvk65auZw3zCh5/eW3nL9K7BY8/qmO5TcHygqg5C/8tsp6T7YD3IuKDWusG1FN3nSQNJeuh6UZ2Ha2Blwo2WRIRlQXLH+b1dQQ2I+vlqe3LwLckHV7Q1hx4dF31mFnjcEgxaxjzgFVAx1pvnlV+DQTQOyLek3QkMLpgfe3b7laSvTEDkM8tqT0sUbjPus6/oW0vSQVBZUdgMvA20EHSFgVBZUdgQcG+ta+1xrKklsBEst6X+yKiQtK9ZENm67IY+BjYBZhZa9084PaI+O6n9jKzJHi4x6wBRMQ7ZEMSV0tqK6lJPlm2akhnC7IhieX53Ijzax3iXaBLwfJsYDNJh0pqDlwCtFyP829o2wBnSWou6Vtk82wejIh5wNPAFZI2k9SHbM7IHWs51rvATvlQDUALsmtdBFTmvSoHFVNUPvQ1Frgmn8DbVNLAPPjcARwuaUjevlk+CbfzZ798M2sIDilmDedEsjfYV8mGcu4Gts3XXQ70B5aTTd6cVGvfK4BL8jku50XEcuAHZPM5FpD1rMxn7dZ2/g3tb2STbBcDvwKGR8SSfN3xwE5kvSr3AJet4/Nf/pL/u0TS9LwH5izgLrLrOIGsl6ZY55ENDT0PvAdcCTTJA9QRZHcTLSLrWTkf/3/RLBn+MDczWy+STiL74Ll9S12LmX2x+C8GMzMzS5JDipmZmSXJwz1mZmaWJPekmJmZWZL8OSkJadeu
Xey6666lLqNOK1euZPPNNy91GZ+Sal2Qbm2p1gXp1pZqXZBubY1Z1wsvvLA4Ivx1Bl9ADikJ6dSpE9OmTSt1GXUqLy+nrKys1GV8Sqp1Qbq1pVoXpFtbqnVBurU1Zl2S3mqUE1mj83CPmZmZJckhxczMzJLkkGJmZmZJckgxMzOzJDmkmJmZWZIcUszMzCxJDilmZmaWJIcUMzMzS5JDipmZmSXJIcXMzMyS5JBiZmZmSXJIMTMzsyQ5pJiZmVmSHFLMzMwsSQ4pZmZmliSHFDMzM0uSQ4qZmZklySHFzMzMkuSQYmZmZklySDEzM7MkOaSYmZlZkhxSzMzMLEkOKWZmZpYkhxQzMzNLkkOKmZmZJckhxczMzJLkkGJmZmZJckgxMzOzJDmkmJmZWZIcUszMzCxJDilmZmaWJIcUMzMzS5JDipmZmSXJIcXMzMyS5JBiZmZmSXJIMTMzsyQ5pJiZmVmSHFLMzMwsSQ4pZmZmliSHFDMzM0uSQ4qZmZklySHFzMzMkuSQYmZmZklySDEzM7MkOaSYmZlZkhxSzMzMLEkOKWZmZpYkhxQzMzNLkkOKmZmZJckhxczMzJLkkGJmZmZJckgxMzOzJDmkmJmZWZIcUszMzCxJDilmZmaWJIcUMzMzS5JDipmZmSXJIcXMzMyS5JBiZmZmSXJIMTMzsyQ5pJiZmVmSHFLMzMwsSQ4pZmZmliSHFDMzM0uSIqLUNVhuxy67RpNjri91GXU6t3clV7/UrNRlfEqqdUG6taVaF6RbW6p1QVq1zf3NodWPy8vLKSsra5TzSnohIgY0ysmsUbknxczMzJLkkGJmZmZJckgxM7MvPElfkTSj4Od9SWfX2qa9pHskzZL0nKReBesOlvS6pDckXVTQ/sO8LSR1rOO8e0iqlDS8oG2EpH/kPyPyti1q1bdY0nX5umsL2mdLWlZwrIckLZN0f63zDpY0Pd/nSUm7Fqw7RtKrkl6R9OeC9tUF55lczLHy9d/Mr39ArfYdJa2QdF6xr0FtDRZSCi72ZUn/I6ndBjx2SLqjYLmZpEW1X6QijjO3rl+q+raR1FnSffkv1j8lXS+pRRHn+elnqcvMzDasiHg9IvpFRD9gd+BD4J5am/0UmBERfYATgesBJDUFbgSGAj2A4yX1yPd5CjgAeKv2OfP9rgSmFLR1AC4D9gL2BC6T1D4iPqiqL6/xLWBSXvs5Be2/r2rPXQX8Vx2X/N/At/N9/gxckp+/K3AxsE9E9ATOLtjno4Iahq3rWPnxtgB+DPytjhquAf63aqHI16CGhuxJqbrYXsB7wJnre0BJVbPDVgK9JLXKlw8EFqzv8ddxbpH9YtwbEV2BbkAb4FdF7O6QYmaWjsHAPyOidrDoAUwFiIjXgJ0kdSILE29ExJsR8QkwATgi3+7FiJhbz3l+BEwEFha0DQEejoj3ImIp8DBwcOFOkroB2wBP1HHM44HxVQsR8QjwQR3bBdA2f7wl8Hb++LvAjfm5iYiFdexb7LEAfkEWxD6udQ1HAnOAV+o5Zn2vQQ2NNdzzDLA9gKRd8u6pFyQ9IWm3gvZnJb0k6ZeSVuTtZfl2k4FXC475IFA1lbzGiyapg6R78y67ZyX1ydu3kjQl7+IaA6hgn+/k3XszJP2fPAEX2h/4OCL+L0BErAbOAU6R1FrSSZJGFxzv/rz23wCt8uP+af2fSjMzW0/HUfCeUWAmcDSApD2BLwOdyd6/5hVsNz9vq5ek7YGjyHohChVzrOOAO6PW7beSvgzsTB6k1uE04EFJ88l6Wn6Tt3cDukl6Kn9/LAxIm0malrcfua5jSeoP7BARD9Sqsw1wIXD5Wuqr7zWoocHvW8vf7AcDf8ybbgZOj4h/SNoL+ANZALgeuD4ixks6vdZh+gO9ImJOQdsEYGQ+xNMHGAvsl6+7HHgxIo6UtD9wG9CPrIvtyYj4uaRDgVPzGrsDx5J1f1VI+gPw7Xy/Kj2BFwqLioj3Jf0LqDE+V2ubiyT9
MO/equv5+R7wPYCOHbdmZO/K+g5VUp1aZbc6pibVuiDd2lKtC9KtLdW6IK3aysvLqx+vWLGixnIq8iH6YWRDHrX9Brhe0gzgJeBFYPXnPNV1wIURsSbriP9MjqPuIZzjgLvzP5LX5RzgkIj4m6TzyYZeTiN73+8KlJEFsMcl9Y6IZcCXI2KBpC7AVEkvRcQ/6zpW/t51DXBSHeceBVwbESvquvZ1vAY1NGRIaZW/0NsDfwceztPV14C/FBTeMv93IHBk/vjPwO8KjvVcrYBCRMyStBNZL8qDtc69L/DNfLupeQ9KW2AQeUqOiAckLc23H0w2PvZ8XlcranbPNZiIuJksuLFjl10jlc87qC2lz2IolGpdkG5tqdYF6daWal2QVm1zv11W/bgxPyflMxoKTI+Id2uviIj3gZOheoh/DvAm2XvCDgWbdmbdUwwGABPy95SOwCGSKvP9ymodq7xqQVJfoFlE1PijOHccRUydkLQ10DciquaJ3Ak8lD+eD/wtIiqAOZJmk4WW5yNiAUBEvCmpHPiqpPfrOdYWQC+gPL/GLwGTJQ0jm28zXNJvgXbAGkkfR0TVaEO9r0FtDT4nhay7TGRPbBNgWeHkoIjoXsSxVtbTPpkszKyzy2gdBIwrqOkrETGq1javkgWZ/+yUBZ8dgTeASmo+n5utZ01mZrbh1ZgeUEhSu4KbIU4DHs+Dy/NAV0k75+uPI3v/qVdE7BwRO0XETsDdwA8i4l7gr8BByu4kag8clLettb58akR7sukT67IU2DKf2wLZvM2/54/vJQ9J+U0h3YA383paFrTvQ/a+V+exImJ5RHQsuMZngWERMS0i9itovw74dUFAqfca69Lgc1Ii4kPgLOBcspm8cyR9C7KkmqdGyC7wm/nj44o8/Fjg8oh4qVb7E2TDNUgqAxbnv2iPAyfk7UPJXnCAR8hS3zb5ug752F+hR4DWkk7Mt2kKXA3cml/jXKCfpCaSdiCbaFWlQlLzIq/JzMwagKTNyd5kJxW0nV4wxaA78LKk18n+2v8xQERUAj8kCxN/B+6KiFfy/c/K52p0Bmbl8x3rFRHvkU02fT7/+XneVuUY6n4DPw6YUMc8lSeAvwCDJc2XNCSv97vAREkzyYaOzs93+SuwRNKrwKPA+RGxJL/2afn2jwK/iYhX13Gsz6yu12BtGqWPMCJelDSLLD19G/hvSZcAzcnmlswkuw3qDkk/I+tKWl7EcecDN9SxahQwNj/nh8CIvP1yYLykV4CngX/lx3k1r2eKpCZABVnPT/Ws44gISUcBf5B0KVnAe5D/3LnzFFnX4Ktkv8TTC+q5meyXd3pEfHtd12VmZhteRKwEtqrVdlPB42fIehbq2vdBPj21gIi4gbrfhwq3OanW8liyP7Lr2rZLPe2j6mnfr572e6jj9t485Pwk/ylsfxro/VmOVWubsnraR9Va/tRrsDYNFlIiok2t5cMLFg/m0xYAe+dh4DjgK/l+5RSM19V17Nrb5an0yDq2WULWtVZXvXeSjbXVbt+p4PE84PDa2+Trgrz3po51F5LNdDYzM7MipTHbKrM7MDqfrLQMOKW05ZiZmVkpJRNSIuIJoO86N/wCa9W8Ka8XfItoSsrLy2vM3E9FqnVBurWlWhekW1uqdUHatZmtL393j5mZmSXJIcXMzMyS5JBiZmZmSXJIMTMzsyQ5pJiZmVmSHFLMzMwsSQ4pZmZmliSHFDMzM0uSQ4qZmZklqaiQImmXgq9wLsu/9bFdg1ZmZmZmm7Rie1ImAqsl7Ur2jb47AH9usKrMzMxsk1dsSFkTEZXAUcDvI+J8YNuGK8vMzMw2dcWGlApJxwMjgPvztuYNU5KZmZlZ8SHlZGAg8KuImCNpZ+D2hivLzMzMNnXNitkoIl6VdCGwY748B7iyIQszMzOzTVuxd/ccDswAHsqX+0ma3IB1mZmZ2Sau2OGeUcCewDKAiJgBdGmQiszMzMz4DBNnI2J5rbY1G7oYMzMzsypF
zUkBXpF0AtBUUlfgLODphivLzMzMNnXF9qT8COgJrCL7ELflwNkNVJOZmZnZuntSJDUFHoiIbwA/a/iSzMzMzIroSYmI1cAaSVs2Qj1mZmZmQPFzUlYAL0l6GFhZ1RgRZzVIVWZmZrbJKzakTMp/zMzMzBpFsZ84O66hCzEzMzMrVFRIkTQHiNrtEeEPdDMzM7MGUexwz4CCx5sB3wI6bPhyzMzMzDJFfU5KRCwp+FkQEdcBhzZsaWZmZrYpK3a4p3/BYhOynpVie2HMzMzMPrNig8bVBY8rgTnAMRu+HDMzM7NMsSHl1Ih4s7BB0s4NUI+ZmZkZUPx399xdZJuZmZnZBrHWnhRJu5F9seCWko4uWNWW7C4fMzMzswaxruGerwCHAe2AwwvaPwC+20A1mZmZma09pETEfcB9kgZGxDONVJOZmZlZ0RNnX5R0JtnQT/UwT0Sc0iBVmZmZ2Sav2ImztwNfAoYAjwGdyYZ8zMzMzBpEsSFl14i4FFiZf9ngocBeDVeWmZmZbeqKDSkV+b/LJPUCtgS2aZiSzMzMzIqfk3KzpPbApcBkoA0wssGqMjMzs01eUSElIsbkDx8DujRcOWZmZmaZooZ7JHWS9EdJ/5sv95B0asOWZmZmZpuyYuek3Ar8FdguX54NnN0A9ZiZmZkBxYeUjhFxF7AGICIqgdUNVpWZmZlt8ooNKSslbQUEgKS9geUNVpWZmZlt8oq9u+cnZHf17CLpKWBrYHiDVWVmZmabvHV9C/KOEfGviJgu6etkXzgo4PWIqFjbvmZmZmbrY13DPfcWPL4zIl6JiJcdUMzMzKyhrSukqOCxPx/FzMzMGs26QkrU89jMzMysQa1r4mxfSe+T9ai0yh+TL0dEtG3Q6szMzGyTtdaQEhFNG6sQMzMzs0LFfk6KmZmZWaNySDEzM7MkOaSYmZlZkhxSzMzMLEkOKWZmZpakYr+7xxrBRxWr2emiB0pdRp3O7V3JSQnWlmpd0LC1zf3NoQ1yXDOzlLgnxczMzJLkkGJmZmZJckgxsw3q448/Zs8996Rv37707NmTyy67rM7t7rrrLnr06EHPnj054YQTqtvHjRtH165d6dq1K+PGjQPggw8+oF+/ftU/HTt25OyzzwbgrbfeYvDgwfTp04eysjLmz59ffawLL7yQXr160atXL+68887q9qlTp9K/f3969erFiBEjqKysBOC+++6jT58+9OvXjwEDBvDkk08C8MYbbzBw4EB69uxJnz59ahyryllnnUWbNm2ql88555zqert160a7du3WWdcjjzxC//796devH/vuuy9vvPEGALfeeitbb7119fHGjBlTvc9NN91Ez5496d69O2eddRYRNb/BZNiwYfTq1avO18AsdcnNSZH0JeA6YA9gGfAucHZEzG6k85cBn0TE0wVt3wEuAJoClcDzwHkRsawxajLbmLRs2ZKpU6fSpk0bKioq2HfffRk6dCh777139Tb/+Mc/uOKKK3jqqado3749CxcuBOD999/n8ssvZ9q0aUhi9913Z9iwYbRv354ZM2ZU77/77rtz9NFHA3Deeedx4oknMmLECKZOncrFF1/M7bffzgMPPMD06dOZMWMGq1atoqysjKFDh9KmTRtGjBjBI488Qrdu3Rg5ciTjxo3j1FNPZfDgwQwbNgxJzJo1i2OOOYbXXnuNli1bctttt9G1a1fefvttdt99d4YMGVIdPKZNm8bSpUtrPA/XXntt9ePf//73vPjiiwD11tW2bVvOOOMM7rvvPrp3784f/vAHfvnLX3LrrbcCcOyxxzJ69Oga53j66ad5+eWXmTVrFgD77rsvjz32GGVlZQBMmjSpRnAy29gk1ZMiScA9QHlE7BIRuwMXA52K3L/p2paLVAZ8reAYBwPnAEMjoifQH3i62JrMNjWSqt8YKyoqqKioIPtP+z9uueUWzjzzTNq3bw/ANttsA8Dzzz/PgQceSIcOHWjfvj0HHnggDz30UI19Z8+e
zcKFC9lvv/0AePXVV9l///0B+MY3vsF9991X3T5o0CCaNWvG5ptvTp8+fXjooYdYsmQJLVq0oFu3bgAceOCBTJw4EYA2bdpU17py5crqxzvssANdu3YFYLvttmObbbZh0aJFAKxevZrzzz+f3/72t/U+J+PHj+f4449fa11Vz93772dfkbZ8+XK22267dT7Xn3zyCZ988gmrVq2ioqKCTp2y/zWtWLGCa665hksuuWStxzBLWVIhBfgGUBERN1U1RMRMoKmk+6vaJI2WdFL+eK6kKyVNB75Vx/JBkp6RNF3SXyS1Kdjv8rz9JUm7SdoJOB04R9IMSfsBPyPrNVmQ17M6IsZGxOv5cUZKel7Sy5JuzoMWksolXZ8f52VJezb802eWhtWrV9OvXz+22WYbDjzwQPbaa68a62fPns3s2bPZZ5992HvvvavfpBcvXswOO+xQvV3nzp1ZsGBBjX0nTJjAscceWx0g+vbty6RJkwC45557+OCDD1iyZAl9+/bloYce4sMPP2Tx4sU8+uijzJs3j44dO1JZWcm0adMAuPvuu5k3b1718e+55x522203Dj30UMaOHfupa3vuuef45JNP2GWXXQAYPXo0w4YNY9ttt63zuXjrrbeYM2dOdZCqry6AMWPGcMghh9C5c2duv/12LrroourjTJw4kT59+jB8+PDq7QcOHMhXv/pVtt12W7bddluGDBlC9+7dAbj00ks599xzad26df0vlFniUhvu6QW88Dn2WxIR/QEk/aZqWVJHYBJwQESslHQh8BPg5/l+i/PtfkAWRE6TdBOwIiJ+lx+vJzB9LeceHRE/z7e9HTgM+J98XeuI6CdpEDA2v74aJH0P+B5Ax45bM7J35ee4/IbXqVV2S21qUq0LGra28vLyz73vihUr1mv/Yl133XWsWLGCSy+9lN12242dd965et27777LkiVLuPzyy1m0aBEnnngiY8eOZdWqVcyZM6e6vjlz5tCyZcsa9Y4dO5aLL764uu3oo4/mhhtuYPTo0fTp04eOHTvyzDPP0KZNG7p3706fPn1o164dXbp0Yc6cOTz22GNccMEFnHLKKVRUVDBgwAA++uij6uO1b9+em266iZkzZ/LDH/6Qq6++uvo5W7JkCeeccw4XXXQRjz/+OIsXL2bMmDFcd911lJeXs3r16k89t+PHj2fgwIE88cQTALRo0aLOusrLyxk5ciS/+MUv6NGjBxMmTOD444/n/PPPp3379owbN44WLVowefJkjjjiCK655hoWLFjAm2++yfjx44Fs6KtTp060bt2a5557jiOOOIJnn32WlStXNsprXqixfs/siy21kPJ51Z7FVrW8N9ADeCr/q6sF8EzBdpPyf18Ajl7XSST1Bm4HtgB+GhF3At+QdAHQGugAvMJ/Qsp4gIh4XFJbSe1qz2OJiJuBmwF27LJrXP1Smi/Jub0rSbG2VOuChq1t7rfLPve+5eXl1XMWGsP06dNZsmQJJ598cnVb37592WuvvTjggAOArAehU6dObL/99ixcuLC6vvHjxzNo0KDq5ZkzZ9KiRQu+//3v1zjH8OHDgeyNcbfdduOwww4DqHGdJ5xwAocccghlZWWUlZVx5plnAjBlypTquSGFysrKuP766+nVqxcvv/wy/fv3p6ysjGuuuab6fA888ACLFi3i1FNPBWDVqlWcdtpp1RNeIZtAe+ONN/K1r32txrFr19WzZ08WLFjAD37wAwC6dOnCwQcf/Km69ttvPzp06EBZWRlXXXUVvXv3ZujQoUA2XPbxxx/TvHlz5syZw0knnURlZSULFy5k1KhRjRoaGvv3zL6YUhvueQXYvY72SmrWulmt9SvrWRbwcET0y396RMSpBdutyv9dTf2B7RWyeShExEsR0Q/4X6CVpM2APwDDI6I3cEut2qLWsWovm33hLFq0iGXLlgHw0Ucf8fDDD7PbbrvV2ObII4+sfsNcvHgxs2fPpkuXLuyxxx5MmTKFpUuXsnTp
UqZMmcKQIUOq9yuc21Fl8eLFrFmzBoArrriCU045BciGnJYsWQLArFmzmDVrFgcddBBA9UTdVatWceWVV3L66acD2V08VXfHTJ8+nVWrVrHVVltRUVHBUUcdxYknnlgdUAAOPfRQ/v3vfzN37lzmzp1L69atawSU1157jaVLlzJw4MDqtvrqat++PcuXL2f27OwegYcffrh66Oadd96p3n/y5MnV7TvuuCMzZ86ksrKSiooKHnvsMbp3784ZZ5zB22+/zdy5c3nyySfp1q2bezVso5Tan6BTgV9L+l7ew4CkPmRho4eklkArYDDwZBHHexa4UdKuEfGGpM2B7ddxp9AHQNuC5SuA30k6IiKq7m1slf9bFUgW53NdhgN3F+x7LPCopH2B5RGxvIiazTZq77zzDiNGjGD16tWsWbOGY445hsMOO4yRI0cyYMAAhg0bxpAhQ5gyZQo9evSgadOmXHXVVWy11Va0bduWSy+9lD322AOAkSNH0qFDh+pj33XXXTz44IM1zldeXs7FF1+MJAYNGsSNN94IZJN2qybXtm3bljvuuINmzbL/5V111VXcf//9rFmzhjPOOKN6vsjEiRO57bbbaN68Oa1ateLOO+9EEuXl5Tz++OMsWbKk+m6bW2+9lX79+q31uZgwYQLHHXdcjYnDa6vrlltu4Zvf/CZNmjShffv21XNibrjhBiZPnkyzZs3o0KFDdQ3Dhw/njjvuoHfv3kji4IMP5vDDD/9Mr5dZylT7nvpSk7Qd2S3IuwMfA3OBs4HvAkcBc4AVwOSIuFXSXGBARCzO96+9vD9wJdAyP8UlETG5cDtJA4DfRUSZpG5kQWMN8KOIeELSCOA8sluQlwEvA5dFxDuSfgkcD/wbmA28FRGjJJUDM4CvA82BUyLiubVd+45ddo0mx1z/+Z64BpbqsEqqdUEDD/esx8fip9wNn2ptqdYF6dbWmHVJeiEiBjTKyaxRJfd/94h4GzimjlUX5D+1t99pHctTyT5zpd79ImIa2a3H5L0sfWptOw4YV0+9lwD13eN3R0ScXc86MzMzW4vU5qSYmZmZAQn2pHxRRERZqWswMzPbmDmkJKRV86a8vh5zDRpSeXn5et322lBSrQvSrs3MbGPg4R4zMzNLkkOKmZmZJckhxczMzJLkkGJmZmZJckgxMzOzJDmkmJmZWZIcUszMzCxJDilmZmaWJIcUMzMzS5JDipmZmSXJIcXMzMyS5JBiZmZmSXJIMTMzsyQ5pJiZmVmSHFLMzMwsSQ4pZmZmliSHFDMzM0uSQ4qZmZklySHFzMzMkuSQYmZmZklySDEzM7MkOaSYmZlZkhxSzMzMLEkOKWZmZpYkhxQzMzNLkkOKmZmZJckhxczMzJLkkGJmZmZJckgxMzOzJDmkmJmZWZIcUszMzCxJDilmZmaWJIcUMzMzS5JDipmZmSXJIcXMzMyS5JBiZmZmSXJIMTMzsyQ5pJiZmVmSHFLMzMwsSQ4pZmZmliSHFDMzM0uSQ4qZmZklySHFzMzMkuSQYmZmZklySDEzM7MkOaSYmZlZkhxSzMzMLEkOKWZmZpYkhxQzMzNLkkOKmZmZJckhxczMzJLkkGJmZmZJckgxMzOzJDmkmJmZWZIcUszMzCxJDilmZmaWJIcUMzMzS5JDipmZmSXJIcXMzMyS5JBiZmZmSXJIMTMzsyQ5pJiZmVmSHFLMzMwsSQ4pZmZmliRFRKlrsJykD4DXS11HPToCi0tdRB1SrQvSrS3VuiDd2lKtC9KtrTHr+nJEbN1I57JG1KzUBVgNr0fEgFIXURdJ01KsLdW6IN3aUq0L0q0t1bog3dpSrcs2Lh7uMTMzsyQ5pJiZmVmSHFLScnOpC1iLVGtLtS5It7ZU64J0a0u1Lki3tlTrso2IJ86amZlZktyTYmZmZklySDEzM7MkOaQkQtLBkl6X9Iaki0pdTxVJYyUtlPRyqWspJGkHSY9KelXSK5J+XOqaACRtJuk5STPzui4v
dU21SWoq6UVJ95e6liqS5kp6SdIMSdNKXU8hSe0k3S3pNUl/lzQwgZq+kj9XVT/vSzq71HVVkXRO/vv/sqTxkjYrdU22cfKclARIagrMBg4E5gPPA8dHxKslLQyQNAhYAdwWEb1KXU8VSdsC20bEdElbAC8AR5b6OZMkYPOIWCGpOfAk8OOIeLaUdRWS9BNgANA2Ig4rdT2QhRRgQEQk96FkksYBT0TEGEktgNYRsazEZVXL//+xANgrIt5KoJ7tyX7ve0TER5LuAh6MiFtLW5ltjNyTkoY9gTci4s2I+ASYABxR4poAiIjHgfdKXUdtEfFOREzPH38A/B3YvrRVQWRW5IvN859k/hKQ1Bk4FBhT6lo2BpK2BAYBfwSIiE9SCii5wcA/UwgoBZoBrSQ1A1oDb5e4HttIOaSkYXtgXsHyfBJ4w91YSNoJ+CrwtxKXAlQPp8wAFgIPR0QSdeWuAy4A1pS4jtoCmCLpBUnfK3UxBXYGFgH/Nx8iGyNp81IXVctxwPhSF1ElIhYAvwP+BbwDLI+IKaWtyjZWDim2UZPUBpgInB0R75e6HoCIWB0R/YDOwJ6Skhgmk3QYsDAiXih1LXXYNyL6A0OBM/NhxhQ0A/oD/x0RXwVWAinNGWsBDAP+UupaqkhqT9YTvDOwHbC5pO+UtirbWDmkpGEBsEPBcue8zdYin/MxEfhTREwqdT215cMCjwIHl7iUKvsAw/L5HxOA/SXdUdqSMvlf30TEQuAesiHQFMwH5hf0ht1NFlpSMRSYHhHvlrqQAgcAcyJiUURUAJOAr5W4JttIOaSk4Xmgq6Sd87+MjgMml7impOUTVP8I/D0iril1PVUkbS2pXf64Fdlk6NdKWlQuIi6OiM4RsRPZ79jUiCj5X7iSNs8nP5MPpRwEJHE3WUT8G5gn6St502Cg5BPaCxxPQkM9uX8Be0tqnf93OphszpjZZ+ZvQU5ARFRK+iHwV6ApMDYiXilxWQBIGg+UAR0lzQcui4g/lrYqIOsV+C/gpXz+B8BPI+LB0pUEwLbAuPyOiybAXRGRzK2+ieoE3JO9n9EM+HNEPFTakmr4EfCn/A+IN4GTS1wPUB3oDgS+X+paCkXE3yTdDUwHKoEX8Ufk2+fkW5DNzMwsSR7uMTMzsyQ5pJiZmVmSHFLMzMwsSQ4pZmZmliSHFDMzM0uSb0E22wRIWg28VNB0ZETMLVE5ZmZF8S3IZpsASSsiok0jnq9ZRFQ21vnM7IvJwz1mhqRtJT0uaYaklyXtl7cfLGm6pJmSHsnbOki6V9IsSc9K6pO3j5J0u6SngNvzT9+dKOn5/GefEl6imW2EPNxjtmloVfDJvHMi4qha608A/hoRv8o/Lbe1pK2BW4BBETFHUod828uBFyPiSEn7A7cB/fJ1Pci+LPAjSX8Gro2IJyXtSPaJyt0b7ArN7AvHIcVs0/BR/s3M9XkeGJt/aeO9ETFDUhnweETMAYiI9/Jt9wW+mbdNlbSVpLb5uskR8VH++ACgR/5x9wBtJbWJiBUb6qLM7IvNIcXMiIjHJQ0CDgVulXQNsPRzHGplweMmwN4R8fGGqNHMNj2ek2JmSPoy8G5E3AKMAfoDzwKDJO2cb1M13PME8O28rQxYHBHv13HYKWRfzld1jn4NVL6ZfUG5J8XMIPum6/MlVQArgBMjYpGk7wGTJDUBFpJ96+4osqGhWcCHwIh6jnkWcGO+XTPgceD0Br0KM/tC8S3IZmZmliQP95iZmVmSHFLMzMwsSQ4pZmZmliSHFDMzM0uSQ4qZmZklySHFzMzMkuSQYmZmZkn6/+obwf51JA+xAAAAAElFTkSuQmCC",
97
+ "text/plain": [
98
+ "<Figure size 432x288 with 1 Axes>"
99
+ ]
100
+ },
101
+ "metadata": {
102
+ "needs_background": "light"
103
+ },
104
+ "output_type": "display_data"
105
+ }
106
+ ],
107
+ "source": [
108
+ "xgb.plot_importance(model2, importance_type='gain')"
109
+ ]
110
+ },
111
+ {
112
+ "cell_type": "code",
113
+ "execution_count": 6,
114
+ "metadata": {},
115
+ "outputs": [],
116
+ "source": [
117
+ "from sklearn.metrics import roc_auc_score, precision_score, recall_score\n",
118
+ "\n",
119
+ "# st.subheader('New Prediction')\n",
120
+ "\n",
121
+ "# df_probas = res1.groupby(pd.qcut(res1['Predicted'],5)).agg({'True':[np.mean,len,np.sum]})\n",
122
+ "df_probas = res1.groupby(pd.cut(res1['Predicted'],[-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf])).agg({'True':[np.mean,len,np.sum]})\n",
123
+ "df_probas.columns = ['PctGreen','NumObs','NumGreen']\n",
124
+ "\n",
125
+ "roc_auc_score_all = roc_auc_score(res1['True'].astype(int), res1['Predicted'].values)\n",
126
+ "precision_score_all = precision_score(res1['True'].astype(int), res1['Predicted'] > 0.5)\n",
127
+ "recall_score_all = recall_score(res1['True'].astype(int), res1['Predicted'] > 0.5)\n",
128
+ "len_all = len(res1)\n",
129
+ "\n",
130
+ "res2_filtered = res1.loc[(res1['Predicted'] > 0.625) | (res1['Predicted'] <= 0.375)]\n",
131
+ "\n",
132
+ "roc_auc_score_hi = roc_auc_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'].values)\n",
133
+ "precision_score_hi = precision_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)\n",
134
+ "recall_score_hi = recall_score(res2_filtered['True'].astype(int), res2_filtered['Predicted'] > 0.5)\n",
135
+ "len_hi = len(res2_filtered)\n",
136
+ "\n",
137
+ "df_performance = pd.DataFrame(\n",
138
+ " index=[\n",
139
+ " 'N',\n",
140
+ " 'ROC AUC',\n",
141
+ " 'Precision',\n",
142
+ " 'Recall'\n",
143
+ " ],\n",
144
+ " columns = [\n",
145
+ " 'All',\n",
146
+ " 'High Confidence'\n",
147
+ " ],\n",
148
+ " data = [\n",
149
+ " [len_all, len_hi],\n",
150
+ " [roc_auc_score_all, roc_auc_score_hi],\n",
151
+ " [precision_score_all, precision_score_hi],\n",
152
+ " [recall_score_all, recall_score_hi]\n",
153
+ " ]\n",
154
+ ").round(2)\n",
155
+ "\n",
156
+ "def get_acc(t, p):\n",
157
+ " if t == False and p <= 0.375:\n",
158
+ " return '✅'\n",
158
+ " elif t == True and p > 0.625:\n",
159
+ " return '✅'\n",
160
+ " elif t == False and p > 0.625:\n",
161
+ " return '❌'\n",
162
+ " elif t == True and p <= 0.375:\n",
163
+ " return '❌'\n",
164
+ " else:\n",
165
+ " return '🟨'\n",
167
+ "\n",
168
+ "perf_daily = res1.copy()\n",
169
+ "perf_daily['Accuracy'] = [get_acc(t, p) for t, p in zip(perf_daily['True'], perf_daily['Predicted'])]"
170
+ ]
171
+ },
172
+ {
173
+ "cell_type": "code",
174
+ "execution_count": 7,
175
+ "metadata": {},
176
+ "outputs": [],
177
+ "source": [
178
+ "perf_daily1 = perf_daily.merge(data['ClosePct'], left_index=True, right_index=True)"
179
+ ]
180
+ },
181
+ {
182
+ "cell_type": "code",
183
+ "execution_count": 8,
184
+ "metadata": {},
185
+ "outputs": [],
186
+ "source": [
187
+ "res2 = res1.merge(data[['ClosePct','HighPct','LowPct']], left_index=True, right_index=True)"
188
+ ]
189
+ },
190
+ {
191
+ "cell_type": "code",
192
+ "execution_count": 9,
193
+ "metadata": {},
194
+ "outputs": [],
195
+ "source": [
196
+ "int_labels = ['(-∞, .20]', '(.20, .40]', '(.40, .60]', '(.60, .80]', '(.80, ∞]']\n",
197
+ "# df_probas = res1.groupby(pd.qcut(res1['Predicted'],5)).agg({'True':[np.mean,len,np.sum]})\n",
198
+ "df_probas = res2.groupby(pd.cut(res2['Predicted'], bins = [-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf], labels = int_labels)).agg({'True':[np.mean,len,np.sum],'ClosePct':[np.mean], 'HighPct':[np.mean], 'LowPct':[np.mean]})\n",
199
+ "df_probas.columns = ['PctGreen','NumObs','NumGreen','AvgPerf','AvgHigh','AvgLow']"
200
+ ]
201
+ },
202
+ {
203
+ "cell_type": "code",
204
+ "execution_count": 10,
205
+ "metadata": {},
206
+ "outputs": [
207
+ {
208
+ "data": {
209
+ "text/html": [
210
+ "<div>\n",
211
+ "<style scoped>\n",
212
+ " .dataframe tbody tr th:only-of-type {\n",
213
+ " vertical-align: middle;\n",
214
+ " }\n",
215
+ "\n",
216
+ " .dataframe tbody tr th {\n",
217
+ " vertical-align: top;\n",
218
+ " }\n",
219
+ "\n",
220
+ " .dataframe thead th {\n",
221
+ " text-align: right;\n",
222
+ " }\n",
223
+ "</style>\n",
224
+ "<table border=\"1\" class=\"dataframe\">\n",
225
+ " <thead>\n",
226
+ " <tr style=\"text-align: right;\">\n",
227
+ " <th></th>\n",
228
+ " <th>PctGreen</th>\n",
229
+ " <th>NumObs</th>\n",
230
+ " <th>NumGreen</th>\n",
231
+ " <th>AvgPerf</th>\n",
232
+ " <th>AvgHigh</th>\n",
233
+ " <th>AvgLow</th>\n",
234
+ " </tr>\n",
235
+ " <tr>\n",
236
+ " <th>Predicted</th>\n",
237
+ " <th></th>\n",
238
+ " <th></th>\n",
239
+ " <th></th>\n",
240
+ " <th></th>\n",
241
+ " <th></th>\n",
242
+ " <th></th>\n",
243
+ " </tr>\n",
244
+ " </thead>\n",
245
+ " <tbody>\n",
246
+ " <tr>\n",
247
+ " <th>(-∞, .20]</th>\n",
248
+ " <td>0.214286</td>\n",
249
+ " <td>112</td>\n",
250
+ " <td>24</td>\n",
251
+ " <td>-0.012956</td>\n",
252
+ " <td>0.009253</td>\n",
253
+ " <td>-0.007881</td>\n",
254
+ " </tr>\n",
255
+ " <tr>\n",
256
+ " <th>(.20, .40]</th>\n",
257
+ " <td>0.322709</td>\n",
258
+ " <td>251</td>\n",
259
+ " <td>81</td>\n",
260
+ " <td>-0.004048</td>\n",
261
+ " <td>0.006433</td>\n",
262
+ " <td>-0.005791</td>\n",
263
+ " </tr>\n",
264
+ " <tr>\n",
265
+ " <th>(.40, .60]</th>\n",
266
+ " <td>0.504630</td>\n",
267
+ " <td>216</td>\n",
268
+ " <td>109</td>\n",
269
+ " <td>-0.000173</td>\n",
270
+ " <td>0.006079</td>\n",
271
+ " <td>-0.006083</td>\n",
272
+ " </tr>\n",
273
+ " <tr>\n",
274
+ " <th>(.60, .80]</th>\n",
275
+ " <td>0.645022</td>\n",
276
+ " <td>231</td>\n",
277
+ " <td>149</td>\n",
278
+ " <td>0.002680</td>\n",
279
+ " <td>0.006207</td>\n",
280
+ " <td>-0.005687</td>\n",
281
+ " </tr>\n",
282
+ " <tr>\n",
283
+ " <th>(.80, ∞]</th>\n",
284
+ " <td>0.791045</td>\n",
285
+ " <td>268</td>\n",
286
+ " <td>212</td>\n",
287
+ " <td>0.009038</td>\n",
288
+ " <td>0.006807</td>\n",
289
+ " <td>-0.007949</td>\n",
290
+ " </tr>\n",
291
+ " </tbody>\n",
292
+ "</table>\n",
293
+ "</div>"
294
+ ],
295
+ "text/plain": [
296
+ " PctGreen NumObs NumGreen AvgPerf AvgHigh AvgLow\n",
297
+ "Predicted \n",
298
+ "(-∞, .20] 0.214286 112 24 -0.012956 0.009253 -0.007881\n",
299
+ "(.20, .40] 0.322709 251 81 -0.004048 0.006433 -0.005791\n",
300
+ "(.40, .60] 0.504630 216 109 -0.000173 0.006079 -0.006083\n",
301
+ "(.60, .80] 0.645022 231 149 0.002680 0.006207 -0.005687\n",
302
+ "(.80, ∞] 0.791045 268 212 0.009038 0.006807 -0.007949"
303
+ ]
304
+ },
305
+ "execution_count": 10,
306
+ "metadata": {},
307
+ "output_type": "execute_result"
308
+ }
309
+ ],
310
+ "source": [
311
+ "df_probas"
312
+ ]
313
+ },
314
+ {
315
+ "cell_type": "code",
316
+ "execution_count": 11,
317
+ "metadata": {},
318
+ "outputs": [],
319
+ "source": [
320
+ "res2['Quantile'] = pd.cut(res2['Predicted'], bins = [-np.inf, 0.2, 0.4, 0.6, 0.8, np.inf], labels = int_labels)"
321
+ ]
322
+ },
323
+ {
324
+ "cell_type": "code",
325
+ "execution_count": 12,
326
+ "metadata": {},
327
+ "outputs": [
328
+ {
329
+ "data": {
330
+ "image/png": "",
331
+ "text/plain": [
332
+ "<Figure size 1080x576 with 6 Axes>"
333
+ ]
334
+ },
335
+ "metadata": {
336
+ "needs_background": "light"
337
+ },
338
+ "output_type": "display_data"
339
+ }
340
+ ],
341
+ "source": [
342
+ "import matplotlib.pyplot as plt\n",
343
+ "\n",
344
+ "# Assuming you have a DataFrame 'res2' with the columns 'Quantile' and 'LowPct'\n",
345
+ "# Assuming you have a list 'int_labels' containing the unique values for 'Quantile'\n",
346
+ "\n",
347
+ "# Create a 2x3 grid of subplots\n",
348
+ "fig, axs = plt.subplots(2, 3, figsize=(15, 8))\n",
349
+ "\n",
350
+ "# Loop through the 'int_labels' and plot the histograms in each subplot\n",
351
+ "for i, lbl in enumerate(int_labels):\n",
352
+ " # Get the subplot position based on the index i\n",
353
+ " row = i // 3\n",
354
+ " col = i % 3\n",
355
+ " \n",
356
+ " # Filter the DataFrame based on the specified value\n",
357
+ " data_subset = res2.loc[res2['Quantile'] == lbl, 'LowPct']\n",
358
+ " \n",
359
+ " # Plot the histogram in the corresponding subplot\n",
360
+ " axs[row, col].hist(data_subset)\n",
361
+ " axs[row, col].set_title(lbl)\n",
362
+ "\n",
363
+ "# Add some space between the subplots\n",
364
+ "plt.tight_layout()\n",
365
+ "\n",
366
+ "# Show the plot\n",
367
+ "plt.show()\n"
368
+ ]
369
+ },
370
+ {
371
+ "cell_type": "code",
372
+ "execution_count": 39,
373
+ "metadata": {},
374
+ "outputs": [],
375
+ "source": [
376
+ "# Investigate EM\n",
377
+ "data['VIX_EM'] = data['Close'] * (data['Close_VIX']/100) * (np.sqrt( 1 ) / np.sqrt(252))\n",
378
+ "data['VIX_EM_High'] = data['Close'] + data['VIX_EM']\n",
379
+ "data['VIX_EM_Low'] = data['Close'] - data['VIX_EM']\n",
380
+ "\n",
381
+ "data['VIX_EM_125'] = data['VIX_EM'] * 1.25\n",
382
+ "data['VIX_EM_125_High'] = data['Close'] + data['VIX_EM_125']\n",
383
+ "data['VIX_EM_125_Low'] = data['Close'] - data['VIX_EM_125']\n",
384
+ "\n",
385
+ "data['VIX_EM_15'] = data['VIX_EM'] * 1.5\n",
386
+ "data['VIX_EM_15_High'] = data['Close'] + data['VIX_EM_15']\n",
387
+ "data['VIX_EM_15_Low'] = data['Close'] - data['VIX_EM_15']\n",
388
+ "\n",
389
+ "data['VIX_EM'] = data['VIX_EM'].shift(1)\n",
390
+ "data['VIX_EM_High'] = data['VIX_EM_High'].shift(1)\n",
391
+ "data['VIX_EM_Low'] = data['VIX_EM_Low'].shift(1)\n",
392
+ "\n",
393
+ "data['VIX_EM_15'] = data['VIX_EM_15'].shift(1)\n",
394
+ "data['VIX_EM_15_High'] = data['VIX_EM_15_High'].shift(1)\n",
395
+ "data['VIX_EM_15_Low'] = data['VIX_EM_15_Low'].shift(1)\n",
396
+ "\n",
397
+ "data['VIX_EM_125'] = data['VIX_EM_125'].shift(1)\n",
398
+ "data['VIX_EM_125_High'] = data['VIX_EM_125_High'].shift(1)\n",
399
+ "data['VIX_EM_125_Low'] = data['VIX_EM_125_Low'].shift(1)"
400
+ ]
401
+ },
402
+ {
403
+ "cell_type": "code",
404
+ "execution_count": 33,
405
+ "metadata": {},
406
+ "outputs": [
407
+ {
408
+ "data": {
409
+ "text/html": [
410
+ "<div>\n",
411
+ "<style scoped>\n",
412
+ " .dataframe tbody tr th:only-of-type {\n",
413
+ " vertical-align: middle;\n",
414
+ " }\n",
415
+ "\n",
416
+ " .dataframe tbody tr th {\n",
417
+ " vertical-align: top;\n",
418
+ " }\n",
419
+ "\n",
420
+ " .dataframe thead th {\n",
421
+ " text-align: right;\n",
422
+ " }\n",
423
+ "</style>\n",
424
+ "<table border=\"1\" class=\"dataframe\">\n",
425
+ " <thead>\n",
426
+ " <tr style=\"text-align: right;\">\n",
427
+ " <th></th>\n",
428
+ " <th>VIX_EM</th>\n",
429
+ " <th>VIX_EM_15</th>\n",
430
+ " <th>VIX_EM_15_High</th>\n",
431
+ " <th>Close</th>\n",
432
+ " </tr>\n",
433
+ " <tr>\n",
434
+ " <th>index</th>\n",
435
+ " <th></th>\n",
436
+ " <th></th>\n",
437
+ " <th></th>\n",
438
+ " <th></th>\n",
439
+ " </tr>\n",
440
+ " </thead>\n",
441
+ " <tbody>\n",
442
+ " <tr>\n",
443
+ " <th>2018-07-02</th>\n",
444
+ " <td>NaN</td>\n",
445
+ " <td>NaN</td>\n",
446
+ " <td>NaN</td>\n",
447
+ " <td>2726.709961</td>\n",
448
+ " </tr>\n",
449
+ " <tr>\n",
450
+ " <th>2018-07-03</th>\n",
451
+ " <td>26.795587</td>\n",
452
+ " <td>40.193381</td>\n",
453
+ " <td>2766.903342</td>\n",
454
+ " <td>2713.219971</td>\n",
455
+ " </tr>\n",
456
+ " <tr>\n",
457
+ " <th>2018-07-05</th>\n",
458
+ " <td>27.585969</td>\n",
459
+ " <td>41.378954</td>\n",
460
+ " <td>2754.598925</td>\n",
461
+ " <td>2736.610107</td>\n",
462
+ " </tr>\n",
463
+ " <tr>\n",
464
+ " <th>2018-07-06</th>\n",
465
+ " <td>25.806818</td>\n",
466
+ " <td>38.710227</td>\n",
467
+ " <td>2775.320335</td>\n",
468
+ " <td>2759.820068</td>\n",
469
+ " </tr>\n",
470
+ " <tr>\n",
471
+ " <th>2018-07-09</th>\n",
472
+ " <td>23.244055</td>\n",
473
+ " <td>34.866083</td>\n",
474
+ " <td>2794.686151</td>\n",
475
+ " <td>2784.169922</td>\n",
476
+ " </tr>\n",
477
+ " <tr>\n",
478
+ " <th>...</th>\n",
479
+ " <td>...</td>\n",
480
+ " <td>...</td>\n",
481
+ " <td>...</td>\n",
482
+ " <td>...</td>\n",
483
+ " </tr>\n",
484
+ " <tr>\n",
485
+ " <th>2023-07-28</th>\n",
486
+ " <td>41.188099</td>\n",
487
+ " <td>61.782148</td>\n",
488
+ " <td>4599.192304</td>\n",
489
+ " <td>4582.229980</td>\n",
490
+ " </tr>\n",
491
+ " <tr>\n",
492
+ " <th>2023-07-31</th>\n",
493
+ " <td>38.477492</td>\n",
494
+ " <td>57.716238</td>\n",
495
+ " <td>4639.946219</td>\n",
496
+ " <td>4588.959961</td>\n",
497
+ " </tr>\n",
498
+ " <tr>\n",
499
+ " <th>2023-08-01</th>\n",
500
+ " <td>39.401237</td>\n",
501
+ " <td>59.101856</td>\n",
502
+ " <td>4648.061817</td>\n",
503
+ " <td>4576.729980</td>\n",
504
+ " </tr>\n",
505
+ " <tr>\n",
506
+ " <th>2023-08-02</th>\n",
507
+ " <td>40.161151</td>\n",
508
+ " <td>60.241726</td>\n",
509
+ " <td>4636.971706</td>\n",
510
+ " <td>4513.390137</td>\n",
511
+ " </tr>\n",
512
+ " <tr>\n",
513
+ " <th>2023-08-03</th>\n",
514
+ " <td>45.746582</td>\n",
515
+ " <td>68.619873</td>\n",
516
+ " <td>4582.010010</td>\n",
517
+ " <td>4501.890137</td>\n",
518
+ " </tr>\n",
519
+ " </tbody>\n",
520
+ "</table>\n",
521
+ "<p>1281 rows × 4 columns</p>\n",
522
+ "</div>"
523
+ ],
524
+ "text/plain": [
525
+ " VIX_EM VIX_EM_15 VIX_EM_15_High Close\n",
526
+ "index \n",
527
+ "2018-07-02 NaN NaN NaN 2726.709961\n",
528
+ "2018-07-03 26.795587 40.193381 2766.903342 2713.219971\n",
529
+ "2018-07-05 27.585969 41.378954 2754.598925 2736.610107\n",
530
+ "2018-07-06 25.806818 38.710227 2775.320335 2759.820068\n",
531
+ "2018-07-09 23.244055 34.866083 2794.686151 2784.169922\n",
532
+ "... ... ... ... ...\n",
533
+ "2023-07-28 41.188099 61.782148 4599.192304 4582.229980\n",
534
+ "2023-07-31 38.477492 57.716238 4639.946219 4588.959961\n",
535
+ "2023-08-01 39.401237 59.101856 4648.061817 4576.729980\n",
536
+ "2023-08-02 40.161151 60.241726 4636.971706 4513.390137\n",
537
+ "2023-08-03 45.746582 68.619873 4582.010010 4501.890137\n",
538
+ "\n",
539
+ "[1281 rows x 4 columns]"
540
+ ]
541
+ },
542
+ "execution_count": 33,
543
+ "metadata": {},
544
+ "output_type": "execute_result"
545
+ }
546
+ ],
547
+ "source": [
548
+ "data[['VIX_EM','VIX_EM_15','VIX_EM_15_High','Close']]"
549
+ ]
550
+ },
551
+ {
552
+ "cell_type": "code",
553
+ "execution_count": 34,
554
+ "metadata": {},
555
+ "outputs": [
556
+ {
557
+ "data": {
558
+ "text/plain": [
559
+ "0.8032786885245902"
560
+ ]
561
+ },
562
+ "execution_count": 34,
563
+ "metadata": {},
564
+ "output_type": "execute_result"
565
+ }
566
+ ],
567
+ "source": [
568
+ "# How often did price close within EM?\n",
569
+ "len(data.query('Close <= VIX_EM_High & Close >= VIX_EM_Low')) / len(data)"
570
+ ]
571
+ },
572
+ {
573
+ "cell_type": "code",
574
+ "execution_count": 35,
575
+ "metadata": {},
576
+ "outputs": [
577
+ {
578
+ "data": {
579
+ "text/plain": [
580
+ "0.33099141295862605"
581
+ ]
582
+ },
583
+ "execution_count": 35,
584
+ "metadata": {},
585
+ "output_type": "execute_result"
586
+ }
587
+ ],
588
+ "source": [
589
+ "# How often was EM tested?\n",
590
+ "len(data.query('High > VIX_EM_High | Low < VIX_EM_Low')) / len(data)"
591
+ ]
592
+ },
593
+ {
594
+ "cell_type": "code",
595
+ "execution_count": 40,
596
+ "metadata": {},
597
+ "outputs": [
598
+ {
599
+ "data": {
600
+ "text/plain": [
601
+ "0.8930523028883685"
602
+ ]
603
+ },
604
+ "execution_count": 40,
605
+ "metadata": {},
606
+ "output_type": "execute_result"
607
+ }
608
+ ],
609
+ "source": [
610
+ "# How often did price close within 1.25x EM?\n",
611
+ "len(data.query('Close <= VIX_EM_125_High & Close >= VIX_EM_125_Low')) / len(data)"
612
+ ]
613
+ },
614
+ {
615
+ "cell_type": "code",
616
+ "execution_count": 41,
617
+ "metadata": {},
618
+ "outputs": [
619
+ {
620
+ "data": {
621
+ "text/plain": [
622
+ "0.19750195160031225"
623
+ ]
624
+ },
625
+ "execution_count": 41,
626
+ "metadata": {},
627
+ "output_type": "execute_result"
628
+ }
629
+ ],
630
+ "source": [
631
+ "# How often was 1.25x EM tested?\n",
632
+ "len(data.query('High > VIX_EM_125_High | Low < VIX_EM_125_Low')) / len(data)"
633
+ ]
634
+ },
635
+ {
636
+ "cell_type": "code",
637
+ "execution_count": 42,
638
+ "metadata": {},
639
+ "outputs": [
640
+ {
641
+ "data": {
642
+ "text/plain": [
643
+ "0.9383294301327089"
644
+ ]
645
+ },
646
+ "execution_count": 42,
647
+ "metadata": {},
648
+ "output_type": "execute_result"
649
+ }
650
+ ],
651
+ "source": [
652
+ "# How often did price close within 1.5x EM?\n",
653
+ "len(data.query('Close <= VIX_EM_15_High & Close >= VIX_EM_15_Low')) / len(data)"
654
+ ]
655
+ },
656
+ {
657
+ "cell_type": "code",
658
+ "execution_count": 43,
659
+ "metadata": {},
660
+ "outputs": [
661
+ {
662
+ "data": {
663
+ "text/plain": [
664
+ "0.10772833723653395"
665
+ ]
666
+ },
667
+ "execution_count": 43,
668
+ "metadata": {},
669
+ "output_type": "execute_result"
670
+ }
671
+ ],
672
+ "source": [
673
+ "# How often was 1.5x EM tested?\n",
674
+ "len(data.query('High > VIX_EM_15_High | Low < VIX_EM_15_Low')) / len(data)"
675
+ ]
676
+ },
677
+ {
678
+ "cell_type": "code",
679
+ "execution_count": null,
680
+ "metadata": {},
681
+ "outputs": [],
682
+ "source": []
683
+ }
684
+ ],
685
+ "metadata": {
686
+ "kernelspec": {
687
+ "display_name": "py39",
688
+ "language": "python",
689
+ "name": "python3"
690
+ },
691
+ "language_info": {
692
+ "codemirror_mode": {
693
+ "name": "ipython",
694
+ "version": 3
695
+ },
696
+ "file_extension": ".py",
697
+ "mimetype": "text/x-python",
698
+ "name": "python",
699
+ "nbconvert_exporter": "python",
700
+ "pygments_lexer": "ipython3",
701
+ "version": "3.9.12"
702
+ },
703
+ "orig_nbformat": 4
704
+ },
705
+ "nbformat": 4,
706
+ "nbformat_minor": 2
707
+ }