Make changes in file
Signed-off-by: Aadhitya A <aadhitya864@gmail.com>
- .gitignore +1 -0
- app-plain.py +957 -0
- app.py +15 -15
- demo/sample.csv +0 -0
.gitignore
ADDED
@@ -0,0 +1 @@
+env
app-plain.py
ADDED
@@ -0,0 +1,957 @@
1 |
+
# %%
|
2 |
+
# Import section
|
3 |
+
# (Please don't edit this section unless necessary)
|
4 |
+
import copy
|
5 |
+
from pathlib import Path
|
6 |
+
import warnings
|
7 |
+
import holidays
|
8 |
+
import seaborn as sns
|
9 |
+
import matplotlib
|
10 |
+
import matplotlib.dates as mdates
|
11 |
+
import matplotlib.pyplot as plt
|
12 |
+
plt.style.use('fivethirtyeight')
|
13 |
+
import numpy as np
|
14 |
+
import pandas as pd
|
15 |
+
import glob
|
16 |
+
import csv
|
17 |
+
import lightning.pytorch as pl
|
18 |
+
from lightning.pytorch.callbacks import EarlyStopping, LearningRateMonitor
|
19 |
+
from lightning.pytorch.loggers import TensorBoardLogger
|
20 |
+
import torch
|
21 |
+
from pytorch_forecasting import Baseline, TemporalFusionTransformer, TimeSeriesDataSet
|
22 |
+
from pytorch_forecasting.data import GroupNormalizer, NaNLabelEncoder
|
23 |
+
from pytorch_forecasting.metrics import SMAPE, PoissonLoss, QuantileLoss
|
24 |
+
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters
|
25 |
+
import random
|
26 |
+
import gc
|
27 |
+
import tensorflow as tf
|
28 |
+
import tensorboard as tb
|
29 |
+
tf.io.gfile = tb.compat.tensorflow_stub.io.gfile
|
30 |
+
import os
|
31 |
+
import math
|
32 |
+
import sys
|
33 |
+
from sklearn.model_selection import train_test_split
|
34 |
+
from sklearn.preprocessing import MinMaxScaler
|
35 |
+
import tensorflow as tf
|
36 |
+
from tensorflow.keras.layers import Conv1D, LSTM, Dense, Dropout, Bidirectional, TimeDistributed
|
37 |
+
from tensorflow.keras.layers import MaxPooling1D, Flatten
|
38 |
+
from tensorflow.keras.regularizers import L1, L2
|
39 |
+
from tensorflow.keras.metrics import Accuracy
|
40 |
+
from tensorflow.keras.metrics import RootMeanSquaredError
|
41 |
+
from sklearn.metrics import mean_squared_error as MSE
|
42 |
+
from sklearn.model_selection import KFold
|
43 |
+
from sklearn.inspection import permutation_importance
|
44 |
+
from tensorflow.keras.utils import plot_model
|
45 |
+
from sklearn.metrics import explained_variance_score, mean_poisson_deviance, mean_gamma_deviance, mean_squared_error, mean_squared_log_error, d2_absolute_error_score, d2_pinball_score, d2_tweedie_score
|
46 |
+
from sklearn.metrics import r2_score
|
47 |
+
from sklearn.metrics import max_error
|
48 |
+
import datetime
|
49 |
+
from datetime import date
|
50 |
+
import optuna
|
51 |
+
from tensorflow.keras.callbacks import Callback
|
52 |
+
from optuna.integration import TFKerasPruningCallback
|
53 |
+
import shutil
|
54 |
+
import gradio as gr
|
55 |
+
|
56 |
+
# Some variables (don't edit these unless necessary)
|
57 |
+
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
|
58 |
+
random.seed(30)
|
59 |
+
np.random.seed(30)
|
60 |
+
tf.random.set_seed(30)
|
61 |
+
torch.manual_seed(30)
|
62 |
+
torch.cuda.manual_seed(30)
|
63 |
+
|
64 |
+
# Global variables
|
65 |
+
PATIENCE = 30
|
66 |
+
MAX_EPOCHS = 3
|
67 |
+
LEARNING_RATE = 0.01
|
68 |
+
OPTUNA = True
|
69 |
+
ACCELERATOR = "cpu"
|
70 |
+
# The line below is only for GPU runs; don't use it on CPU
|
71 |
+
#os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "max_split_size_mb:1024"
|
72 |
+
|
73 |
+
# Result-row buffer: w fields per model run (consumed by generate_csv below)
|
74 |
+
w = 7
|
75 |
+
prax = [0 for x in range(w)]
|
76 |
+
|
77 |
+
# %%
|
78 |
+
# Objective function for Optuna (CNN-LSTM)
|
79 |
+
def objective(trial, X_train, y_train, X_test, y_test):
|
80 |
+
model = tf.keras.Sequential()
|
81 |
+
|
82 |
+
# Creating the Neural Network model here...
|
83 |
+
# CNN layers
|
84 |
+
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)))
|
85 |
+
# model.add(Dense(5, kernel_regularizer=L2(0.01)))
|
86 |
+
|
87 |
+
# LSTM layers
|
88 |
+
model.add(Bidirectional(LSTM(trial.suggest_int("lstm_units_1", 32, 256), return_sequences=True)))
|
89 |
+
model.add(Dropout(trial.suggest_float("dropout_1", 0.1, 0.5)))
|
90 |
+
model.add(Bidirectional(LSTM(trial.suggest_int("lstm_units_2", 32, 256), return_sequences=False)))
|
91 |
+
model.add(Dropout(trial.suggest_float("dropout_2", 0.1, 0.5)))
|
92 |
+
|
93 |
+
#Final layers
|
94 |
+
model.add(Dense(1, activation='relu'))
|
95 |
+
model.compile(optimizer='adam', loss='mse', metrics=['mse'])
|
96 |
+
|
97 |
+
# Train the model
|
98 |
+
pruning_callback = TFKerasPruningCallback(trial, "val_loss")
|
99 |
+
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=15, batch_size=32, verbose=0, callbacks=[pruning_callback])
|
100 |
+
|
101 |
+
# Evaluate the model
|
102 |
+
loss = model.evaluate(X_test, y_test, verbose=0)[0]
|
103 |
+
|
104 |
+
return loss
|
105 |
+
|
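For reference, a minimal sketch of how this objective is typically driven; it mirrors the calls made inside modelCNNLSTM below and assumes X_train/X_test are already scaled and reshaped to (samples, timesteps, 1), y_train/y_test to (samples, 1), with an illustrative trial budget:

study = optuna.create_study(
    direction="minimize",
    pruner=optuna.pruners.MedianPruner(n_startup_trials=4, n_min_trials=4),
)
study.optimize(lambda t: objective(t, X_train, y_train, X_test, y_test), n_trials=5)
print(study.best_params)  # e.g. {'lstm_units_1': 128, 'dropout_1': 0.3, ...}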
106 |
+
# %%
|
107 |
+
# Function to train the model (CNN-LSTM)
|
108 |
+
def modelCNNLSTM(csv_file, prax):
|
109 |
+
# Read the data
|
110 |
+
df = csv_file
|
111 |
+
#df = df['Date/Time'].values.astype("float64")
|
112 |
+
temp_data = df.iloc[0:len(df)-100, 1:21]
|
113 |
+
trek = df.iloc[len(df)-100:,1:21]
|
114 |
+
#print(temp_data)
|
115 |
+
data = temp_data
|
116 |
+
sc = MinMaxScaler()
|
117 |
+
# Split the data into training and testing sets
|
118 |
+
train_size = int(len(data) * 0.8)
|
119 |
+
train_data, test_data = data[:train_size], data[train_size:]
|
120 |
+
# Separate the input features and target variable
|
121 |
+
X_train, y_train = train_data, train_data['Close']
|
122 |
+
X_test, y_test = test_data, test_data['Close']
|
123 |
+
|
124 |
+
X_train = X_train[0:len(X_train)-1]
|
125 |
+
y_train = y_train[1:len(y_train)]
|
126 |
+
X_test = X_test[0:len(X_test)-1]
|
127 |
+
y_test = y_test[1:len(y_test)]
|
128 |
+
|
129 |
+
Xt = X_train
|
130 |
+
Xts = X_test
|
131 |
+
Yt = y_train
|
132 |
+
Yts = y_test
|
133 |
+
|
134 |
+
y_train = y_train.values.reshape(-1,1)
|
135 |
+
y_test = y_test.values.reshape(-1,1)
|
136 |
+
|
137 |
+
X_train = sc.fit_transform(X_train)
|
138 |
+
y_train = sc.fit_transform(y_train)
|
139 |
+
X_test = sc.fit_transform(X_test)
|
140 |
+
y_test = sc.fit_transform(y_test)
|
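As written, the scaler is re-fit separately on the train and test arrays. For comparison, the usual scikit-learn pattern (shown only as a sketch; the *_s names are illustrative) fits on the training split and reuses those statistics for the test split:

feat_scaler = MinMaxScaler().fit(Xt)                       # fit on training features only
tgt_scaler = MinMaxScaler().fit(Yt.values.reshape(-1, 1))  # and on the training target
X_train_s = feat_scaler.transform(Xt)
X_test_s = feat_scaler.transform(Xts)                      # reuse training statistics
y_train_s = tgt_scaler.transform(Yt.values.reshape(-1, 1))
y_test_s = tgt_scaler.transform(Yts.values.reshape(-1, 1))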
141 |
+
|
142 |
+
x_tr=pd.DataFrame(X_train, index = Xt.index, columns = Xt.columns)
|
143 |
+
y_tr=pd.DataFrame(y_train, index = Yt.index)
|
144 |
+
x_te=pd.DataFrame(X_test, index = Xts.index, columns = Xts.columns)
|
145 |
+
y_te=pd.DataFrame(y_test, index = Yts.index)
|
146 |
+
|
147 |
+
# Reshape the data for the CNN-LSTM model
|
148 |
+
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
|
149 |
+
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
|
150 |
+
|
151 |
+
study = optuna.create_study(direction="minimize", pruner=optuna.pruners.MedianPruner(n_min_trials=4, n_startup_trials=4))
|
152 |
+
fn = lambda trial: objective(trial, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)
|
153 |
+
study.optimize(fn, n_trials=5)
|
154 |
+
|
155 |
+
best_params = study.best_params
|
156 |
+
#print(f"Best params: {best_params}")
|
157 |
+
|
158 |
+
model = tf.keras.Sequential()
|
159 |
+
|
160 |
+
# Creating the Neural Network model here...
|
161 |
+
# CNN layers
|
162 |
+
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)))
|
163 |
+
# model.add(Dense(5, kernel_regularizer=L2(0.01)))
|
164 |
+
|
165 |
+
# LSTM layers
|
166 |
+
model.add(Bidirectional(LSTM(best_params["lstm_units_1"], return_sequences=True)))
|
167 |
+
model.add(Dropout(best_params["dropout_1"]))
|
168 |
+
model.add(Bidirectional(LSTM(best_params["lstm_units_2"], return_sequences=False)))
|
169 |
+
model.add(Dropout(best_params["dropout_2"]))
|
170 |
+
|
171 |
+
#Final layers
|
172 |
+
model.add(Dense(1, activation='relu'))
|
173 |
+
model.compile(optimizer='adam', loss='mse', metrics=['mse'])
|
174 |
+
|
175 |
+
# Train the model
|
176 |
+
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=32, verbose=0)
|
177 |
+
|
178 |
+
# Evaluate the model
|
179 |
+
loss = model.evaluate(X_test, y_test, verbose=0)[0]
|
180 |
+
|
181 |
+
print(f"Final loss (without KFold): {loss}")
|
182 |
+
|
183 |
+
kfold = KFold(n_splits=10, shuffle=True)
|
184 |
+
|
185 |
+
inputs = np.concatenate((X_train, X_test), axis=0)
|
186 |
+
targets = np.concatenate((y_train, y_test), axis=0)
|
187 |
+
acc_per_fold = []
|
188 |
+
loss_per_fold = []
|
189 |
+
xgb_res = []
|
190 |
+
num_epochs = 10
|
191 |
+
batch_size = 32
|
192 |
+
|
193 |
+
fold_no = 1
|
194 |
+
print('------------------------------------------------------------------------')
|
195 |
+
print("Training for 10 folds... Standby")
|
196 |
+
for train, test in kfold.split(inputs, targets):
|
197 |
+
#print('------------------------------------------------------------------------')
|
198 |
+
#print(f'Training for fold {fold_no} ...')
|
199 |
+
history = model.fit(inputs[train], targets[train],
|
200 |
+
batch_size=32,
|
201 |
+
epochs=15,
|
202 |
+
verbose=0)
|
203 |
+
|
204 |
+
scores = model.evaluate(inputs[test], targets[test], verbose=0)
|
205 |
+
#print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')
|
206 |
+
acc_per_fold.append(scores[1] * 100)
|
207 |
+
loss_per_fold.append(scores[0])
|
208 |
+
fold_no = fold_no + 1
|
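Because the rows are time-ordered, a time-aware splitter is the usual alternative to a shuffled KFold here; a minimal sketch with scikit-learn's TimeSeriesSplit (fold and epoch counts are illustrative):

from sklearn.model_selection import TimeSeriesSplit

tscv = TimeSeriesSplit(n_splits=5)
for train_idx, test_idx in tscv.split(inputs):
    model.fit(inputs[train_idx], targets[train_idx], batch_size=32, epochs=15, verbose=0)
    print(model.evaluate(inputs[test_idx], targets[test_idx], verbose=0))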
209 |
+
|
210 |
+
|
211 |
+
print('------------------------------------------------------------------------')
|
212 |
+
#print('Score per fold')
|
213 |
+
#for i in range(0, len(acc_per_fold)):
|
214 |
+
# print('------------------------------------------------------------------------')
|
215 |
+
# print(f'> Fold {i+1} - Loss: {loss_per_fold[i]} - Loss%: {acc_per_fold[i]}%')
|
216 |
+
#print('------------------------------------------------------------------------')
|
217 |
+
#print('Average scores for all folds:')
|
218 |
+
#print(f'> Possible Loss %: {np.mean(acc_per_fold)} (+- {np.std(acc_per_fold)})')
|
219 |
+
#print(f'> Loss: {np.mean(loss_per_fold)}')
|
220 |
+
#print('------------------------------------------------------------------------')
|
221 |
+
|
222 |
+
trek = df.iloc[0:len(df), 1:21]
|
223 |
+
Y = trek[0:len(trek)]
|
224 |
+
YP = trek[1:len(trek)]
|
225 |
+
Y1 = Y['Close']
|
226 |
+
Y2 = YP['Close']
|
227 |
+
Yx = pd.DataFrame(YP, index=YP.index, columns=YP.columns)
|
228 |
+
#X = sc.fit_transform(X.reshape(-1,22))
|
229 |
+
Y = np.array(Y)
|
230 |
+
Y1 = np.array(Y1)
|
231 |
+
Y = sc.fit_transform(Y)
|
232 |
+
Y1 = Y1.reshape(-1,1)
|
233 |
+
Y1 = sc.fit_transform(Y1)
|
234 |
+
|
235 |
+
train_X = Y.reshape(Y.shape[0],Y.shape[1],1)
|
236 |
+
#Y = Y.reshape(-1,1)
|
237 |
+
pred = model.predict(train_X, verbose=0)
|
238 |
+
pred = np.array(pred).reshape(-1,1)
|
239 |
+
var2 = max_error(pred.reshape(-1,1), Y1)
|
240 |
+
print('Max Error: %f' % var2)
|
241 |
+
prax[5] = float(var2)
|
242 |
+
pred = sc.inverse_transform(pred)
|
243 |
+
|
244 |
+
print(pred[-2], pred[-1])
|
245 |
+
prax[3] = pred[-2]
|
246 |
+
prax[4] = pred[-1]
|
247 |
+
if(pred[-1]-pred[-2]>0):
|
248 |
+
prax[6] = 1
|
249 |
+
elif(pred[-1]-pred[-2]==0):
|
250 |
+
prax[6] = 0
|
251 |
+
else:
|
252 |
+
prax[6] = -1
|
253 |
+
|
254 |
+
# %%
|
255 |
+
# Function to train the model (CNN-LSTM, with the previous-day open-close gap as an extra feature)
|
256 |
+
def modelCNNLSTM_OpenGap(csv_file, prax):
|
257 |
+
# Read the data
|
258 |
+
df = csv_file
|
259 |
+
datLength = len(df)
|
260 |
+
df['O-C'] = 0
|
261 |
+
for i in range(datLength):
|
262 |
+
if i == 0:
|
263 |
+
df['O-C'][i] = 0
|
264 |
+
continue
|
265 |
+
else:
|
266 |
+
df['O-C'][i] = df['Open'][i] - df['Close'][i-1]
|
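The same open-gap column can be built without the Python loop; an equivalent pandas one-liner (row 0 again falls back to 0):

df['O-C'] = (df['Open'] - df['Close'].shift(1)).fillna(0)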
267 |
+
temp_data = df.iloc[0:datLength-100, 1:22]
|
268 |
+
trek = df.iloc[datLength-100:,1:22]
|
269 |
+
#print(temp_data)
|
270 |
+
data = temp_data
|
271 |
+
#data = data.values.astype("float64")
|
272 |
+
sc = MinMaxScaler()
|
273 |
+
# Split the data into training and testing sets
|
274 |
+
train_size = int(len(data) * 0.8)
|
275 |
+
train_data, test_data = data[:train_size], data[train_size:]
|
276 |
+
|
277 |
+
# Separate the input features and target variable
|
278 |
+
X_train, y_train = train_data, train_data['Close']
|
279 |
+
X_test, y_test = test_data, test_data['Close']
|
280 |
+
|
281 |
+
X_train = X_train[0:len(X_train)-1]
|
282 |
+
y_train = y_train[1:len(y_train)]
|
283 |
+
X_test = X_test[0:len(X_test)-1]
|
284 |
+
y_test = y_test[1:len(y_test)]
|
285 |
+
|
286 |
+
Xt = X_train
|
287 |
+
Xts = X_test
|
288 |
+
Yt = y_train
|
289 |
+
Yts = y_test
|
290 |
+
|
291 |
+
y_train = y_train.values.reshape(-1,1)
|
292 |
+
y_test = y_test.values.reshape(-1,1)
|
293 |
+
|
294 |
+
X_train = sc.fit_transform(X_train)
|
295 |
+
y_train = sc.fit_transform(y_train)
|
296 |
+
X_test = sc.fit_transform(X_test)
|
297 |
+
y_test = sc.fit_transform(y_test)
|
298 |
+
|
299 |
+
x_tr=pd.DataFrame(X_train, index = Xt.index, columns = Xt.columns)
|
300 |
+
y_tr=pd.DataFrame(y_train, index = Yt.index)
|
301 |
+
x_te=pd.DataFrame(X_test, index = Xts.index, columns = Xts.columns)
|
302 |
+
y_te=pd.DataFrame(y_test, index = Yts.index)
|
303 |
+
|
304 |
+
# Reshape the data for the CNN-LSTM model
|
305 |
+
X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
|
306 |
+
X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))
|
307 |
+
|
308 |
+
study = optuna.create_study(direction="minimize", pruner=optuna.pruners.MedianPruner(n_min_trials=2, n_startup_trials=2))
|
309 |
+
fn = lambda trial: objective(trial, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)
|
310 |
+
study.optimize(fn, n_trials=5)
|
311 |
+
|
312 |
+
best_params = study.best_params
|
313 |
+
#print(f"Best params: {best_params}")
|
314 |
+
|
315 |
+
model = tf.keras.Sequential()
|
316 |
+
|
317 |
+
# Creating the Neural Network model here...
|
318 |
+
# CNN layers
|
319 |
+
model.add(Conv1D(filters=64, kernel_size=3, activation='relu', input_shape=(X_train.shape[1], 1)))
|
320 |
+
# model.add(Dense(5, kernel_regularizer=L2(0.01)))
|
321 |
+
|
322 |
+
# LSTM layers
|
323 |
+
model.add(Bidirectional(LSTM(best_params["lstm_units_1"], return_sequences=True)))
|
324 |
+
model.add(Dropout(best_params["dropout_1"]))
|
325 |
+
model.add(Bidirectional(LSTM(best_params["lstm_units_2"], return_sequences=False)))
|
326 |
+
model.add(Dropout(best_params["dropout_2"]))
|
327 |
+
|
328 |
+
#Final layers
|
329 |
+
model.add(Dense(1, activation='relu'))
|
330 |
+
model.compile(optimizer='adam', loss='mse', metrics=['mse'])
|
331 |
+
|
332 |
+
# Train the model
|
333 |
+
history = model.fit(X_train, y_train, validation_data=(X_test, y_test), epochs=10, batch_size=32, verbose=0)
|
334 |
+
|
335 |
+
# Evaluate the model
|
336 |
+
loss = model.evaluate(X_test, y_test, verbose=0)[0]
|
337 |
+
|
338 |
+
print(f"Final loss (without KFold): {loss}")
|
339 |
+
|
340 |
+
kfold = KFold(n_splits=10, shuffle=True)
|
341 |
+
|
342 |
+
inputs = np.concatenate((X_train, X_test), axis=0)
|
343 |
+
targets = np.concatenate((y_train, y_test), axis=0)
|
344 |
+
acc_per_fold = []
|
345 |
+
loss_per_fold = []
|
346 |
+
xgb_res = []
|
347 |
+
num_epochs = 10
|
348 |
+
batch_size = 32
|
349 |
+
|
350 |
+
fold_no = 1
|
351 |
+
print('------------------------------------------------------------------------')
|
352 |
+
print("Training for 10 folds... Standby")
|
353 |
+
for train, test in kfold.split(inputs, targets):
|
354 |
+
#print('------------------------------------------------------------------------')
|
355 |
+
#print(f'Training for fold {fold_no} ...')
|
356 |
+
history = model.fit(inputs[train], targets[train],
|
357 |
+
batch_size=32,
|
358 |
+
epochs=15,
|
359 |
+
verbose=0)
|
360 |
+
|
361 |
+
scores = model.evaluate(inputs[test], targets[test], verbose=0)
|
362 |
+
#print(f'Score for fold {fold_no}: {model.metrics_names[0]} of {scores[0]}; {model.metrics_names[1]} of {scores[1]*100}%')
|
363 |
+
acc_per_fold.append(scores[1] * 100)
|
364 |
+
loss_per_fold.append(scores[0])
|
365 |
+
fold_no = fold_no + 1
|
366 |
+
|
367 |
+
|
368 |
+
print('------------------------------------------------------------------------')
|
369 |
+
#print('Score per fold')
|
370 |
+
#for i in range(0, len(acc_per_fold)):
|
371 |
+
# print('------------------------------------------------------------------------')
|
372 |
+
# print(f'> Fold {i+1} - Loss: {loss_per_fold[i]} - Loss%: {acc_per_fold[i]}%')
|
373 |
+
#print('------------------------------------------------------------------------')
|
374 |
+
#print('Average scores for all folds:')
|
375 |
+
#print(f'> Possible Loss %: {np.mean(acc_per_fold)} (+- {np.std(acc_per_fold)})')
|
376 |
+
#print(f'> Loss: {np.mean(loss_per_fold)}')
|
377 |
+
#print('------------------------------------------------------------------------')
|
378 |
+
|
379 |
+
trek = df.iloc[0:len(df), 1:22]
|
380 |
+
Y = trek[0:len(trek)]
|
381 |
+
YP = trek[1:len(trek)]
|
382 |
+
Y1 = Y['Close']
|
383 |
+
Y2 = YP['Close']
|
384 |
+
Yx = pd.DataFrame(YP, index=YP.index, columns=YP.columns)
|
385 |
+
#X = sc.fit_transform(X.reshape(-1,22))
|
386 |
+
Y = np.array(Y)
|
387 |
+
Y1 = np.array(Y1)
|
388 |
+
Y = sc.fit_transform(Y)
|
389 |
+
Y1 = Y1.reshape(-1,1)
|
390 |
+
Y1 = sc.fit_transform(Y1)
|
391 |
+
|
392 |
+
train_X = Y.reshape(Y.shape[0],Y.shape[1],1)
|
393 |
+
#Y = Y.reshape(-1,1)
|
394 |
+
pred = model.predict(train_X, verbose=0)
|
395 |
+
pred = np.array(pred).reshape(-1,1)
|
396 |
+
var2 = max_error(pred.reshape(-1,1), Y1)
|
397 |
+
print('Max Error: %f' % var2)
|
398 |
+
prax[5] = float(var2)
|
399 |
+
pred = sc.inverse_transform(pred)
|
400 |
+
|
401 |
+
print(pred[-2], pred[-1])
|
402 |
+
prax[3] = pred[-2]
|
403 |
+
prax[4] = pred[-1]
|
404 |
+
if(pred[-1]-pred[-2]>0):
|
405 |
+
prax[6] = 1
|
406 |
+
elif(pred[-1]-pred[-2]==0):
|
407 |
+
prax[6] = 0
|
408 |
+
else:
|
409 |
+
prax[6] = -1
|
410 |
+
|
411 |
+
# %%
|
412 |
+
# Function to train the model (TFT)
|
413 |
+
def modelTFT(csv_file, prax):
|
414 |
+
train = csv_file
|
415 |
+
#test = pd.read_csv("/kaggle/input/artemis-test/nifty_daily.csv")
|
416 |
+
train['date'] = pd.to_datetime(train['Date/Time'])
|
417 |
+
#test['date'] = pd.to_datetime(test['Date'])
|
418 |
+
|
419 |
+
data = pd.concat([train], axis = 0, ignore_index=True)
|
420 |
+
# Check that key is country-store-product-date combination
|
421 |
+
#assert len(data.drop_duplicates(['country', 'store', 'product', 'date'])) == len(data)
|
422 |
+
# Check that there is one date per country-store-product combination
|
423 |
+
#assert len(data.drop_duplicates(['country', 'store', 'product'])) == len(data)//data['date'].nunique()
|
424 |
+
|
425 |
+
#display(train.sample(4))
|
426 |
+
|
427 |
+
"""<a id ="3"></a><h3 style="background:#0554f2; border:0; border-radius: 4px; color:#f5f6f7">Model Implementation in Pytorch-Forecasting </h3>"""
|
428 |
+
|
429 |
+
# Add a time_idx (a sequence of consecutive integers running from the min to the max date)
|
430 |
+
|
431 |
+
data = (data.merge((data[['Date/Time']].drop_duplicates(ignore_index=True)
|
432 |
+
.rename_axis('time_idx')).reset_index(), on = ['Date/Time']))
|
433 |
+
# add additional features
|
434 |
+
data["day_of_week"] = data['date'].dt.dayofweek.astype(str).astype("category") # categories have be strings
|
435 |
+
data["week_of_year"] = data['date'].dt.isocalendar().week.astype(str).astype("category") # categories have be strings
|
436 |
+
data["month"] = data['date'].dt.month.astype(str).astype("category") # categories have be strings
|
437 |
+
#data["log_num_sold"] = np.log(data.num_sold + 1e-8)
|
438 |
+
#data["avg_volume_by_country"] = data.groupby(["time_idx", "country"], observed=True).num_sold.transform("mean")
|
439 |
+
#data["avg_volume_by_store"] = data.groupby(["time_idx", "store"], observed=True).num_sold.transform("mean")
|
440 |
+
#data["avg_volume_by_product"] = data.groupby(["time_idx", "product"], observed=True).num_sold.transform("mean")
|
441 |
+
|
442 |
+
#unique_dates_country = data[['date', 'Ticker']].drop_duplicates(ignore_index = True)
|
443 |
+
#unique_dates_country['is_holiday'] = (unique_dates_country
|
444 |
+
# .apply(lambda x: x.date in holidays.country_holidays(x.country), axis = 1).astype('category'))
|
445 |
+
#unique_dates_country['is_holiday_lead_1'] = (unique_dates_country
|
446 |
+
# .apply(lambda x: x.date+pd.Timedelta(days=1) in holidays.country_holidays(x.country), axis = 1).astype('category'))
|
447 |
+
#unique_dates_country['is_holiday_lead_2'] = (unique_dates_country
|
448 |
+
# .apply(lambda x: x.date+pd.Timedelta(days=2) in holidays.country_holidays(x.country), axis = 1).astype('category'))
|
449 |
+
#unique_dates_country['is_holiday_lag_1'] = (unique_dates_country
|
450 |
+
# .apply(lambda x: x.date-pd.Timedelta(days=1) in holidays.country_holidays(x.country), axis = 1).astype('category'))
|
451 |
+
#unique_dates_country['is_holiday_lag_2'] = (unique_dates_country
|
452 |
+
# .apply(lambda x: x.date-pd.Timedelta(days=2) in holidays.country_holidays(x.country), axis = 1).astype('category'))
|
453 |
+
#data = data.merge(unique_dates_country, on = ['date', 'Ticker'], validate = "m:1")
|
454 |
+
#del unique_dates_country
|
455 |
+
gc.collect()
|
456 |
+
data.sample(5, random_state=30)
|
457 |
+
|
458 |
+
train = data.iloc[:len(train)]
|
459 |
+
test = data.iloc[len(train):]
|
460 |
+
|
461 |
+
max_prediction_length = 2
|
462 |
+
max_encoder_length = train.date.nunique()
|
463 |
+
training_cutoff = train["time_idx"].max() - max_prediction_length #we will validate on 2020
|
464 |
+
|
465 |
+
# Let's create a Dataset
|
466 |
+
training = TimeSeriesDataSet(
|
467 |
+
train[lambda x: x.time_idx <= training_cutoff],
|
468 |
+
time_idx="time_idx",
|
469 |
+
target="Close",
|
470 |
+
group_ids=["Ticker"],
|
471 |
+
min_encoder_length=max_prediction_length, # keep encoder length long (as it is in the validation set)
|
472 |
+
max_encoder_length=max_encoder_length,
|
473 |
+
max_prediction_length=max_prediction_length,
|
474 |
+
static_categoricals=["Ticker"],
|
475 |
+
time_varying_known_categoricals=["month", "week_of_year", "day_of_week"],
|
476 |
+
#variable_groups={"is_holiday": ["is_holiday"]}, # group of categorical variables can be treated as one variable
|
477 |
+
time_varying_known_reals=["time_idx"],
|
478 |
+
time_varying_unknown_categoricals=[],
|
479 |
+
time_varying_unknown_reals=[
|
480 |
+
'Open','High','Low','Close','OI','RSI14','RSI44','HHRSI','Rsi Weekly','LLCHHV','white','Vap44','Vap14','Ema5','Ema20','Ema50','Ema200'
|
481 |
+
],
|
482 |
+
target_normalizer=GroupNormalizer(
|
483 |
+
groups=['Ticker'], transformation="softplus"
|
484 |
+
), # use softplus and normalize by group
|
485 |
+
categorical_encoders={
|
486 |
+
'week_of_year':NaNLabelEncoder(add_nan=True)
|
487 |
+
},
|
488 |
+
#lags={'num_sold': [7, 30, 365]},
|
489 |
+
add_relative_time_idx=True,
|
490 |
+
add_target_scales=True,
|
491 |
+
add_encoder_length=True,
|
492 |
+
)
|
493 |
+
|
494 |
+
# create validation set (predict=True) which means to predict the last max_prediction_length points in time
|
495 |
+
# for each series
|
496 |
+
validation = TimeSeriesDataSet.from_dataset(training, train, predict=True, stop_randomization=True)
|
497 |
+
|
498 |
+
# create dataloaders for model
|
499 |
+
batch_size = 128 # set this between 32 and 128
|
500 |
+
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
|
501 |
+
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)
|
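A quick way to sanity-check what these dataloaders yield before training (a sketch; the exact keys and shapes depend on the dataset definition above):

x, y = next(iter(train_dataloader))
print({k: tuple(v.shape) for k, v in x.items() if hasattr(v, "shape")})
print(y[0].shape)  # y is a (target, weight) tuple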
502 |
+
|
503 |
+
#let's see how a naive model does
|
504 |
+
|
505 |
+
actuals = torch.cat([y for x, (y, weight) in iter(val_dataloader)])#.cuda()
|
506 |
+
baseline_predictions = Baseline().predict(val_dataloader)#.cuda()
|
507 |
+
(actuals - baseline_predictions).abs().mean().item()
|
508 |
+
|
509 |
+
sm = SMAPE()
|
510 |
+
|
511 |
+
print(f"Median loss for naive prediction on validation: {sm.loss(actuals, baseline_predictions).mean(axis = 1).median().item()}")
|
512 |
+
|
513 |
+
early_stop_callback = EarlyStopping(monitor="train_loss", min_delta=1e-2, patience=PATIENCE, verbose=False, mode="min")
|
514 |
+
lr_logger = LearningRateMonitor() # log the learning rate
|
515 |
+
logger = TensorBoardLogger("lightning_logs") # logging results to a tensorboard
|
516 |
+
|
517 |
+
trainer = pl.Trainer(
|
518 |
+
max_epochs=1,
|
519 |
+
accelerator=ACCELERATOR,
|
520 |
+
enable_model_summary=False,
|
521 |
+
gradient_clip_val=0.25,
|
522 |
+
limit_train_batches=10, # cap the number of training batches per epoch for a quick run; remove for full training
|
523 |
+
#fast_dev_run=True, # comment in to check that network or dataset has no serious bugs
|
524 |
+
callbacks=[lr_logger, early_stop_callback],
|
525 |
+
logger=logger,
|
526 |
+
)
|
527 |
+
|
528 |
+
tft = TemporalFusionTransformer.from_dataset(
|
529 |
+
training,
|
530 |
+
learning_rate=LEARNING_RATE,
|
531 |
+
lstm_layers=2,
|
532 |
+
hidden_size=16,
|
533 |
+
attention_head_size=2,
|
534 |
+
dropout=0.2,
|
535 |
+
hidden_continuous_size=8,
|
536 |
+
output_size=1, # single point forecast (the 7-quantile default applies only with QuantileLoss)
|
537 |
+
loss=SMAPE(),
|
538 |
+
log_interval=10, # uncomment for learning rate finder and otherwise, e.g. to 10 for logging every 10 batches
|
539 |
+
reduce_on_plateau_patience=4
|
540 |
+
)
|
541 |
+
|
542 |
+
tft.to(DEVICE)
|
543 |
+
trainer.fit(
|
544 |
+
tft,
|
545 |
+
train_dataloaders=train_dataloader,
|
546 |
+
val_dataloaders=val_dataloader,
|
547 |
+
)
|
548 |
+
#torch.cuda.empty_cache()
|
549 |
+
#print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")
|
550 |
+
|
551 |
+
if OPTUNA:
|
552 |
+
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters
|
553 |
+
|
554 |
+
# create study
|
555 |
+
study = optimize_hyperparameters(
|
556 |
+
train_dataloader,
|
557 |
+
val_dataloader,
|
558 |
+
model_path="optuna_test",
|
559 |
+
n_trials=5,
|
560 |
+
max_epochs=MAX_EPOCHS,
|
561 |
+
gradient_clip_val_range=(0.01, 0.3),
|
562 |
+
hidden_size_range=(8, 24),
|
563 |
+
hidden_continuous_size_range=(8, 12),
|
564 |
+
attention_head_size_range=(2, 4),
|
565 |
+
learning_rate_range=(0.01, 0.05),
|
566 |
+
dropout_range=(0.1, 0.25),
|
567 |
+
trainer_kwargs=dict(limit_train_batches=20),
|
568 |
+
reduce_on_plateau_patience=4,
|
569 |
+
pruner=optuna.pruners.MedianPruner(n_min_trials=3, n_startup_trials=3),
|
570 |
+
use_learning_rate_finder=False, # use Optuna to find ideal learning rate or use in-built learning rate finder
|
571 |
+
)
|
572 |
+
#torch.cuda.empty_cache()
|
573 |
+
#'''
|
574 |
+
trainer = pl.Trainer(
|
575 |
+
max_epochs=MAX_EPOCHS,
|
576 |
+
accelerator=ACCELERATOR,
|
577 |
+
enable_model_summary=False,
|
578 |
+
gradient_clip_val=study.best_params['gradient_clip_val'],
|
579 |
+
limit_train_batches=20, # cap the number of training batches per epoch for a quick run; remove for full training
|
580 |
+
#fast_dev_run=True, # comment in to check that network or dataset has no serious bugs
|
581 |
+
callbacks=[lr_logger, early_stop_callback],
|
582 |
+
logger=logger,
|
583 |
+
)
|
584 |
+
|
585 |
+
tft = TemporalFusionTransformer.from_dataset(
|
586 |
+
training,
|
587 |
+
learning_rate=study.best_params['learning_rate'],
|
588 |
+
lstm_layers=2,
|
589 |
+
hidden_size=study.best_params['hidden_size'],
|
590 |
+
attention_head_size=study.best_params['attention_head_size'],
|
591 |
+
dropout=study.best_params['dropout'],
|
592 |
+
hidden_continuous_size=study.best_params['hidden_continuous_size'],
|
593 |
+
output_size=1, # single point forecast (the 7-quantile default applies only with QuantileLoss)
|
594 |
+
loss=SMAPE(),
|
595 |
+
log_interval=10, # uncomment for learning rate finder and otherwise, e.g. to 10 for logging every 10 batches
|
596 |
+
reduce_on_plateau_patience=4
|
597 |
+
)
|
598 |
+
|
599 |
+
tft.to(DEVICE)
|
600 |
+
trainer.fit(
|
601 |
+
tft,
|
602 |
+
train_dataloaders=train_dataloader,
|
603 |
+
val_dataloaders=val_dataloader,
|
604 |
+
)
|
605 |
+
#'''
|
606 |
+
#torch.cuda.empty_cache()
|
607 |
+
best_model_path = trainer.checkpoint_callback.best_model_path
|
608 |
+
best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)
|
609 |
+
actuals = torch.cat([y[0] for x, y in iter(val_dataloader)])#.cuda()
|
610 |
+
predictions = best_tft.predict(val_dataloader, mode="prediction")
|
611 |
+
raw_predictions = best_tft.predict(val_dataloader, mode="raw", return_x=True)
|
612 |
+
|
613 |
+
sm = SMAPE()
|
614 |
+
print(f"Validation median SMAPE loss: {sm.loss(actuals, predictions).mean(axis = 1).median().item()}")
|
615 |
+
prax[5] = sm.loss(actuals, predictions).mean(axis = 1).median().item()
|
616 |
+
#best_tft.plot_prediction(raw_predictions.x, raw_predictions.output, idx=0, add_loss_to_title=True);
|
617 |
+
|
618 |
+
print(raw_predictions[0][0])
|
619 |
+
prax[3] = '-'
|
620 |
+
prax[4] = raw_predictions[0][0].data.cpu().tolist()[0][0]
|
621 |
+
t = prax[4]
|
622 |
+
tm = data['Close'][len(data)-1]
|
623 |
+
if(t-tm>0):
|
624 |
+
prax[6] = 1
|
625 |
+
elif(t-tm==0):
|
626 |
+
prax[6] = 0
|
627 |
+
else:
|
628 |
+
prax[6] = -1
|
629 |
+
#prax[i][3] = raw_predictions[0][0].data[1]
|
630 |
+
print("-----------")
|
631 |
+
|
632 |
+
#with open("out.csv", "w", newline="") as f:
|
633 |
+
# writer = csv.writer(f)
|
634 |
+
# writer.writerows(prax)
|
635 |
+
|
636 |
+
# %%
|
637 |
+
# Function to train the model (TFT, with the previous-day open-close gap as an extra feature)
|
638 |
+
def modelTFT_OpenGap(csv_file, prax):
|
639 |
+
train = csv_file
|
640 |
+
#test = pd.read_csv("/kaggle/input/artemis-test/nifty_daily.csv")
|
641 |
+
train['date'] = pd.to_datetime(train['Date/Time'])
|
642 |
+
#test['date'] = pd.to_datetime(test['Date'])
|
643 |
+
datLength = len(train)
|
644 |
+
train['O-C'] = 0
|
645 |
+
for i in range(datLength):
|
646 |
+
if i == 0:
|
647 |
+
train['O-C'][i] = 0
|
648 |
+
continue
|
649 |
+
else:
|
650 |
+
train['O-C'][i] = train['Open'][i] - train['Close'][i-1]
|
651 |
+
data = pd.concat([train], axis = 0, ignore_index=True)
|
652 |
+
# Check that key is country-store-product-date combination
|
653 |
+
#assert len(data.drop_duplicates(['country', 'store', 'product', 'date'])) == len(data)
|
654 |
+
# Check that there is one date per country-store-product combination
|
655 |
+
#assert len(data.drop_duplicates(['country', 'store', 'product'])) == len(data)//data['date'].nunique()
|
656 |
+
|
657 |
+
#display(train.sample(4))
|
658 |
+
|
659 |
+
"""<a id ="3"></a><h3 style="background:#0554f2; border:0; border-radius: 4px; color:#f5f6f7">Model Implementation in Pytorch-Forecasting </h3>"""
|
660 |
+
|
661 |
+
# Add a time_idx (a sequence of consecutive integers running from the min to the max date)
|
662 |
+
|
663 |
+
data = (data.merge((data[['Date/Time']].drop_duplicates(ignore_index=True)
|
664 |
+
.rename_axis('time_idx')).reset_index(), on = ['Date/Time']))
|
665 |
+
# add additional features
|
666 |
+
data["day_of_week"] = data['date'].dt.dayofweek.astype(str).astype("category") # categories have be strings
|
667 |
+
data["week_of_year"] = data['date'].dt.isocalendar().week.astype(str).astype("category") # categories have be strings
|
668 |
+
data["month"] = data['date'].dt.month.astype(str).astype("category") # categories have be strings
|
669 |
+
#data["log_num_sold"] = np.log(data.num_sold + 1e-8)
|
670 |
+
#data["avg_volume_by_country"] = data.groupby(["time_idx", "country"], observed=True).num_sold.transform("mean")
|
671 |
+
#data["avg_volume_by_store"] = data.groupby(["time_idx", "store"], observed=True).num_sold.transform("mean")
|
672 |
+
#data["avg_volume_by_product"] = data.groupby(["time_idx", "product"], observed=True).num_sold.transform("mean")
|
673 |
+
|
674 |
+
#unique_dates_country = data[['date', 'Ticker']].drop_duplicates(ignore_index = True)
|
675 |
+
#unique_dates_country['is_holiday'] = (unique_dates_country
|
676 |
+
# .apply(lambda x: x.date in holidays.country_holidays(x.country), axis = 1).astype('category'))
|
677 |
+
#unique_dates_country['is_holiday_lead_1'] = (unique_dates_country
|
678 |
+
# .apply(lambda x: x.date+pd.Timedelta(days=1) in holidays.country_holidays(x.country), axis = 1).astype('category'))
|
679 |
+
#unique_dates_country['is_holiday_lead_2'] = (unique_dates_country
|
680 |
+
# .apply(lambda x: x.date+pd.Timedelta(days=2) in holidays.country_holidays(x.country), axis = 1).astype('category'))
|
681 |
+
#unique_dates_country['is_holiday_lag_1'] = (unique_dates_country
|
682 |
+
# .apply(lambda x: x.date-pd.Timedelta(days=1) in holidays.country_holidays(x.country), axis = 1).astype('category'))
|
683 |
+
#unique_dates_country['is_holiday_lag_2'] = (unique_dates_country
|
684 |
+
# .apply(lambda x: x.date-pd.Timedelta(days=2) in holidays.country_holidays(x.country), axis = 1).astype('category'))
|
685 |
+
#data = data.merge(unique_dates_country, on = ['date', 'Ticker'], validate = "m:1")
|
686 |
+
#del unique_dates_country
|
687 |
+
gc.collect()
|
688 |
+
data.sample(5, random_state=30)
|
689 |
+
|
690 |
+
train = data.iloc[:len(train)]
|
691 |
+
test = data.iloc[len(train):]
|
692 |
+
|
693 |
+
max_prediction_length = 2
|
694 |
+
max_encoder_length = train.date.nunique()
|
695 |
+
training_cutoff = train["time_idx"].max() - max_prediction_length #we will validate on 2020
|
696 |
+
|
697 |
+
# Let's create a Dataset
|
698 |
+
training = TimeSeriesDataSet(
|
699 |
+
train[lambda x: x.time_idx <= training_cutoff],
|
700 |
+
time_idx="time_idx",
|
701 |
+
target="Close",
|
702 |
+
group_ids=["Ticker"],
|
703 |
+
min_encoder_length=max_prediction_length, # keep encoder length long (as it is in the validation set)
|
704 |
+
max_encoder_length=max_encoder_length,
|
705 |
+
max_prediction_length=max_prediction_length,
|
706 |
+
static_categoricals=["Ticker"],
|
707 |
+
time_varying_known_categoricals=["month", "week_of_year", "day_of_week"],
|
708 |
+
#variable_groups={"is_holiday": ["is_holiday"]}, # group of categorical variables can be treated as one variable
|
709 |
+
time_varying_known_reals=["time_idx"],
|
710 |
+
time_varying_unknown_categoricals=[],
|
711 |
+
time_varying_unknown_reals=[
|
712 |
+
'Open','High','Low','Close','OI','RSI14','RSI44','HHRSI','Rsi Weekly','LLCHHV','white','Vap44','Vap14','Ema5','Ema20','Ema50','Ema200', 'O-C'
|
713 |
+
],
|
714 |
+
target_normalizer=GroupNormalizer(
|
715 |
+
groups=['Ticker'], transformation="softplus"
|
716 |
+
), # use softplus and normalize by group
|
717 |
+
categorical_encoders={
|
718 |
+
'week_of_year':NaNLabelEncoder(add_nan=True)
|
719 |
+
},
|
720 |
+
#lags={'num_sold': [7, 30, 365]},
|
721 |
+
add_relative_time_idx=True,
|
722 |
+
add_target_scales=True,
|
723 |
+
add_encoder_length=True,
|
724 |
+
)
|
725 |
+
|
726 |
+
# create validation set (predict=True) which means to predict the last max_prediction_length points in time
|
727 |
+
# for each series
|
728 |
+
validation = TimeSeriesDataSet.from_dataset(training, train, predict=True, stop_randomization=True)
|
729 |
+
|
730 |
+
# create dataloaders for model
|
731 |
+
batch_size = 128 # set this between 32 and 128
|
732 |
+
train_dataloader = training.to_dataloader(train=True, batch_size=batch_size, num_workers=0)
|
733 |
+
val_dataloader = validation.to_dataloader(train=False, batch_size=batch_size * 10, num_workers=0)
|
734 |
+
|
735 |
+
#let's see how a naive model does
|
736 |
+
|
737 |
+
actuals = torch.cat([y for x, (y, weight) in iter(val_dataloader)])#.cuda()
|
738 |
+
baseline_predictions = Baseline().predict(val_dataloader)#.cuda()
|
739 |
+
(actuals - baseline_predictions).abs().mean().item()
|
740 |
+
|
741 |
+
sm = SMAPE()
|
742 |
+
|
743 |
+
print(f"Median loss for naive prediction on validation: {sm.loss(actuals, baseline_predictions).mean(axis = 1).median().item()}")
|
744 |
+
|
745 |
+
early_stop_callback = EarlyStopping(monitor="train_loss", min_delta=1e-2, patience=PATIENCE, verbose=False, mode="min")
|
746 |
+
lr_logger = LearningRateMonitor() # log the learning rate
|
747 |
+
logger = TensorBoardLogger("lightning_logs") # logging results to a tensorboard
|
748 |
+
|
749 |
+
trainer = pl.Trainer(
|
750 |
+
max_epochs=1,
|
751 |
+
accelerator=ACCELERATOR,
|
752 |
+
enable_model_summary=False,
|
753 |
+
gradient_clip_val=0.25,
|
754 |
+
limit_train_batches=10, # cap the number of training batches per epoch for a quick run; remove for full training
|
755 |
+
#fast_dev_run=True, # comment in to check that network or dataset has no serious bugs
|
756 |
+
callbacks=[lr_logger, early_stop_callback],
|
757 |
+
logger=logger,
|
758 |
+
)
|
759 |
+
|
760 |
+
tft = TemporalFusionTransformer.from_dataset(
|
761 |
+
training,
|
762 |
+
learning_rate=LEARNING_RATE,
|
763 |
+
lstm_layers=2,
|
764 |
+
hidden_size=16,
|
765 |
+
attention_head_size=2,
|
766 |
+
dropout=0.2,
|
767 |
+
hidden_continuous_size=8,
|
768 |
+
output_size=1, # single point forecast (the 7-quantile default applies only with QuantileLoss)
|
769 |
+
loss=SMAPE(),
|
770 |
+
log_interval=10, # uncomment for learning rate finder and otherwise, e.g. to 10 for logging every 10 batches
|
771 |
+
reduce_on_plateau_patience=4
|
772 |
+
)
|
773 |
+
|
774 |
+
tft.to(DEVICE)
|
775 |
+
trainer.fit(
|
776 |
+
tft,
|
777 |
+
train_dataloaders=train_dataloader,
|
778 |
+
val_dataloaders=val_dataloader,
|
779 |
+
)
|
780 |
+
#torch.cuda.empty_cache()
|
781 |
+
#print(f"Number of parameters in network: {tft.size()/1e3:.1f}k")
|
782 |
+
|
783 |
+
if OPTUNA:
|
784 |
+
from pytorch_forecasting.models.temporal_fusion_transformer.tuning import optimize_hyperparameters
|
785 |
+
|
786 |
+
# create study
|
787 |
+
study = optimize_hyperparameters(
|
788 |
+
train_dataloader,
|
789 |
+
val_dataloader,
|
790 |
+
model_path="optuna_test",
|
791 |
+
n_trials=5,
|
792 |
+
max_epochs=MAX_EPOCHS,
|
793 |
+
gradient_clip_val_range=(0.01, 0.3),
|
794 |
+
hidden_size_range=(8, 24),
|
795 |
+
hidden_continuous_size_range=(8, 12),
|
796 |
+
attention_head_size_range=(2, 4),
|
797 |
+
learning_rate_range=(0.01, 0.05),
|
798 |
+
dropout_range=(0.1, 0.25),
|
799 |
+
trainer_kwargs=dict(limit_train_batches=20),
|
800 |
+
reduce_on_plateau_patience=4,
|
801 |
+
pruner=optuna.pruners.MedianPruner(n_min_trials=3, n_warmup_steps=3),
|
802 |
+
use_learning_rate_finder=False, # use Optuna to find ideal learning rate or use in-built learning rate finder
|
803 |
+
)
|
804 |
+
#torch.cuda.empty_cache()
|
805 |
+
#'''
|
806 |
+
trainer = pl.Trainer(
|
807 |
+
max_epochs=MAX_EPOCHS,
|
808 |
+
accelerator=ACCELERATOR,
|
809 |
+
enable_model_summary=False,
|
810 |
+
gradient_clip_val=study.best_params['gradient_clip_val'],
|
811 |
+
limit_train_batches=20, # cap the number of training batches per epoch for a quick run; remove for full training
|
812 |
+
#fast_dev_run=True, # comment in to check that network or dataset has no serious bugs
|
813 |
+
callbacks=[lr_logger, early_stop_callback],
|
814 |
+
logger=logger,
|
815 |
+
)
|
816 |
+
|
817 |
+
tft = TemporalFusionTransformer.from_dataset(
|
818 |
+
training,
|
819 |
+
learning_rate=study.best_params['learning_rate'],
|
820 |
+
lstm_layers=2,
|
821 |
+
hidden_size=study.best_params['hidden_size'],
|
822 |
+
attention_head_size=study.best_params['attention_head_size'],
|
823 |
+
dropout=study.best_params['dropout'],
|
824 |
+
hidden_continuous_size=study.best_params['hidden_continuous_size'],
|
825 |
+
output_size=1, # single point forecast (the 7-quantile default applies only with QuantileLoss)
|
826 |
+
loss=SMAPE(),
|
827 |
+
log_interval=10, # uncomment for learning rate finder and otherwise, e.g. to 10 for logging every 10 batches
|
828 |
+
reduce_on_plateau_patience=4
|
829 |
+
)
|
830 |
+
|
831 |
+
tft.to(DEVICE)
|
832 |
+
trainer.fit(
|
833 |
+
tft,
|
834 |
+
train_dataloaders=train_dataloader,
|
835 |
+
val_dataloaders=val_dataloader,
|
836 |
+
)
|
837 |
+
#'''
|
838 |
+
#torch.cuda.empty_cache()
|
839 |
+
best_model_path = trainer.checkpoint_callback.best_model_path
|
840 |
+
best_tft = TemporalFusionTransformer.load_from_checkpoint(best_model_path)
|
841 |
+
actuals = torch.cat([y[0] for x, y in iter(val_dataloader)])#.cuda()
|
842 |
+
predictions = best_tft.predict(val_dataloader, mode="prediction")
|
843 |
+
raw_predictions = best_tft.predict(val_dataloader, mode="raw", return_x=True)
|
844 |
+
|
845 |
+
sm = SMAPE()
|
846 |
+
print(f"Validation median SMAPE loss: {sm.loss(actuals, predictions).mean(axis = 1).median().item()}")
|
847 |
+
prax[5] = sm.loss(actuals, predictions).mean(axis = 1).median().item()
|
848 |
+
#best_tft.plot_prediction(raw_predictions.x, raw_predictions.output, idx=0, add_loss_to_title=True);
|
849 |
+
|
850 |
+
print(raw_predictions[0][0])
|
851 |
+
prax[3] = '-'
|
852 |
+
prax[4] = raw_predictions[0][0].data.cpu().tolist()[0][0]
|
853 |
+
t = prax[4]
|
854 |
+
tm = data['Close'][len(data)-1]
|
855 |
+
if(t-tm>0):
|
856 |
+
prax[6] = 1
|
857 |
+
elif(t-tm==0):
|
858 |
+
prax[6] = 0
|
859 |
+
else:
|
860 |
+
prax[6] = -1
|
861 |
+
#prax[i][3] = raw_predictions[0][0].data[1]
|
862 |
+
print("-----------")
|
863 |
+
|
864 |
+
#with open("out.csv", "w", newline="") as f:
|
865 |
+
# writer = csv.writer(f)
|
866 |
+
# writer.writerows(prax)
|
867 |
+
|
868 |
+
# %%
|
869 |
+
def generate_csv(data_list):
|
870 |
+
today = date.today().strftime("%Y_%m_%d")
|
871 |
+
filename = f"result_{today}.csv"
|
872 |
+
file_exists = os.path.isfile(filename)
|
873 |
+
with open(filename, mode='a', newline='') as csv_file:
|
874 |
+
fieldnames = ['Ticker', 'Prev_Close_Real', 'Model', 'Prev_Close_Model', 'Close_Model', 'Max_Err', 'Up_Down' ] # replace with your own column names
|
875 |
+
writer = csv.writer(csv_file, delimiter=',')
|
876 |
+
if not file_exists:
|
877 |
+
writer.writerow(fieldnames) # file doesn't exist yet, write a header
|
878 |
+
writer.writerow(data_list)
|
879 |
+
csv_file.close()
|
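Hypothetical usage, matching the prax layout that main() builds below (the values are made up):

generate_csv(["RELIANCE", 2456.10, "TFT", "-", 2462.35, 0.0123, 1])
# appends one row to result_<YYYY_MM_DD>.csv, writing the header only on first use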
880 |
+
|
881 |
+
def guess_date(string):
|
882 |
+
for fmt in ["%Y/%m/%d", "%d-%m-%Y", "%Y%m%d", "%m/%d/%Y", "%d/%m/%Y", "%Y-%m-%d", "%d/%m/%y", "%m/%d/%y"]:
|
883 |
+
try:
|
884 |
+
return datetime.datetime.strptime(string, fmt).date()
|
885 |
+
except ValueError:
|
886 |
+
continue
|
887 |
+
raise ValueError(string)
|
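Illustrative calls showing the normalisation guess_date performs before main() rewrites Date/Time:

guess_date("31/01/2023").strftime("%Y-%m-%d")  # -> '2023-01-31' (matches %d/%m/%Y)
guess_date("2023-01-31").isoformat()           # -> '2023-01-31'
# ambiguous strings such as "01/02/2023" resolve as %m/%d/%Y because that format is tried first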
888 |
+
|
889 |
+
# %%
|
890 |
+
# Main function
|
891 |
+
def main():
|
892 |
+
# Data loading
|
893 |
+
path = "./demo"
|
894 |
+
print("Searching CSV files in ", path, "...")
|
895 |
+
# path = "/kaggle/input/artemis-test"
|
896 |
+
|
897 |
+
# Get a list of all the CSV files in the folder
|
898 |
+
csv_files = glob.glob(path + "/*.csv")
|
899 |
+
prax = [0,0,0,0,0,0,0]
|
900 |
+
# Create a list of DataFrames, one for each CSV file
|
901 |
+
dfs = []
|
902 |
+
c = 0
|
903 |
+
for csv_file in csv_files:
|
904 |
+
df = pd.read_csv(csv_file)
|
905 |
+
dfs.append(df)
|
906 |
+
c = c + 1
|
907 |
+
|
908 |
+
if c == 0:
|
909 |
+
print("No CSV files found in ", path, ".")
|
910 |
+
print("Exiting...")
|
911 |
+
|
912 |
+
for df in dfs:
|
913 |
+
#print(df.head())
|
914 |
+
print(df['Ticker'][0])
|
915 |
+
prax[0] = df['Ticker'][0]
|
916 |
+
prax[1] = df['Close'][len(df)-1]
|
917 |
+
print('------------------')
|
918 |
+
#df = df.drop(['Volume'], axis=1)
|
919 |
+
for i in range(len(df)):
|
920 |
+
x = guess_date(df['Date/Time'][i])
|
921 |
+
df['Date/Time'][i] = x.strftime("%Y-%m-%d")
|
922 |
+
df['Date/Time'] = pd.to_datetime(df['Date/Time'])
|
923 |
+
df.fillna(0, inplace=True)
|
924 |
+
modelTFT(df, prax)
|
925 |
+
prax[2] = "TFT"
|
926 |
+
generate_csv(prax)
|
927 |
+
prax = [0,0,0,0,0,0,0]
|
928 |
+
modelTFT_OpenGap(df, prax)
|
929 |
+
prax[2] = "TFT_OpenGap"
|
930 |
+
generate_csv(prax)
|
931 |
+
#df.set_index('Date/Time', inplace=True)
|
932 |
+
df = df.drop(['Date/Time'], axis=1)
|
933 |
+
prax = [0,0,0,0,0,0,0]
|
934 |
+
modelCNNLSTM(df, prax)
|
935 |
+
prax[2] = "CNNLSTM"
|
936 |
+
generate_csv(prax)
|
937 |
+
prax = [0,0,0,0,0,0,0]
|
938 |
+
modelCNNLSTM_OpenGap(df, prax)
|
939 |
+
prax[2] = "CNNLSTM_OpenGap"
|
940 |
+
#print("Saving to CSV...Standby...")
|
941 |
+
generate_csv(prax)
|
942 |
+
# Generate blank line
|
943 |
+
prax=["","","","","","",""]
|
944 |
+
generate_csv(prax)
|
945 |
+
# Reset prax
|
946 |
+
prax = [0,0,0,0,0,0,0]
|
947 |
+
|
948 |
+
if __name__ == "__main__":
|
949 |
+
main()
|
950 |
+
|
951 |
+
print("Deleting temporary log files...")
|
952 |
+
# Delete "lightning_logs" directory
|
953 |
+
if os.path.exists("lightning_logs"):
|
954 |
+
shutil.rmtree("lightning_logs")
|
955 |
+
# Delete "optuna_test" directory
|
956 |
+
if os.path.exists("optuna_test"):
|
957 |
+
shutil.rmtree("optuna_test")
|
app.py
CHANGED
@@ -108,9 +108,9 @@ def objective(trial, X_train, y_train, X_test, y_test):
 def modelCNNLSTM(csv_file, prax):
     # Read the data
     df = csv_file
-    df = df['Date/Time'].values.astype("float64")
-    temp_data = df.iloc[0:len(df)-100, 1:
-    trek = df.iloc[len(df)-100:,1:
+    #df = df['Date/Time'].values.astype("float64")
+    temp_data = df.iloc[0:len(df)-100, 1:21]
+    trek = df.iloc[len(df)-100:,1:21]
     #print(temp_data)
     data = temp_data
     sc = MinMaxScaler()
@@ -148,9 +148,9 @@ def modelCNNLSTM(csv_file, prax):
     X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
     X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

-    study = optuna.create_study(direction="minimize", pruner=optuna.pruners.MedianPruner(n_min_trials=
+    study = optuna.create_study(direction="minimize", pruner=optuna.pruners.MedianPruner(n_min_trials=4, n_startup_trials=4))
     fn = lambda trial: objective(trial, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)
-    study.optimize(fn, n_trials=
+    study.optimize(fn, n_trials=5)

     best_params = study.best_params
     #print(f"Best params: {best_params}")
@@ -219,7 +219,7 @@ def modelCNNLSTM(csv_file, prax):
     #print(f'> Loss: {np.mean(loss_per_fold)}')
     #print('------------------------------------------------------------------------')

-    trek = df.iloc[0:len(df), 1:
+    trek = df.iloc[0:len(df), 1:21]
     Y = trek[0:len(trek)]
     YP = trek[1:len(trek)]
     Y1 = Y['Close']
@@ -264,8 +264,8 @@ def modelCNNLSTM_OpenGap(csv_file, prax):
             continue
         else:
             df['O-C'][i] = df['Open'][i] - df['Close'][i-1]
-    temp_data = df.iloc[0:datLength-100, 1:
-    trek = df.iloc[datLength-100:,1:
+    temp_data = df.iloc[0:datLength-100, 1:22]
+    trek = df.iloc[datLength-100:,1:22]
     #print(temp_data)
     data = temp_data
     #data = data.values.astype("float64")
@@ -305,9 +305,9 @@ def modelCNNLSTM_OpenGap(csv_file, prax):
     X_train = X_train.reshape((X_train.shape[0], X_train.shape[1], 1))
     X_test = X_test.reshape((X_test.shape[0], X_test.shape[1], 1))

-    study = optuna.create_study(direction="minimize", pruner=optuna.pruners.MedianPruner(n_min_trials=
+    study = optuna.create_study(direction="minimize", pruner=optuna.pruners.MedianPruner(n_min_trials=2, n_startup_trials=2))
     fn = lambda trial: objective(trial, X_train=X_train, X_test=X_test, y_train=y_train, y_test=y_test)
-    study.optimize(fn, n_trials=
+    study.optimize(fn, n_trials=5)

     best_params = study.best_params
     #print(f"Best params: {best_params}")
@@ -376,7 +376,7 @@ def modelCNNLSTM_OpenGap(csv_file, prax):
     #print(f'> Loss: {np.mean(loss_per_fold)}')
     #print('------------------------------------------------------------------------')

-    trek = df.iloc[0:len(df), 1:
+    trek = df.iloc[0:len(df), 1:22]
     Y = trek[0:len(trek)]
     YP = trek[1:len(trek)]
     Y1 = Y['Close']
@@ -477,7 +477,7 @@ def modelTFT(csv_file, prax):
         time_varying_known_reals=["time_idx"],
         time_varying_unknown_categoricals=[],
         time_varying_unknown_reals=[
-            'Open','High','Low','Close','OI','RSI14','RSI44','HHRSI','Rsi Weekly','LLCHHV','white','Vap44','Vap14','
+            'Open','High','Low','Close','OI','RSI14','RSI44','HHRSI','Rsi Weekly','LLCHHV','white','Vap44','Vap14','Ema5','Ema20','Ema50','Ema200'
         ],
         target_normalizer=GroupNormalizer(
             groups=['Ticker'], transformation="softplus"
@@ -709,7 +709,7 @@ def modelTFT_OpenGap(csv_file, prax):
         time_varying_known_reals=["time_idx"],
         time_varying_unknown_categoricals=[],
         time_varying_unknown_reals=[
-            'Open','High','Low','Close','OI','RSI14','RSI44','HHRSI','Rsi Weekly','LLCHHV','white','Vap44','Vap14','
+            'Open','High','Low','Close','OI','RSI14','RSI44','HHRSI','Rsi Weekly','LLCHHV','white','Vap44','Vap14','Ema5','Ema20','Ema50','Ema200', 'O-C'
         ],
         target_normalizer=GroupNormalizer(
             groups=['Ticker'], transformation="softplus"
@@ -867,7 +867,8 @@ def modelTFT_OpenGap(csv_file, prax):

 # %%
 def generate_csv(data_list):
-
+    today = date.today().strftime("%Y_%m_%d")
+    filename = f"result_{today}.csv"
     file_exists = os.path.isfile(filename)
     with open(filename, mode='a', newline='') as csv_file:
         fieldnames = ['Ticker', 'Prev_Close_Real', 'Model', 'Prev_Close_Model', 'Close_Model', 'Max_Err', 'Up_Down' ] # replace with your own column names
@@ -936,7 +937,6 @@ def main(files):

 gradioApp = gr.Interface(fn=main, inputs=gr.File(file_count="multiple", file_type=".csv"), outputs="file")

-
 if __name__ == "__main__":
     # Calling main function
     gradioApp.launch()
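With this change, app.py is driven through the Gradio interface rather than a direct main() call; uploaded CSVs arrive as temporary file objects and outputs="file" expects a file path back. A hedged sketch of that handler contract (the stub below is not the real main(), which trains the four models per ticker):

def main(files):
    paths = [f.name for f in files]   # each upload is a tempfile wrapper; .name is the on-disk path
    print("received:", paths)
    return paths[0]                   # hand a file path back to the "file" output

gradioApp = gr.Interface(fn=main, inputs=gr.File(file_count="multiple", file_type=".csv"), outputs="file")
gradioApp.launch()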
demo/sample.csv
CHANGED
The diff for this file is too large to render.