[ update ]: remove asyncio features
Changed files:
- .github/workflows/gru_pipeline.yaml +1 -1
- .github/workflows/lstm_gru_pipeline.yaml +1 -1
- .github/workflows/lstm_pipeline.yaml +1 -1
- dev.requirements.txt +18 -12
- training/data_processor.pyx +4 -4
- training/main.pyx +11 -11
- training/model_builder.pyx +3 -3
- training/post_processor.pyx +2 -2
- training/trainer.pyx +1 -1
- trainingcli.py +18 -10
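The change set converts the training pipeline from asyncio to plain synchronous calls: every `async def` in the Cython modules becomes a `def`, each `await` at the call sites is dropped, and `trainingcli.py` invokes `training(...)` directly instead of through an event loop. Alongside that, the three CI workflows switch to installing the pinned `dev.requirements.txt`, which gains Cython, the numba/llvmlite pair, and several redis-family clients.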
.github/workflows/gru_pipeline.yaml (CHANGED)

@@ -74,7 +74,7 @@ jobs:
           fetch-depth: 1
 
       - name: Install Libraries
-        run: pip install -r requirements.txt
+        run: pip install -r dev.requirements.txt
 
       - name: Download Artifact (datasets)
         uses: actions/download-artifact@v3
.github/workflows/lstm_gru_pipeline.yaml (CHANGED)

@@ -74,7 +74,7 @@ jobs:
           fetch-depth: 1
 
      - name: Install Libraries
-        run: pip install -r requirements.txt
+        run: pip install -r dev.requirements.txt
 
      - name: Download Artifact (datasets)
        uses: actions/download-artifact@v3
.github/workflows/lstm_pipeline.yaml (CHANGED)

@@ -74,7 +74,7 @@ jobs:
           fetch-depth: 1
 
      - name: Install Libraries
-        run: pip install -r requirements.txt
+        run: pip install -r dev.requirements.txt
 
      - name: Download Artifact (datasets)
        uses: actions/download-artifact@v3
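All three pipeline workflows carry the identical one-line change: the Install Libraries step now installs from `dev.requirements.txt` rather than `requirements.txt`.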
dev.requirements.txt (CHANGED)

@@ -1,44 +1,48 @@
 absl-py==2.1.0
+aioredis==2.0.1
 annotated-types==0.7.0
 anyio==4.4.0
 astunparse==1.6.3
+async-timeout==4.0.3
 certifi==2024.7.4
 charset-normalizer==3.3.2
 click==8.1.7
+Cython==3.0.10
 dnspython==2.6.1
-email_validator==2.
+email_validator==2.2.0
-exceptiongroup==1.2.
+exceptiongroup==1.2.2
 fastapi==0.111.0
-fastapi-cli==0.0.
+fastapi-cli==0.0.5
 flatbuffers==24.3.25
-gast==0.
+gast==0.6.0
 google-pasta==0.2.0
-grpcio==1.
+grpcio==1.65.4
 h11==0.14.0
 h5py==3.11.0
 httpcore==1.0.5
 httptools==0.6.1
 httpx==0.27.0
 idna==3.7
-importlib_metadata==7.1.0
 Jinja2==3.1.4
 joblib==1.4.2
 keras==3.3.3
 libclang==18.1.1
+llvmlite==0.43.0
 Markdown==3.6
 markdown-it-py==3.0.0
 MarkupSafe==2.1.5
 mdurl==0.1.2
 ml-dtypes==0.3.2
 namex==0.0.8
+numba==0.60.0
 numpy==1.26.4
 opt-einsum==3.3.0
-optree==0.
-
-packaging==24.0
+optree==0.12.1
+packaging==24.1
 pandas==2.2.2
 protobuf==4.25.3
 pydantic==2.7.2
+pydantic-settings==2.4.0
 pydantic_core==2.18.3
 Pygments==2.18.0
 python-dateutil==2.9.0.post0
@@ -46,6 +50,8 @@ python-dotenv==1.0.1
 python-multipart==0.0.9
 pytz==2024.1
 PyYAML==6.0.1
+redis==3.5.3
+redis-py-cluster==2.1.3
 requests==2.32.3
 rich==13.7.1
 scikit-learn==1.5.0
@@ -57,18 +63,18 @@ starlette==0.37.2
 tensorboard==2.16.2
 tensorboard-data-server==0.7.2
 tensorflow==2.16.1
-tensorflow-io-gcs-filesystem==0.
+tensorflow-io-gcs-filesystem==0.37.1
 termcolor==2.4.0
 threadpoolctl==3.5.0
 typer==0.12.3
-typing_extensions==4.12.
+typing_extensions==4.12.2
 tzdata==2024.1
 ujson==5.10.0
 urllib3==2.2.2
 uvicorn==0.30.1
 uvloop==0.19.0
+valkey==6.0.0b1
 watchfiles==0.22.0
 websockets==12.0
 Werkzeug==3.0.3
 wrapt==1.16.0
-zipp==3.19.1
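Besides bumping several existing pins (email_validator, exceptiongroup, fastapi-cli, gast, grpcio, optree, packaging, tensorflow-io-gcs-filesystem, typing_extensions), the file drops importlib_metadata and zipp and adds Cython==3.0.10 (presumably for building the .pyx modules below), the numba/llvmlite pair, pydantic-settings, and the redis-family clients aioredis, async-timeout, redis, redis-py-cluster, and valkey.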
training/data_processor.pyx (CHANGED)

@@ -6,7 +6,7 @@ filterwarnings('ignore')
 
 
 """ Get Datasets """
-async def get_datasets(str datasets_path):
+def get_datasets(str datasets_path):
     cdef list items = os.listdir(datasets_path)
     cdef list csv_files = []
     cdef str item
@@ -19,7 +19,7 @@ async def get_datasets(str datasets_path):
 
 
 """ Create Sequences """
-async def create_sequences(df, int sequence_length):
+def create_sequences(df, int sequence_length):
     cdef list labels = []
     cdef list sequences = []
     cdef int i
@@ -34,7 +34,7 @@ async def create_sequences(df, int sequence_length):
 
 
 """ Pre-Process Data """
-async def preprocess_data(dataframe):
+def preprocess_data(dataframe):
     cdef str col
 
     for col in dataframe.columns:
@@ -48,7 +48,7 @@ async def preprocess_data(dataframe):
 
 
 """ Scale Data """
-async def scale_data(dataframe, scaler_cls):
+def scale_data(dataframe, scaler_cls):
     scaler = scaler_cls()
     dataframe['Close'] = scaler.fit_transform(dataframe[['Close']])
     return scaler, dataframe
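With the `async` qualifiers gone, the data helpers are ordinary blocking functions, so callers need neither an event loop nor `await`. A minimal usage sketch mirroring the call pattern in `main.pyx` (the dataset path and the sequence length of 60 are illustrative, and it assumes the .pyx modules have been compiled):

import os
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from training.data_processor import get_datasets, create_sequences, scale_data

datasets_path = './datasets'
for dataset in get_datasets(datasets_path):           # plain call, no await
    df = pd.read_csv(os.path.join(datasets_path, dataset), index_col='Date')[['Close']]
    df.dropna(inplace=True)
    scaler, df = scale_data(df, MinMaxScaler)         # returns (fitted scaler, scaled frame)
    sequences, labels = create_sequences(df, 60)      # 60 is an illustrative sequence length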
training/main.pyx (CHANGED)

@@ -24,14 +24,14 @@ from training.model_builder import (
 from warnings import filterwarnings
 filterwarnings('ignore')
 
-async def main(algorithm: str, sequence_length: int, epochs: int, batch_size: int):
+def main(algorithm: str, sequence_length: int, epochs: int, batch_size: int):
     datasets_path = './datasets'
     models_path = './models'
     posttrained = './posttrained'
     pickle_file = './pickles'
 
 
-    for dataset in await get_datasets(datasets_path):
+    for dataset in get_datasets(datasets_path):
         print(f"[TRAINING] {dataset.replace('.csv', '')} ")
 
         dataframe = pd.read_csv(os.path.join(datasets_path, dataset), index_col='Date')[['Close']]
@@ -39,28 +39,28 @@ async def main(algorithm: str, sequence_length: int, epochs: int, batch_size: int):
 
         # dataframe = preprocess_data(dataframe)
         dataframe.dropna(inplace = True)
-        standard_scaler, dataframe = await scale_data(dataframe, StandardScaler)
-        minmax_scaler, dataframe = await scale_data(dataframe, MinMaxScaler)
+        standard_scaler, dataframe = scale_data(dataframe, StandardScaler)
+        minmax_scaler, dataframe = scale_data(dataframe, MinMaxScaler)
 
-        sequences, labels = await create_sequences(dataframe, sequence_length)
+        sequences, labels = create_sequences(dataframe, sequence_length)
         input_shape = (sequences.shape[1], sequences.shape[2])
 
         if algorithm == "GRU":
-            model = await gru_model(input_shape)
+            model = gru_model(input_shape)
 
         elif algorithm == "LSTM":
-            model = await lstm_model(input_shape)
+            model = lstm_model(input_shape)
 
         elif algorithm == "LSTM_GRU":
-            model = await lstm_gru_model(input_shape)
+            model = lstm_gru_model(input_shape)
 
-        else: model = await lstm_model(input_shape)
+        else: model = lstm_model(input_shape)
 
         train_size = int(len(sequences) * 0.8)
         X_train, X_test = sequences[:train_size], sequences[train_size:]
         y_train, y_test = labels[:train_size], labels[train_size:]
 
-        await train({
+        train({
             'model': model,
             'model_file': model_file,
             'sequence_length': sequence_length,
@@ -70,7 +70,7 @@ async def main(algorithm: str, sequence_length: int, epochs: int, batch_size: int):
 
         dataframe_json = {'Date': dataframe.index.tolist(), 'Close': dataframe['Close'].tolist()}
 
-        await save_json(
+        save_json(
             os.path.join(posttrained, f'{dataset.replace(".csv", "")}-posttrained.json'),
             dataframe_json
         )
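`main.pyx` now drives the whole run synchronously; each call site simply loses its `await`. Since these are Cython modules, and Cython==3.0.10 was just added to the requirements, they still have to be compiled before `trainingcli.py` can import them. A hypothetical build sketch (the repo's actual build configuration is not part of this diff):

from setuptools import setup
from Cython.Build import cythonize

# hypothetical setup.py; the module glob covers the five .pyx files touched by this commit
setup(
    ext_modules = cythonize(
        ['training/*.pyx'],
        compiler_directives = {'language_level': 3},
    )
)

Built in place with `python setup.py build_ext --inplace`.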
training/model_builder.pyx (CHANGED)

@@ -6,7 +6,7 @@ filterwarnings('ignore')
 
 
 """ GRU (Gated Recurrent Units) Model """
-async def gru_model(input_shape):
+def gru_model(input_shape):
     cdef object model = Sequential([
         GRU(50, return_sequences = True, input_shape = input_shape),
         Dropout(0.2),
@@ -28,7 +28,7 @@ async def gru_model(input_shape):
 
 
 """ LSTM (Long Short-Term Memory) Model """
-async def lstm_model(input_shape):
+def lstm_model(input_shape):
     cdef object model = Sequential([
         LSTM(50, return_sequences = True, input_shape = input_shape),
         Dropout(0.2),
@@ -53,7 +53,7 @@ async def lstm_model(input_shape):
     LSTM (Long Short-Term Memory) and
     GRU (Gated Recurrent Units) Model
 """
-async def lstm_gru_model(input_shape):
+def lstm_gru_model(input_shape):
     cdef object model = Sequential([
         LSTM(50, return_sequences = True, input_shape = input_shape),
         Dropout(0.2),
training/post_processor.pyx (CHANGED)

@@ -6,12 +6,12 @@ filterwarnings('ignore')
 
 
 """ Inverse Transform """
-async def inverse_transform(object scaler, data):
+def inverse_transform(object scaler, data):
     return scaler.inverse_transform(data)
 
 
 """ save json """
-async def save_json(str filename, data):
+def save_json(str filename, data):
     with open(filename, 'w') as f:
         json.dump(data, f)
 
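The post-processing helpers follow the same pattern. A self-contained sketch of the new synchronous calls, using a stand-in scaler and a hypothetical output filename:

import numpy as np
from sklearn.preprocessing import MinMaxScaler
from training.post_processor import inverse_transform, save_json

scaler = MinMaxScaler().fit(np.array([[1.0], [2.0], [3.0]]))  # stand-in for the scaler fitted during training
scaled = scaler.transform(np.array([[1.5], [2.5]]))
prices = inverse_transform(scaler, scaled)                    # plain call, no await
save_json('example-posttrained.json',                         # hypothetical filename
          {'Close': prices.flatten().tolist()})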
training/trainer.pyx (CHANGED)

@@ -5,7 +5,7 @@ filterwarnings('ignore')
 
 
 """ Trainer """
-async def train(dict configuration, X_train, y_train, X_test, y_test):
+def train(dict configuration, X_train, y_train, X_test, y_test):
     cdef object early_stopping = EarlyStopping(
         monitor = 'val_loss',
         patience = 5,
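`train` keeps its `cdef` EarlyStopping setup; only the signature changes, from a coroutine to a plain function taking the configuration dict and the train/test splits, matching the synchronous `train({...})` call in `main.pyx`.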
trainingcli.py (CHANGED)

@@ -1,4 +1,4 @@
-import asyncio
+# import asyncio
 import argparse
 from training.main import main as training
 
@@ -14,17 +14,25 @@ def main() -> None:
     parser.add_argument('-s', '--sequences', type = int, required = True, help = 'sequences length')
 
     args = parser.parse_args()
-    event_loop = asyncio.get_event_loop()
-
-    event_loop.run_until_complete(
-        training(
-            epochs = args.epochs,
-            batch_size = args.batchs,
-            algorithm = args.algorithm,
-            sequence_length = args.sequences
-        )
+
+    training(
+        epochs = args.epochs,
+        batch_size = args.batchs,
+        algorithm = args.algorithm,
+        sequence_length = args.sequences
     )
 
+    # event_loop = asyncio.get_event_loop()
+
+    # event_loop.run_until_complete(
+    #     training(
+    #         epochs = args.epochs,
+    #         batch_size = args.batchs,
+    #         algorithm = args.algorithm,
+    #         sequence_length = args.sequences
+    #     )
+    # )
+
 
 if __name__ == "__main__": main()
 
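The CLI now calls `training(...)` directly and keeps the old event-loop scaffolding as a comment instead of deleting it. Worth noting, though not part of this commit: `asyncio.run()` has superseded the `get_event_loop()`/`run_until_complete()` pattern since Python 3.7. A sketch of what restoring the async entry point could look like, assuming `training` were made a coroutine again and `args` comes from `parser.parse_args()` as in the file:

import asyncio

# hypothetical: only valid if training() were `async def` again
asyncio.run(
    training(
        epochs = args.epochs,
        batch_size = args.batchs,
        algorithm = args.algorithm,
        sequence_length = args.sequences
    )
)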