File size: 4,402 Bytes
c2a30b3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29937fc
c2a30b3
 
 
29937fc
c2a30b3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29937fc
c2a30b3
 
29937fc
c2a30b3
 
29937fc
c2a30b3
 
29937fc
c2a30b3
29937fc
c2a30b3
29937fc
c2a30b3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29937fc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
import json
import os
import pickle

import numpy as np
import optuna
import optuna.visualization as ov
import pandas as pd
import tensorflow as tf
from optuna.integration import TFKerasPruningCallback
from optuna.trial import TrialState
from optuna.visualization import plot_optimization_history
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
from tensorflow.keras.layers import Input, Bidirectional, LSTM, Dropout, Dense
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import Adam

# Configure TensorFlow through the TF1 compatibility API: with
# `gpu_options.allow_growth` enabled, GPU memory is allocated on demand instead
# of being reserved up front (per the TensorFlow documentation for that flag).
config = ConfigProto()
config.gpu_options.allow_growth = True
# Open an interactive session that carries the configuration above.
session = InteractiveSession(config=config)

"""### **Load data**"""

# Read the pre-computed feature/label splits from the pickled dictionary.
# NOTE: unpickling can execute arbitrary code, so only load trusted dumps.
with open('data/features_162k_phobertbase.pkl', 'rb') as f:
    data_dict = pickle.load(f)

# Feature splits are converted to NumPy arrays.
X_train, X_val, X_test = (
    np.array(data_dict[split_key])
    for split_key in ('X_train', 'X_val', 'X_test')
)

# Label splits must expose `.values` (presumably pandas objects -- confirm
# against the code that wrote the pickle); they are cast to integer arrays.
y_train, y_val, y_test = (
    data_dict[split_key].values.astype(int)
    for split_key in ('y_train', 'y_val', 'y_test')
)

"""##**Build Model**"""

# Define the BiLSTM model architecture
def build_bilstm_model(lstm_units_1, lstm_units_2, dense_units, dropout_rate, learning_rate):
    """Assemble and compile a stacked bidirectional-LSTM classifier.

    The input shape is read from the module-level ``X_train`` (its second and
    third dimensions) and the width of the softmax output from
    ``y_train.shape[1]``.

    Args:
        lstm_units_1: Units of the first LSTM (which returns full sequences).
        lstm_units_2: Units of the second LSTM (which returns its last output).
        dense_units: Units of the ReLU dense layer.
        dropout_rate: Rate shared by the three dropout layers.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        The compiled ``Sequential`` model (categorical cross-entropy loss,
        accuracy metric).
    """
    input_shape = (X_train.shape[1], X_train.shape[2])
    output_units = y_train.shape[1]

    # Declared in the order they are stacked; each recurrent/dense stage is
    # followed by its own dropout layer.
    layer_stack = [
        Input(shape=input_shape),
        Bidirectional(LSTM(lstm_units_1, return_sequences=True)),
        Dropout(dropout_rate),
        Bidirectional(LSTM(lstm_units_2, return_sequences=False)),
        Dropout(dropout_rate),
        Dense(dense_units, activation='relu'),
        Dropout(dropout_rate),
        Dense(output_units, activation='softmax'),
    ]

    model = Sequential()
    for layer in layer_stack:
        model.add(layer)

    model.compile(
        optimizer=Adam(learning_rate=learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )
    return model

"""##**Create objective**"""

# Define the objective function for optimization
def objective_bilstm(trial):
    """Train one BiLSTM candidate for an Optuna trial and return its accuracy.

    Samples the architecture and training hyperparameters from ``trial``,
    builds the model with ``build_bilstm_model``, fits it on the module-level
    training split while reporting validation accuracy to Optuna after every
    epoch, and finally scores the model on the test split.

    Args:
        trial: The Optuna trial used to sample hyperparameters and to report
            intermediate values for pruning.

    Returns:
        The accuracy returned by ``model.evaluate`` on ``X_test``/``y_test``.

    Note:
        The pruning callback may end an unpromising trial early; Optuna
        signals this from inside ``model.fit``.
    """
    lstm_units_1 = trial.suggest_int('lstm_units_1', 64, 512, step=32)
    # The second layer's range depends on the first: between half of
    # lstm_units_1 and lstm_units_1 itself.
    lstm_units_2 = trial.suggest_int('lstm_units_2', lstm_units_1//2, lstm_units_1, step=32)
    dense_units = trial.suggest_int('dense_units', 64, 512, step=32)
    dropout_rate = trial.suggest_float('dropout_rate', 0.2, 0.5, step=0.1)
    learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)
    epochs = trial.suggest_int('epochs', 10, 30, step=10)
    batch_size = trial.suggest_int('batch_size', 64, 256, step=32)

    print(f"Trying hyperparameters: lstm_units_1={lstm_units_1}, lstm_units_2={lstm_units_2}, dense_units={dense_units}, "
          f"dropout_rate={dropout_rate}, learning_rate={learning_rate}, epochs={epochs}, batch_size={batch_size}")

    model = build_bilstm_model(lstm_units_1, lstm_units_2, dense_units, dropout_rate, learning_rate)

    # The study maximizes accuracy, so the value reported for pruning must also
    # be "higher is better". Reporting val_loss would make the pruner favour
    # high-loss trials and prune the promising ones.
    pruning_callback = TFKerasPruningCallback(trial, "val_accuracy")

    model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
              validation_data=(X_val, y_val), callbacks=[pruning_callback], verbose=1)

    # NOTE(review): the objective is scored on the test split, so the selected
    # hyperparameters end up tuned against test data. Consider scoring on the
    # validation split and keeping the test split for one final evaluation.
    _, accuracy = model.evaluate(X_test, y_test, verbose=0)

    return accuracy

"""##**Study to find hyperparameters**"""

# Create every output directory before the (long) search starts, so the
# results are not lost to a missing folder once all trials have finished.
for output_dir in ("assets", "hyperparameters", "images"):
    os.makedirs(output_dir, exist_ok=True)

# Create an Optuna study for optimization: maximize the objective's accuracy
# with the TPE sampler and the Hyperband pruner.
study_bilstm = optuna.create_study(
    direction="maximize",
    sampler=optuna.samplers.TPESampler(),
    pruner=optuna.pruners.HyperbandPruner(),
)
study_bilstm.optimize(objective_bilstm, n_trials=100)

# Save completed trials to a CSV file (the dataframe is built once and then
# filtered on its 'state' column).
trials_df = study_bilstm.trials_dataframe()
complete_trials = trials_df[trials_df['state'] == 'COMPLETE']
complete_trials.to_csv("assets/study_bilstm_256_trials.csv", index=False)

# Extract the best hyperparameters
best_hyperparameters_bilstm = study_bilstm.best_trial.params

# Save the best hyperparameters to a JSON file
with open('hyperparameters/BiLSTM_phobertbase.json', 'w') as file:
    json.dump(best_hyperparameters_bilstm, file)

html_file_path = "images/study_bilstm_phobertbase_optimize_history.html"
# Build the optimization history figure once and save it as an HTML file; the
# figure object is kept so it can still be displayed in an interactive session.
optimization_history_fig = ov.plot_optimization_history(study_bilstm)
optimization_history_fig.write_html(html_file_path)