# TextClassification-PhoBERT — optimize_bilstm.py
# Author: Nguyen Thi Dieu Hien
# Optuna hyperparameter search for a BiLSTM classifier over PhoBERT features.
import optuna
import numpy as np
import pandas as pd
import json
import tensorflow as tf
from tensorflow.compat.v1 import ConfigProto
from tensorflow.compat.v1 import InteractiveSession
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Bidirectional, LSTM, Dropout, Dense
from tensorflow.keras.optimizers import Adam
from optuna.integration import TFKerasPruningCallback
import pickle
from optuna.visualization import plot_optimization_history
import optuna.visualization as ov
from optuna.trial import TrialState
# Configure TensorFlow (v1-compat API) so GPU memory is allocated on demand
# instead of being fully reserved at startup; the InteractiveSession installs
# itself as the default session for subsequent TF v1-style ops.
config = ConfigProto()
config.gpu_options.allow_growth = True  # grow GPU memory usage as needed
session = InteractiveSession(config=config)
"""### **Load data**"""
# Load data
with open('data/features_162k_phobertbase.pkl', 'rb') as f:
data_dict = pickle.load(f)
X_train = np.array(data_dict['X_train'])
X_val = np.array(data_dict['X_val'])
X_test = np.array(data_dict['X_test'])
y_train = data_dict['y_train']
y_val = data_dict['y_val']
y_test = data_dict['y_test']
y_train = y_train.values.astype(int)
y_test = y_test.values.astype(int)
y_val = y_val.values.astype(int)
"""##**Build Model**"""
# Define the BiLSTM model architecture
def build_bilstm_model(lstm_units_1, lstm_units_2, dense_units, dropout_rate, learning_rate):
model = Sequential()
model.add(Input(shape=(X_train.shape[1], X_train.shape[2])))
# LSTM Layer 1 with dropout
model.add(Bidirectional(LSTM(lstm_units_1, return_sequences=True)))
model.add(Dropout(dropout_rate))
# LSTM Layer 2 with dropout
model.add(Bidirectional(LSTM(lstm_units_2, return_sequences=False)))
model.add(Dropout(dropout_rate))
# Dense Layer with dropout and ReLU activation
model.add(Dense(dense_units, activation='relu'))
model.add(Dropout(dropout_rate))
# Final Dense Layer with softmax activation
model.add(Dense(y_train.shape[1], activation='softmax'))
# Use Adam optimizer with the specified learning rate
optimizer = Adam(learning_rate=learning_rate)
# Compile the model
model.compile(loss='categorical_crossentropy', optimizer=optimizer, metrics=['accuracy'])
return model
"""##**Create objective**"""
# Define the objective function for optimization
def objective_bilstm(trial):
lstm_units_1 = trial.suggest_int('lstm_units_1', 64, 512, step=32)
lstm_units_2 = trial.suggest_int('lstm_units_2', lstm_units_1//2, lstm_units_1, step=32)
dense_units = trial.suggest_int('dense_units', 64, 512, step=32)
dropout_rate = trial.suggest_float('dropout_rate', 0.2, 0.5, step=0.1)
learning_rate = trial.suggest_float('learning_rate', 1e-5, 1e-2, log=True)
epochs = trial.suggest_int('epochs', 10, 30, step=10)
batch_size = trial.suggest_int('batch_size', 64, 256, step=32)
print(f"Trying hyperparameters: lstm_units_1={lstm_units_1}, lstm_units_2={lstm_units_2}, dense_units={dense_units}, "
f"dropout_rate={dropout_rate}, learning_rate={learning_rate}, batch_size={batch_size}")
model = build_bilstm_model(lstm_units_1, lstm_units_2, dense_units, dropout_rate, learning_rate)
model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size,
validation_data=(X_val, y_val), callbacks=[TFKerasPruningCallback(trial, "val_loss")], verbose=1)
_, accuracy = model.evaluate(X_test, y_test, verbose=0)
return accuracy
"""##**Study to find hyperparameters**"""
# Create an Optuna study for optimization
study_bilstm = optuna.create_study(direction="maximize", sampler=optuna.samplers.TPESampler(), pruner=optuna.pruners.HyperbandPruner())
study_bilstm.optimize(lambda trial: objective_bilstm(trial), n_trials=100)
# Save completed trials to a CSV file
complete_trials = study_bilstm.trials_dataframe()[study_bilstm.trials_dataframe()['state'] == 'COMPLETE']
complete_trials.to_csv("assets/study_bilstm_256_trials.csv", index=False)
# Extract the best hyperparameters
best_hyperparameters_bilstm = study_bilstm.best_trial.params
# Save the best hyperparameters to a JSON file
with open('hyperparameters/BiLSTM_phobertbase.json', 'w') as file:
json.dump(best_hyperparameters_bilstm, file)
plot_optimization_history(study_bilstm)
html_file_path = "images/study_bilstm_phobertbase_optimize_history.html"
# Plot and save the optimization history plot as an HTML file
ov.plot_optimization_history(study_bilstm).write_html(html_file_path)
plot_optimization_history(study_bilstm)