import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    r2_score,
    mean_squared_error,
    mean_absolute_error,
    mean_squared_log_error,
    root_mean_squared_error,      # requires scikit-learn >= 1.4
    root_mean_squared_log_error,  # requires scikit-learn >= 1.4
)
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score, average_precision_score

# Running log of regression results; evaluation() appends one row per call.
reg_evaluation_df = pd.DataFrame({
    "evaluation_df_method": [],
    "model": [],
    "method": [],
    "train_r2": [],
    "test_r2": [],
    "adjusted_r2_train": [],
    "adjusted_r2_test": [],
    "train_evaluation": [],
    "test_evaluation": [],
})

# Running log of classification results; evaluation() appends one row per call.
classification_evaluation_df = pd.DataFrame({
    "evaluation_df_method": [],
    "model": [],
    "train_f1": [],
    "test_f1": [],
    "train_acc": [],
    "test_acc": [],
    "precision_train": [],
    "precision_test": [],
    "recall_train": [],
    "recall_test": [],
})

def evaluation(evaluation_df_method, X_train, X_test, y_train, y_test, model, method, eva):
    """Fit `model`, score it on the train and test splits, and append one row of
    results to the matching module-level tracker (`eva` is "reg" or "class")."""
    global y_pred_train, y_pred_test  # kept global so predictions stay inspectable after the call

    model.fit(X_train, y_train)
    y_pred_train = model.predict(X_train)
    y_pred_test = model.predict(X_test)

    if eva == "reg":
        train_r2 = r2_score(y_train, y_pred_train)
        test_r2 = r2_score(y_test, y_pred_test)

        n_r_train, n_c_train = X_train.shape
        n_r_test, n_c_test = X_test.shape

        # Adjusted R^2 = 1 - (1 - R^2) * (n - 1) / (n - p - 1), which penalises
        # R^2 for the number of predictors p.
        adj_r2_train = 1 - ((1 - train_r2) * (n_r_train - 1) / (n_r_train - n_c_train - 1))
        adj_r2_test = 1 - ((1 - test_r2) * (n_r_test - 1) / (n_r_test - n_c_test - 1))

        train_evaluation = method(y_train, y_pred_train)
        test_evaluation = method(y_test, y_pred_test)

        # Label the metric by its function name; this covers any sklearn metric
        # passed in, rather than only a hard-coded list.
        method_name = method.__name__

        global reg_evaluation_df, temp_df
        temp_df = pd.DataFrame({
            "evaluation_df_method": [evaluation_df_method],
            "model": [model],
            "method": [method_name],
            "train_r2": [train_r2],
            "test_r2": [test_r2],
            "adjusted_r2_train": [adj_r2_train],
            "adjusted_r2_test": [adj_r2_test],
            "train_evaluation": [train_evaluation],
            "test_evaluation": [test_evaluation],
        })
        reg_evaluation_df = pd.concat([reg_evaluation_df, temp_df]).reset_index(drop=True)
        return reg_evaluation_df

    elif eva == "class":
        # Pick an averaging strategy for f1/precision/recall from the labels.
        # Counts from np.unique handle arbitrary label values (np.bincount
        # requires non-negative integer labels and reports zero for unused ones).
        unique_classes, class_counts = np.unique(y_train, return_counts=True)

        if len(unique_classes) == 2:
            print("Using 'binary' average for binary classification.")
            average_method = 'binary'
        else:
            imbalance_ratio = max(class_counts) / min(class_counts)

            if imbalance_ratio > 1.5:
                print("Using 'weighted' average due to imbalanced dataset.")
                average_method = 'weighted'
            else:
                print("Using 'macro' average due to balanced dataset.")
                average_method = 'macro'

        train_f1_scores = f1_score(y_train, y_pred_train, average=average_method)
        test_f1_scores = f1_score(y_test, y_pred_test, average=average_method)

        train_accuracies = accuracy_score(y_train, y_pred_train)
        test_accuracies = accuracy_score(y_test, y_pred_test)

        train_precisions = precision_score(y_train, y_pred_train, average=average_method)
        test_precisions = precision_score(y_test, y_pred_test, average=average_method)

        train_recalls = recall_score(y_train, y_pred_train, average=average_method)
        test_recalls = recall_score(y_test, y_pred_test, average=average_method)

        global classification_evaluation_df, temp_df1
        temp_df1 = pd.DataFrame({
            "evaluation_df_method": [evaluation_df_method],
            "model": [model],
            "train_f1": [train_f1_scores],
            "test_f1": [test_f1_scores],
            "train_acc": [train_accuracies],
            "test_acc": [test_accuracies],
            "precision_train": [train_precisions],
            "precision_test": [test_precisions],
            "recall_train": [train_recalls],
            "recall_test": [test_recalls],
        })
        classification_evaluation_df = pd.concat([classification_evaluation_df, temp_df1]).reset_index(drop=True)
        return classification_evaluation_df

# Name -> callable lookup for the regression metrics, so a metric can be
# selected by its string name, e.g. method_df.loc["mean_absolute_error", 0].
method_df = pd.DataFrame(
    data=[root_mean_squared_error, root_mean_squared_log_error, mean_absolute_error,
          mean_squared_error, mean_squared_log_error],
    index=["root_mean_squared_error", "root_mean_squared_log_error", "mean_absolute_error",
           "mean_squared_error", "mean_squared_log_error"],
)
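
# --------------------------------------------------------------------------
# Usage sketch (illustrative only): a minimal end-to-end run of both branches
# on synthetic data. The datasets and the LinearRegression / LogisticRegression
# choices below are assumptions made for this demo, not part of the pipeline
# above.
if __name__ == "__main__":
    from sklearn.datasets import make_regression, make_classification
    from sklearn.linear_model import LinearRegression, LogisticRegression

    # Regression: logs R^2, adjusted R^2, and the chosen error metric.
    X, y = make_regression(n_samples=200, n_features=5, noise=10.0, random_state=42)
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=42)
    evaluation("demo", X_tr, X_te, y_tr, y_te,
               LinearRegression(), root_mean_squared_error, eva="reg")
    print(reg_evaluation_df)

    # Classification: the averaging strategy is derived from y_train;
    # `method` is unused on this branch, so None is passed.
    X, y = make_classification(n_samples=200, n_features=5, random_state=42)
    X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2, random_state=42)
    evaluation("demo", X_tr, X_te, y_tr, y_te,
               LogisticRegression(max_iter=1000), None, eva="class")
    print(classification_evaluation_df)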