# importing libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
# root_mean_squared_error and root_mean_squared_log_error require scikit-learn >= 1.4
from sklearn.metrics import (root_mean_squared_error, r2_score, mean_squared_error,
                             root_mean_squared_log_error, mean_absolute_error,
                             mean_squared_log_error)
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score, average_precision_score
# empty dataframe that accumulates one row per regression-model evaluation
reg_evaluation_df = pd.DataFrame({"evaluation_df_method": [],  # label identifying the evaluation run
                                  "model": [],              # regression model used
                                  "method": [],             # evaluation metric used
                                  "train_r2": [],           # R2 score on train data
                                  "test_r2": [],            # R2 score on test data
                                  "adjusted_r2_train": [],  # adjusted R2 score on train data
                                  "adjusted_r2_test": [],   # adjusted R2 score on test data
                                  "train_evaluation": [],   # train error from the chosen metric
                                  "test_evaluation": []     # test error from the chosen metric
                                  })
# empty dataframe that accumulates one row per classification-model evaluation
classification_evaluation_df = pd.DataFrame({"evaluation_df_method": [],  # label identifying the evaluation run
                                             'model': [],            # classification model used
                                             'train_f1': [],         # F1 score on train data
                                             'test_f1': [],          # F1 score on test data
                                             'train_acc': [],        # accuracy on train data
                                             'test_acc': [],         # accuracy on test data
                                             'precision_train': [],  # precision on train data
                                             'precision_test': [],   # precision on test data
                                             'recall_train': [],     # recall on train data
                                             'recall_test': []       # recall on test data
                                             })
# function for evaluating a model; parameters come from train_test_split, plus the model,
# the metric function (`method`, used for regression only) and `eva` ("reg" or "class")
def evaluation(evaluation_df_method, X_train, X_test, y_train, y_test, model, method, eva):
    global y_pred_train, y_pred_test
    model.fit(X_train, y_train)            # model fitting
    y_pred_train = model.predict(X_train)  # model prediction for train
    y_pred_test = model.predict(X_test)    # model prediction for test
if eva == "reg":
train_r2 = r2_score(y_train, y_pred_train) # evaluating r2 score for train
test_r2 = r2_score(y_test, y_pred_test) # evaluating r2 score for test
n_r_train, n_c_train = X_train.shape # getting no of rows and columns of train data
n_r_test, n_c_test = X_test.shape # getting no of rows and columns of test data
adj_r2_train = 1 - ((1 - train_r2)*(n_r_train - 1)/ (n_r_train - n_c_train - 1)) # evaluating adjusted r2 score for train
adj_r2_test = 1 - ((1 - test_r2)*(n_r_test - 1)/ (n_r_test - n_c_test - 1)) # evaluating adjusted r2 score for test
train_evaluation = method(y_train, y_pred_train) # evaluating train error
test_evaluation = method(y_test, y_pred_test) # evaluating test error
if method == root_mean_squared_error:
a = "root_mean_squared_error"
elif method ==root_mean_squared_log_error:
a = "root_mean_squared_log_error"
elif method == mean_absolute_error:
a = "mean_absolute_error"
elif method == mean_squared_error:
a = "mean_squared_error"
elif method == mean_squared_log_error:
a = "mean_squared_log_error"
        # declaring global dataframes
        global reg_evaluation_df, temp_df
        # temporary dataframe holding this run's row, concatenated into the main evaluation dataframe
        temp_df = pd.DataFrame({"evaluation_df_method": [evaluation_df_method],
                                "model": [model],
                                "method": [a],
                                "train_r2": [train_r2],
                                "test_r2": [test_r2],
                                "adjusted_r2_train": [adj_r2_train],
                                "adjusted_r2_test": [adj_r2_test],
                                "train_evaluation": [train_evaluation],
                                "test_evaluation": [test_evaluation]
                                })
        reg_evaluation_df = pd.concat([reg_evaluation_df, temp_df]).reset_index(drop=True)
        return reg_evaluation_df  # returning the accumulated evaluation dataframe
elif eva == "class":
# y_pred_proba_train= model.predict_proba(X_train)
# y_pred_proba_test= model.predict_proba(X_test)
unique_classes = np.unique(y_train)
# Determine the average method
if len(unique_classes) == 2:
# Binary classification
print("Using 'binary' average for binary classification.")
average_method = 'binary'
elif len(unique_classes)!=2:
# Determine the distribution of the target column
class_counts = np.bincount(y_train)
# Check if the dataset is imbalanced
imbalance_ratio = max(class_counts) / min(class_counts)
if imbalance_ratio > 1.5:
# Imbalanced dataset
print("Using 'weighted' average due to imbalanced dataset.")
average_method = 'weighted'
else:
# Balanced dataset
print("Using 'macro' average due to balanced dataset.")
average_method = 'macro'
        # F1 scores
        train_f1_scores = f1_score(y_train, y_pred_train, average=average_method)
        test_f1_scores = f1_score(y_test, y_pred_test, average=average_method)
        # accuracies
        train_accuracies = accuracy_score(y_train, y_pred_train)
        test_accuracies = accuracy_score(y_test, y_pred_test)
        # precisions
        train_precisions = precision_score(y_train, y_pred_train, average=average_method)
        test_precisions = precision_score(y_test, y_pred_test, average=average_method)
        # recalls
        train_recalls = recall_score(y_train, y_pred_train, average=average_method)
        test_recalls = recall_score(y_test, y_pred_test, average=average_method)
        # declaring global dataframes
        global classification_evaluation_df, temp_df1
        # temporary dataframe holding this run's row, concatenated into the main evaluation dataframe
        temp_df1 = pd.DataFrame({"evaluation_df_method": [evaluation_df_method],
                                 'model': [model],
                                 'train_f1': [train_f1_scores],
                                 'test_f1': [test_f1_scores],
                                 'train_acc': [train_accuracies],
                                 'test_acc': [test_accuracies],
                                 'precision_train': [train_precisions],
                                 'precision_test': [test_precisions],
                                 'recall_train': [train_recalls],
                                 'recall_test': [test_recalls]
                                 })
        classification_evaluation_df = pd.concat([classification_evaluation_df, temp_df1]).reset_index(drop=True)
        return classification_evaluation_df  # returning the accumulated evaluation dataframe
# lookup table mapping metric names to metric functions, for iterating over regression metrics
method_df = pd.DataFrame(data=[root_mean_squared_error, root_mean_squared_log_error, mean_absolute_error,
                               mean_squared_error, mean_squared_log_error],
                         index=["root_mean_squared_error", "root_mean_squared_log_error", "mean_absolute_error",
                                "mean_squared_error", "mean_squared_log_error"])