# LazyML / evaluationer.py
# importing libraries
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import (root_mean_squared_error, r2_score, mean_squared_error,  # note: the root_* metrics
                             root_mean_squared_log_error, mean_absolute_error,       # require scikit-learn >= 1.4
                             mean_squared_log_error)
from sklearn.metrics import f1_score, accuracy_score, precision_score, recall_score, average_precision_score
# module-level dataframes that accumulate one row of results per evaluation() call
reg_evaluation_df = pd.DataFrame({"evaluation_df_method": [],  # label for this evaluation run
                                  "model": [],                 # regression model used
                                  "method": [],                # evaluation metric used
                                  "train_r2": [],              # train R2 score
                                  "test_r2": [],               # test R2 score
                                  "adjusted_r2_train": [],     # adjusted R2 score for train
                                  "adjusted_r2_test": [],      # adjusted R2 score for test
                                  "train_evaluation": [],      # train score from the chosen metric
                                  "test_evaluation": []})      # test score from the chosen metric
classification_evaluation_df = pd.DataFrame({"evaluation_df_method": [],
                                             "model": [],
                                             "train_f1": [],
                                             "test_f1": [],
                                             "train_acc": [],
                                             "test_acc": [],
                                             "precision_train": [],
                                             "precision_test": [],
                                             "recall_train": [],
                                             "recall_test": []})
# function for evaluating a model and logging the results into the global dataframes
def evaluation(evaluation_df_method, X_train, X_test, y_train, y_test, model, method, eva):
    # inputs: the splits from train_test_split, a model, a metric function (`method`),
    # and `eva` to pick the task: "reg" for regression, "class" for classification
    global y_pred_train, y_pred_test
    model.fit(X_train, y_train)            # model fitting
    y_pred_train = model.predict(X_train)  # model prediction for train
    y_pred_test = model.predict(X_test)    # model prediction for test
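    # both train and test predictions are scored below, so train/test gaps
    # (a rough overfitting signal) show up side by side in the result row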
    if eva == "reg":
        train_r2 = r2_score(y_train, y_pred_train)  # r2 score for train
        test_r2 = r2_score(y_test, y_pred_test)     # r2 score for test
        n_r_train, n_c_train = X_train.shape        # number of rows and columns of train data
        n_r_test, n_c_test = X_test.shape           # number of rows and columns of test data
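        # Adjusted R2 penalizes plain R2 for the number of predictors, so adding
        # uninformative features cannot inflate the score:
        #     adj_R2 = 1 - (1 - R2) * (n - 1) / (n - p - 1)
        # with n samples and p features (the rows and columns obtained above).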
        adj_r2_train = 1 - ((1 - train_r2) * (n_r_train - 1) / (n_r_train - n_c_train - 1))  # adjusted r2 for train
        adj_r2_test = 1 - ((1 - test_r2) * (n_r_test - 1) / (n_r_test - n_c_test - 1))       # adjusted r2 for test
        train_evaluation = method(y_train, y_pred_train)  # train error by the chosen metric
        test_evaluation = method(y_test, y_pred_test)     # test error by the chosen metric
        method_name = method.__name__  # readable metric name, e.g. "root_mean_squared_error"
        # declaring global dataframes
        global reg_evaluation_df, temp_df
        # temporary dataframe, concatenated below into the main evaluation dataframe
        temp_df = pd.DataFrame({"evaluation_df_method": [evaluation_df_method],
                                "model": [model],
                                "method": [method_name],
                                "train_r2": [train_r2],
                                "test_r2": [test_r2],
                                "adjusted_r2_train": [adj_r2_train],
                                "adjusted_r2_test": [adj_r2_test],
                                "train_evaluation": [train_evaluation],
                                "test_evaluation": [test_evaluation]})
        reg_evaluation_df = pd.concat([reg_evaluation_df, temp_df]).reset_index(drop=True)
        return reg_evaluation_df  # returning the accumulated evaluation dataframe
    elif eva == "class":
        unique_classes = np.unique(y_train)
        # Determine the averaging method for f1/precision/recall
        if len(unique_classes) == 2:
            # Binary classification
            print("Using 'binary' average for binary classification.")
            average_method = 'binary'
        else:
            # Multiclass: inspect the distribution of the target column
            _, class_counts = np.unique(y_train, return_counts=True)
            # Check if the dataset is imbalanced
            imbalance_ratio = max(class_counts) / min(class_counts)
            if imbalance_ratio > 1.5:
                # Imbalanced dataset
                print("Using 'weighted' average due to imbalanced dataset.")
                average_method = 'weighted'
            else:
                # Balanced dataset
                print("Using 'macro' average due to balanced dataset.")
                average_method = 'macro'
        # F1 scores
        train_f1_scores = f1_score(y_train, y_pred_train, average=average_method)
        test_f1_scores = f1_score(y_test, y_pred_test, average=average_method)
        # Accuracies
        train_accuracies = accuracy_score(y_train, y_pred_train)
        test_accuracies = accuracy_score(y_test, y_pred_test)
        # Precisions
        train_precisions = precision_score(y_train, y_pred_train, average=average_method)
        test_precisions = precision_score(y_test, y_pred_test, average=average_method)
        # Recalls
        train_recalls = recall_score(y_train, y_pred_train, average=average_method)
        test_recalls = recall_score(y_test, y_pred_test, average=average_method)
        # declaring global dataframes
        global classification_evaluation_df, temp_df1
        # temporary dataframe, concatenated below into the main evaluation dataframe
        temp_df1 = pd.DataFrame({"evaluation_df_method": [evaluation_df_method],
                                 "model": [model],
                                 "train_f1": [train_f1_scores],
                                 "test_f1": [test_f1_scores],
                                 "train_acc": [train_accuracies],
                                 "test_acc": [test_accuracies],
                                 "precision_train": [train_precisions],
                                 "precision_test": [test_precisions],
                                 "recall_train": [train_recalls],
                                 "recall_test": [test_recalls]})
        classification_evaluation_df = pd.concat([classification_evaluation_df, temp_df1]).reset_index(drop=True)
        return classification_evaluation_df  # returning the accumulated evaluation dataframe
# lookup table of the available regression metrics, indexed by name
method_df = pd.DataFrame(data=[root_mean_squared_error, root_mean_squared_log_error, mean_absolute_error,
                               mean_squared_error, mean_squared_log_error],
                         index=["root_mean_squared_error", "root_mean_squared_log_error",
                                "mean_absolute_error", "mean_squared_error", "mean_squared_log_error"])
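
# --- illustrative usage (a minimal sketch, not part of the original module) ---
# Assumes scikit-learn's bundled diabetes and iris datasets and simple linear
# models purely for demonstration; any estimator with fit/predict would do.
if __name__ == "__main__":
    from sklearn.datasets import load_diabetes, load_iris
    from sklearn.linear_model import LinearRegression, LogisticRegression

    # regression: logs R2 / adjusted R2 plus the chosen error metric
    X, y = load_diabetes(return_X_y=True, as_frame=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    print(evaluation("diabetes_baseline", X_train, X_test, y_train, y_test,
                     LinearRegression(), root_mean_squared_error, eva="reg"))

    # classification: `method` is unused on this path, but the argument is still required
    X, y = load_iris(return_X_y=True, as_frame=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    print(evaluation("iris_baseline", X_train, X_test, y_train, y_test,
                     LogisticRegression(max_iter=1000), f1_score, eva="class"))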