|
|
| import pandas as pd
|
| import numpy as np
|
| import pickle
|
| from sklearn.linear_model import LogisticRegression
|
| from sklearn.ensemble import RandomForestClassifier
|
| from xgboost import XGBClassifier
|
| from tensorflow.keras.models import Sequential
|
| from tensorflow.keras.layers import Dense, Dropout
|
| from sklearn.metrics import (
|
| accuracy_score, precision_score, recall_score, f1_score,
|
| roc_auc_score, confusion_matrix, classification_report
|
| )
|
| import matplotlib.pyplot as plt
|
| import seaborn as sns
|
| import warnings
|
|
|
|
|
| warnings.filterwarnings('ignore')  # NOTE: no category/module filter is given, so this silences every warning for the rest of the process
|
|
|
class ModelTrainer:
    """Train, evaluate, save and load a small suite of binary classifiers.

    ``self.models`` maps a display name to an unfitted estimator (logistic
    regression, random forest, XGBoost).  ``self.neural_net`` is ``None``
    until :meth:`train_neural_network` has been called, after which it holds
    the fitted Keras ``Sequential`` model.
    """

    def __init__(self, scale_pos_weight=10, n_estimators=100, random_state=42):
        """Create the unfitted estimators.

        Args:
            scale_pos_weight: Value forwarded to ``XGBClassifier`` as
                ``scale_pos_weight``.  The default keeps the previously
                hard-coded value of 10.
            n_estimators: Number of trees for both the random forest and
                XGBoost.
            random_state: Seed forwarded to the random forest and XGBoost.
        """
        self.models = {
            'Logistic Regression': LogisticRegression(
                max_iter=1000,
                class_weight='balanced',
            ),
            'Random Forest': RandomForestClassifier(
                n_estimators=n_estimators,
                class_weight='balanced',
                random_state=random_state,
            ),
            # NOTE(review): `use_label_encoder` is kept because the original
            # code passed it; confirm the installed XGBoost still accepts it.
            'XGBoost': XGBClassifier(
                scale_pos_weight=scale_pos_weight,
                n_estimators=n_estimators,
                random_state=random_state,
                use_label_encoder=False,
                eval_metric='logloss',
            ),
        }
        self.neural_net = None

    def train_models(self, X_train, y_train):
        """Fit every estimator in ``self.models`` on the training data.

        The estimators are fitted in place, so the returned dictionary
        references the same objects that are stored in ``self.models``.

        Args:
            X_train: Training features, passed unchanged to ``fit``.
            y_train: Training labels, passed unchanged to ``fit``.

        Returns:
            dict: Display name -> fitted estimator.
        """
        trained_models = {}

        for name, model in self.models.items():
            print(f"Training {name}...")
            model.fit(X_train, y_train)
            trained_models[name] = model

        return trained_models

    def train_neural_network(self, X_train, y_train, input_dim, epochs=20,
                             batch_size=64, validation_split=0.2, verbose=1):
        """Build, compile and fit a small fully connected network.

        The architecture is 64 -> 32 -> 16 ReLU units (dropout of 0.3 after
        the first two layers) followed by a single sigmoid output unit, and
        it is trained with Adam on binary cross-entropy.

        Args:
            X_train: Training features.
            y_train: Training labels.
            input_dim: Forwarded as ``input_dim`` to the first ``Dense`` layer.
            epochs: Number of training epochs.
            batch_size: Mini-batch size.
            validation_split: Fraction forwarded to Keras ``fit`` as
                ``validation_split``.
            verbose: Keras verbosity level.

        Returns:
            tuple: ``(model, history)`` where ``history`` is the object
            returned by ``model.fit``.  The model is also stored on
            ``self.neural_net``.
        """
        model = Sequential([
            Dense(64, activation='relu', input_dim=input_dim),
            Dropout(0.3),
            Dense(32, activation='relu'),
            Dropout(0.3),
            Dense(16, activation='relu'),
            Dense(1, activation='sigmoid'),
        ])

        model.compile(
            optimizer='adam',
            loss='binary_crossentropy',
            metrics=['accuracy'],
        )

        history = model.fit(
            X_train, y_train,
            epochs=epochs,
            batch_size=batch_size,
            validation_split=validation_split,
            verbose=verbose,
        )

        self.neural_net = model
        return model, history

    @staticmethod
    def _predict_labels_and_scores(model, X_test, model_name):
        """Return ``(hard labels, positive-class scores)`` as 1-D arrays."""
        # The explicit name check is kept for backward compatibility; the
        # capability check additionally covers models that only expose
        # ``predict`` (e.g. a Keras network evaluated under another name),
        # which previously failed with AttributeError on ``predict_proba``.
        # Assumes such a ``predict`` returns positive-class probabilities.
        if model_name == "Neural Network" or not hasattr(model, "predict_proba"):
            # Flatten the (n, 1) column so scores and labels have the same
            # 1-D shape as the ones produced by the scikit-learn branch.
            y_pred_proba = np.asarray(model.predict(X_test)).ravel()
            y_pred = (y_pred_proba > 0.5).astype(int)
        else:
            y_pred = model.predict(X_test)
            # Column 1 holds the score of the second (positive) class.
            y_pred_proba = model.predict_proba(X_test)[:, 1]
        return y_pred, y_pred_proba

    def evaluate_model(self, model, X_test, y_test, model_name="Model"):
        """Score a fitted binary classifier on held-out data.

        Args:
            model: Fitted estimator.  Either exposes ``predict`` and
                ``predict_proba`` or, for the neural network, a ``predict``
                whose output is thresholded at 0.5.
            X_test: Test features.
            y_test: True test labels.
            model_name: Label stored in the result; ``"Neural Network"``
                forces the threshold-based prediction path.

        Returns:
            dict: Keys ``model_name``, ``accuracy``, ``precision``,
            ``recall``, ``f1_score``, ``auc``, ``confusion_matrix``,
            ``classification_report``, ``y_test`` and ``y_pred_proba``.
        """
        y_pred, y_pred_proba = self._predict_labels_and_scores(
            model, X_test, model_name
        )

        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred)
        recall = recall_score(y_test, y_pred)
        f1 = f1_score(y_test, y_pred)
        auc = roc_auc_score(y_test, y_pred_proba)

        cm = confusion_matrix(y_test, y_pred)
        report = classification_report(y_test, y_pred)

        results = {
            'model_name': model_name,
            'accuracy': accuracy,
            'precision': precision,
            'recall': recall,
            'f1_score': f1,
            'auc': auc,
            'confusion_matrix': cm,
            'classification_report': report,
            'y_test': y_test,
            'y_pred_proba': y_pred_proba,
        }

        return results

    def save_model(self, model, file_path):
        """Write a trained model to ``file_path``.

        Keras ``Sequential`` models are written with their own ``save``
        method; every other object is pickled.
        """
        if isinstance(model, Sequential):
            model.save(file_path)
        else:
            with open(file_path, 'wb') as f:
                pickle.dump(model, f)

    def load_model(self, file_path, model_type='sklearn'):
        """Load a trained model from ``file_path``.

        Args:
            file_path: Location of the saved model.
            model_type: ``'keras'`` loads through Keras ``load_model``; any
                other value unpickles the file.

        Returns:
            The deserialised model object.
        """
        if model_type == 'keras':
            from tensorflow.keras.models import load_model
            return load_model(file_path)

        # SECURITY: unpickling executes arbitrary code embedded in the file.
        # Only load model files that come from a trusted source.
        with open(file_path, 'rb') as f:
            return pickle.load(f)