"""Train and evaluate a Random Forest anomaly detector.

Reads the feature-engineered transactions CSV, trains a
``RandomForestClassifier`` on a stratified 80/20 split, prints a
classification report, saves the fitted model together with the feature
column order, and finally shows a confusion matrix and the top feature
importances side by side in one figure.
"""

import joblib
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
from sklearn.model_selection import train_test_split

# -------------------------------
# Configuration
# -------------------------------
DATA_PATH = 'feature_engineered_transactions.csv'
LABEL_COLUMN = 'is_anomalous'
MODEL_PATH = 'anomaly_detector_rf_model.pkl'
FEATURE_ORDER_PATH = 'feature_order.pkl'

TEST_SIZE = 0.2
RANDOM_STATE = 42
N_ESTIMATORS = 100
TOP_N_FEATURES = 10

# NOTE(review): these tick labels assume a binary label whose sorted order
# puts the normal class first (e.g. 0 = normal, 1 = suspicious) -- confirm
# against the dataset's encoding of `is_anomalous`.
CLASS_NAMES = ["Normal", "Suspicious"]


def load_dataset(path: str = DATA_PATH, label_column: str = LABEL_COLUMN):
    """Load the CSV at *path* and return ``(X, y)``.

    ``X`` is every column except *label_column*; ``y`` is *label_column*.
    """
    df = pd.read_csv(path)
    features = df.drop(columns=[label_column])
    labels = df[label_column]
    return features, labels


def train_classifier(X_train, y_train) -> RandomForestClassifier:
    """Fit and return a Random Forest with a fixed seed for reproducibility."""
    clf = RandomForestClassifier(
        n_estimators=N_ESTIMATORS, random_state=RANDOM_STATE
    )
    clf.fit(X_train, y_train)
    return clf


def save_artifacts(clf, feature_names) -> None:
    """Persist the fitted model and the feature column order with joblib."""
    # Save model
    joblib.dump(clf, MODEL_PATH)
    # Save feature order for later use
    joblib.dump(list(feature_names), FEATURE_ORDER_PATH)
    print("✅ Model and feature list saved!")


def plot_results(clf, feature_names, y_test, y_pred) -> None:
    """Show the confusion matrix and top feature importances in one figure."""
    fig, axes = plt.subplots(1, 2, figsize=(15, 6))
    fig.suptitle("Anomaly Detection Results", fontsize=16, fontweight='bold')

    # --- A. Confusion Matrix ---
    cm = confusion_matrix(y_test, y_pred)
    sns.heatmap(
        cm,
        annot=True,
        fmt="d",
        cmap="Blues",
        xticklabels=CLASS_NAMES,
        yticklabels=CLASS_NAMES,
        ax=axes[0],
    )
    axes[0].set_title("Confusion Matrix")
    axes[0].set_xlabel("Predicted")
    axes[0].set_ylabel("Actual")

    # --- B. Feature Importance ---
    importances = pd.Series(
        clf.feature_importances_, index=feature_names
    ).sort_values(ascending=False)
    top = importances.head(TOP_N_FEATURES)
    sns.barplot(x=top.values, y=top.index, color='skyblue', ax=axes[1])
    axes[1].set_title("Top 10 Feature Importances")
    axes[1].set_xlabel("Importance")
    axes[1].set_ylabel("Feature")

    # --- Layout ---
    plt.tight_layout(rect=[0, 0, 1, 0.95])  # Leave room for the suptitle
    plt.show()


def main() -> None:
    """Run the full load -> train -> evaluate -> save -> plot pipeline."""
    # Load the feature-engineered dataset and split into features and labels
    X, y = load_dataset()

    # Train-test split (stratified so both sets keep the label proportions)
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=TEST_SIZE, stratify=y, random_state=RANDOM_STATE
    )

    # Train Random Forest Classifier
    clf = train_classifier(X_train, y_train)

    # Make predictions
    y_pred = clf.predict(X_test)

    # Evaluation Report
    print("\n✅ Classification Report:\n")
    print(classification_report(y_test, y_pred, digits=4))

    # Persist before plotting: plt.show() blocks on interactive backends and
    # can fail on headless ones, which must not cost us the trained model.
    save_artifacts(clf, X.columns)

    # Create one page with subplots
    plot_results(clf, X.columns, y_test, y_pred)


if __name__ == "__main__":
    main()