import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load the transaction table; the CSV is read from the current working directory.
data = pd.read_csv("creditcard.csv")

# A bare expression is only rendered by a notebook; print explicitly so the
# same inspection output also appears when this file runs as a plain script.
print(data)

# Column dtypes and non-null counts (info() writes to stdout itself).
data.info()

# Count missing values per column. isna() and isnull() are aliases, so a
# single pass is enough.
nan_values = data.isna().sum()
print(nan_values)

# Drop every row that contains at least one missing value.
data.dropna(inplace=True)

# Number of rows per class label after cleaning.
print(data["Class"].value_counts())

# Class balance: number of transactions carrying each label.
fig, ax = plt.subplots(figsize=(8, 6))
sns.countplot(data=data, x='Class', palette='viridis', ax=ax)
ax.set(
    xlabel='Class (0: Non-Fraud, 1: Fraud)',
    ylabel='Count',
    title='Class Distribution (Fraud vs. Non-Fraud)',
)
plt.show()

# Spread of the transaction amount within each class.
fig, ax = plt.subplots(figsize=(8, 6))
sns.boxplot(data=data, x='Class', y='Amount', ax=ax)
ax.set(
    xlabel='Class (0: Non-Fraud, 1: Fraud)',
    ylabel='Transaction Amount',
    title='Transaction Amount Distribution by Class',
)
plt.show()

# Pairwise correlation between all columns, drawn as a heatmap.
# Cell annotations are switched off, so `fmt` has no visible effect here.
correlation_matrix = data.corr()
fig, ax = plt.subplots(figsize=(12, 10))
sns.heatmap(correlation_matrix, annot=False, cmap='coolwarm', fmt=".2f", ax=ax)
ax.set_title('Correlation Matrix Heatmap')
plt.show()

# Histograms of the first ten V-features, overlaid for the two classes.
features_to_plot = ['V1', 'V2', 'V3', 'V4', 'V5', 'V6', 'V7', 'V8', 'V9', 'V10']

# The class masks do not depend on the feature being plotted, so split the
# frame once instead of re-filtering every row on each loop iteration.
non_fraud_rows = data[data['Class'] == 0]
fraud_rows = data[data['Class'] == 1]

for feature in features_to_plot:
    plt.figure(figsize=(8, 6))
    # stat='density' normalizes each class separately. With raw counts the
    # smaller class (presumably fraud -- the file elsewhere treats the data as
    # imbalanced) is dwarfed by the larger one and the overlay is unreadable.
    sns.histplot(non_fraud_rows[feature], label='Non-Fraud', kde=True, stat='density')
    sns.histplot(fraud_rows[feature], label='Fraud', kde=True, stat='density')
    plt.title(f'Distribution of {feature} by Class')
    plt.legend()
    plt.show()

# Side-by-side histograms: transaction amount (left) and transaction time (right).
fig, (amount_ax, time_ax) = plt.subplots(1, 2, figsize=(12, 6))

sns.histplot(data['Amount'], bins=50, kde=True, ax=amount_ax)
amount_ax.set(
    xlabel='Transaction Amount',
    ylabel='Frequency',
    title='Distribution of Transaction Amount',
)

sns.histplot(data['Time'], bins=50, kde=True, ax=time_ax)
time_ax.set(
    xlabel='Time (seconds from the first transaction)',
    ylabel='Frequency',
    title='Distribution of Transaction Time',
)

fig.tight_layout()
plt.show()


# Amount per class once more, this time on a default-sized figure.
amount_by_class_ax = sns.boxplot(data=data, x='Class', y='Amount')
amount_by_class_ax.set_title('Transaction Amount vs. Class')
plt.show()


# Target is the 'Class' column; the features are every remaining column.
y = data['Class']
X = data.drop(columns='Class')

# Disabled experiment, kept for reference: oversampling with SMOTE.
# NOTE(review): as written this would resample before the train/test split;
# if re-enabled, confirm whether it should be fitted on the training split only.
# # Apply SMOTE to oversample the minority class
# from imblearn.over_sampling import SMOTE

# smote = SMOTE(random_state=42)
# X_resampled, y_resampled = smote.fit_resample(X, y)

# # Print the class distribution after SMOTE
# print(pd.Series(y_resampled).value_counts())

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
# NOTE(review): the split is not stratified -- consider stratify=y if the class
# proportions need to match between the two splits.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


# Analyze the results
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, roc_auc_score
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn import metrics

def analyze_results(y_test, y_pred, class_names=None):
    """Print classification metrics, plot the confusion matrix, return the metrics.

    Args:
        y_test: True labels of the evaluated samples.
        y_pred: Predicted labels, aligned element-wise with ``y_test``.
        class_names: Optional display names for the confusion-matrix ticks,
            one per distinct label in sorted label order. When omitted, the
            label values themselves are shown.

    Returns:
        Tuple ``(accuracy, precision, recall, f1)``. Precision, recall and F1
        are computed with ``average='weighted'``.

    Raises:
        ValueError: If ``class_names`` is given but its length differs from
            the number of distinct labels found in ``y_test`` and ``y_pred``.
    """
    # Derive the tick labels from the data. The previous hard-coded
    # 'Neutral'/'Positive'/'Negative' names matched neither the number nor the
    # meaning of the classes this function is called with.
    labels = np.unique(np.concatenate([np.asarray(y_test), np.asarray(y_pred)]))
    if class_names is None:
        tick_labels = [str(label) for label in labels]
    else:
        tick_labels = list(class_names)
        if len(tick_labels) != len(labels):
            raise ValueError(
                f"class_names has {len(tick_labels)} entries but the data "
                f"contains {len(labels)} distinct labels"
            )

    # Get the metrics
    accuracy = metrics.accuracy_score(y_test, y_pred)
    precision = metrics.precision_score(y_test, y_pred, average='weighted')
    recall = metrics.recall_score(y_test, y_pred, average='weighted')
    f1 = metrics.f1_score(y_test, y_pred, average='weighted')

    # Print metrics
    print(f"Accuracy: {accuracy:.6f}")
    print(f"Precision: {precision:.6f}")
    print(f"Recall: {recall:.6f}")
    print(f"F1 Score: {f1:.6f}")
    print("Classification Report:")
    print(metrics.classification_report(y_test, y_pred))

    # Produce a confusion matrix; passing `labels` pins the row/column order
    # to the same order used for the tick labels.
    cm = metrics.confusion_matrix(y_test, y_pred, labels=labels)
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
                xticklabels=tick_labels, yticklabels=tick_labels)
    plt.xlabel('Predicted Labels')
    plt.ylabel('True Labels')
    plt.title('Confusion Matrix')
    plt.show()

    return accuracy, precision, recall, f1


from sklearn.ensemble import GradientBoostingClassifier, RandomForestClassifier
from xgboost import XGBClassifier

# Each model follows the same three steps: fit on the training split,
# predict the held-out split, then report metrics and the confusion matrix.

# XGBoost
xgb_model = XGBClassifier(random_state=42).fit(X_train, y_train)
y_pred_xgb = xgb_model.predict(X_test)
analyze_results(y_test, y_pred_xgb)

# Random Forest
rf_model = RandomForestClassifier(random_state=42).fit(X_train, y_train)
y_pred_rf = rf_model.predict(X_test)
analyze_results(y_test, y_pred_rf)

# Gradient Boosting Machine
gbm_model = GradientBoostingClassifier(random_state=42).fit(X_train, y_train)
y_pred_gbm = gbm_model.predict(X_test)
analyze_results(y_test, y_pred_gbm)


import matplotlib.pyplot as plt

# Bar chart comparing the test-set accuracy of the three trained models.
model_names = ['XGBoost', 'Random Forest', 'Gradient Boosting']

# Compute the accuracies from the predictions made above instead of pasting
# numbers from an earlier run, which go stale whenever the data, the split or
# a library version changes.
accuracies = [
    accuracy_score(y_test, predictions)
    for predictions in (y_pred_xgb, y_pred_rf, y_pred_gbm)
]

# Keep the zoomed-in view near 1.0, but widen it if a model scores lower so
# that no bar is clipped at the bottom of the axis.
y_axis_floor = min(0.95, min(accuracies) - 0.005)

plt.figure(figsize=(10, 6))
sns.barplot(x=model_names, y=accuracies, palette='viridis')
plt.xticks(rotation=45)
plt.ylim(y_axis_floor, 1.001)
plt.xlabel('Model')
plt.ylabel('Accuracy')
plt.title('Comparison of Model Accuracies')
plt.show()


import gradio as gr

# The three models used below (xgb_model, rf_model, gbm_model) are trained earlier in this file
# and must exist before the Gradio interface is launched.

# Prediction function using the three models
def predict_fraud(*features):
    """Classify one transaction with each of the three trained models.

    Args:
        *features: One numeric value per feature column, in the same order as
            the columns of ``X_train``.

    Returns:
        A 3-tuple of ``"Fraud"`` / ``"Not Fraud"`` strings, in the order
        XGBoost, Random Forest, Gradient Boosting.

    Raises:
        ValueError: If the number of values differs from the number of
            training columns, or if any value is ``None``.
    """
    feature_names = list(X_train.columns)
    if len(features) != len(feature_names):
        raise ValueError(
            f"Expected {len(feature_names)} feature values, got {len(features)}"
        )

    # A value of None means no number was supplied for that field; fail with a
    # clear message instead of an opaque error from inside a model.
    missing = [name for name, value in zip(feature_names, features) if value is None]
    if missing:
        raise ValueError("Missing value for: " + ", ".join(missing))

    # The models were fitted on a DataFrame, so predict on a one-row DataFrame
    # with the same column names rather than on a bare list of values.
    input_data = pd.DataFrame([features], columns=feature_names)

    # Same order as the documented return value.
    return tuple(
        "Fraud" if model.predict(input_data)[0] == 1 else "Not Fraud"
        for model in (xgb_model, rf_model, gbm_model)
    )

# Labels for the numeric input fields: "Time", "V1" through "V28", then "Amount".
# NOTE(review): presumably this mirrors the feature column order of the CSV --
# verify against the training columns.
input_labels = ["Time", *(f"V{i}" for i in range(1, 29)), "Amount"]

# One numeric field per feature, and one text box per model's verdict.
feature_inputs = [gr.Number(label=label) for label in input_labels]
prediction_outputs = [
    gr.Textbox(label="XGBoost Prediction"),
    gr.Textbox(label="Random Forest Prediction"),
    gr.Textbox(label="Gradient Boosting Prediction"),
]

# Wire the prediction function to the input and output widgets.
gradio_interface = gr.Interface(
    fn=predict_fraud,
    inputs=feature_inputs,
    outputs=prediction_outputs,
    title="Credit Card Fraud Detection with XGBoost, Random Forest, and Gradient Boosting",
    description="Enter transaction details to get predictions from three different models.",
)

# Start the app; share=True additionally requests a shareable link from Gradio.
gradio_interface.launch(share=True)