import pandas as pd

import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import gradio as gr

def model(split_size: float):
    """Train a random-forest classifier for discharge location and report test metrics.

    The dataset is read from ``RehabLOSCalculatorDatasetShortNoPHI.xlsx`` in the
    current working directory on every call. The target is binary: ``1`` when
    ``Discharge Location`` equals ``'IRF'``, ``0`` for anything else.

    Args:
        split_size: Fraction of rows held out for testing; forwarded to
            ``train_test_split`` as ``test_size``.

    Returns:
        A ``(figure, text)`` tuple. ``figure`` is a matplotlib ``Figure`` holding
        the 2x2 confusion-matrix heatmap; ``text`` lists accuracy, sensitivity,
        specificity and the feature importances sorted in descending order.
    """
    # Local import: a standalone Figure is not registered with pyplot, so
    # repeated calls from the web app neither accumulate open figures nor
    # touch pyplot's shared "current figure" state.
    from matplotlib.figure import Figure

    # Load the dataset
    data = pd.read_excel('RehabLOSCalculatorDatasetShortNoPHI.xlsx')

    # Preprocess the data: remove the recommendation/score columns
    # (presumably derived from the outcome -- confirm with the data owner).
    data = data.drop(columns=['Rehab Rounds Recommendation', 'Final Score', 'Medical Score', 'Therapy Score', 'Social Score'])
    X = data.drop(columns=['Discharge Location'])
    y = data['Discharge Location'].apply(lambda x: 1 if x == 'IRF' else 0)
    # One-hot encode every object-typed column, dropping the first level of each.
    X = pd.get_dummies(X, columns=X.select_dtypes(include=['object']).columns, drop_first=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=split_size, random_state=42)

    # Scale the data (statistics are fitted on the training split only)
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    # Train the model
    clf = RandomForestClassifier(random_state=42)
    clf.fit(X_train_scaled, y_train)
    y_pred = clf.predict(X_test_scaled)

    # Calculate accuracy and classification metrics.
    accuracy = accuracy_score(y_test, y_pred)
    # Pin the label order so the matrix is always 2x2, even when the test
    # split (and the predictions) happen to contain a single class; without
    # this the four-way unpacking below raises ValueError.
    cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()
    actual_positives = tp + fn
    actual_negatives = tn + fp
    # A rate is undefined when its class is absent from the test split;
    # report NaN explicitly instead of dividing by zero.
    sensitivity = tp / actual_positives if actual_positives else float('nan')
    specificity = tn / actual_negatives if actual_negatives else float('nan')

    # Feature importance, most important first
    importance_df = pd.DataFrame(
        {'Feature': X.columns, 'Importance': clf.feature_importances_}
    ).sort_values(by='Importance', ascending=False)

    # Confusion matrix plot, drawn on an explicit Axes
    cm_plot = Figure(figsize=(6, 4))
    ax = cm_plot.subplots()
    sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=['Not IRF', 'IRF'],
        yticklabels=['Not IRF', 'IRF'],
        ax=ax,
    )
    ax.set_xlabel('Predicted')
    ax.set_ylabel('True')
    ax.set_title('Confusion Matrix')

    # Return the results
    result_text = f"Accuracy: {accuracy:.2f}\nSensitivity (Recall): {sensitivity:.2f}\nSpecificity: {specificity:.2f}\n\nFeature Importances:\n{importance_df.to_string(index=False)}"
    return cm_plot, result_text

# Gradio Interface
# Slider controlling the held-out fraction that is passed to `model`.
test_fraction_slider = gr.Slider(
    minimum=0.1,
    maximum=0.5,
    step=0.05,
    label="Test Set Size (Fraction)",
)

# Text shown at the top of the app page.
app_title = "Random Forest Model for Discharge Location Prediction"
app_description = "Adjust the fraction of the test set (0.1 to 0.5) and view the feature importance, accuracy, sensitivity, and specificity."

# `model` returns (figure, text), matching the two output components in order.
interface = gr.Interface(
    fn=model,
    inputs=test_fraction_slider,
    outputs=["plot", "text"],
    title=app_title,
    description=app_description,
)

# Launch the Gradio app (for Hugging Face deployment)
interface.launch()