import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from matplotlib.figure import Figure
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import gradio as gr

# Excel workbook read on every call to model().
DATA_PATH = 'RehabLOSCalculatorDatasetShortNoPHI.xlsx'

# Columns removed from the dataset before any features are built.
# NOTE(review): presumably these are derived scores/recommendations that
# should not be visible to the classifier -- confirm with the data owner.
DROPPED_COLUMNS = [
    'Rehab Rounds Recommendation',
    'Final Score',
    'Medical Score',
    'Therapy Score',
    'Social Score',
]

TARGET_COLUMN = 'Discharge Location'
POSITIVE_LABEL = 'IRF'

# Display names for the encoded classes 0 and 1, in that order.
CLASS_NAMES = ['Not IRF', 'IRF']


def _load_features_and_target():
    """Read the workbook and return the one-hot encoded features and 0/1 target."""
    data = pd.read_excel(DATA_PATH).drop(columns=DROPPED_COLUMNS)

    # Binary target: 1 where the discharge location is 'IRF', 0 otherwise.
    y = (data[TARGET_COLUMN] == POSITIVE_LABEL).astype(int)

    X = data.drop(columns=[TARGET_COLUMN])
    categorical_columns = X.select_dtypes(include=['object']).columns
    X = pd.get_dummies(X, columns=categorical_columns, drop_first=True)
    return X, y


def _safe_ratio(numerator, denominator):
    """Return numerator / denominator, or NaN when the denominator is zero."""
    if denominator == 0:
        return float('nan')
    return numerator / denominator


def _plot_confusion_matrix(cm):
    """Draw the 2x2 confusion matrix as a heatmap and return the figure."""
    # A standalone Figure is not registered with pyplot, so figures do not
    # accumulate across requests and no global "current figure" state is
    # shared between concurrent calls.
    fig = Figure(figsize=(6, 4))
    ax = fig.subplots()
    sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        xticklabels=CLASS_NAMES,
        yticklabels=CLASS_NAMES,
        ax=ax,
    )
    ax.set_xlabel('Predicted')
    ax.set_ylabel('True')
    ax.set_title('Confusion Matrix')
    return fig


def model(split_size):
    """Train a random forest on the discharge dataset and report test metrics.

    Args:
        split_size: Fraction of rows held out for testing; forwarded to
            ``train_test_split`` as ``test_size``.

    Returns:
        A ``(figure, text)`` tuple: the confusion-matrix heatmap as a
        matplotlib figure, and a text report with accuracy, sensitivity,
        specificity and the feature importances sorted in descending order.
        Sensitivity or specificity is reported as ``nan`` when the test split
        contains no samples of the corresponding class.
    """
    X, y = _load_features_and_target()
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=split_size, random_state=42
    )

    # Fit the scaler on the training split only, then apply it to both splits.
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)

    clf = RandomForestClassifier(random_state=42)
    clf.fit(X_train_scaled, y_train)
    y_pred = clf.predict(X_test_scaled)

    accuracy = accuracy_score(y_test, y_pred)

    # Pin the label set so the matrix is always 2x2; without it a test split
    # containing a single class yields a 1x1 matrix and the unpacking fails.
    cm = confusion_matrix(y_test, y_pred, labels=[0, 1])
    tn, fp, fn, tp = cm.ravel()
    sensitivity = _safe_ratio(tp, tp + fn)
    specificity = _safe_ratio(tn, tn + fp)

    importance_df = pd.DataFrame(
        {'Feature': X.columns, 'Importance': clf.feature_importances_}
    ).sort_values(by='Importance', ascending=False)

    cm_plot = _plot_confusion_matrix(cm)

    result_text = (
        f"Accuracy: {accuracy:.2f}\n"
        f"Sensitivity (Recall): {sensitivity:.2f}\n"
        f"Specificity: {specificity:.2f}\n\n"
        f"Feature Importances:\n{importance_df.to_string(index=False)}"
    )
    return cm_plot, result_text


# Gradio Interface
interface = gr.Interface(
    fn=model,
    inputs=gr.Slider(0.1, 0.5, step=0.05, label="Test Set Size (Fraction)"),
    outputs=["plot", "text"],
    title="Random Forest Model for Discharge Location Prediction",
    description="Adjust the fraction of the test set (0.1 to 0.5) and view the feature importance, accuracy, sensitivity, and specificity.",
)

# Launch the Gradio app (for Hugging Face deployment)
interface.launch()