# AblationStudy_ / app.py
# Uploaded by Roberta2024
# Commit 59beeed (verified): "Create app.py"
import streamlit as st
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
import numpy as np
# Function to process data and return feature importances
def calculate_importances(file):
    """Train three tree-based classifiers and tabulate their feature importances.

    The CSV is read into a DataFrame; the ``target`` column is used as the
    label and every other column as a feature. A random forest, an XGBoost
    classifier and a single decision tree (CART) are each fitted on the
    training part of a 75/25 split (``random_state=42``), and their
    ``feature_importances_`` are collected side by side, one row per feature.

    Side effect: the full table is written to ``feature_importances.xlsx``
    in the current working directory, replacing any existing file of that
    name.

    Args:
        file: Path or file-like object accepted by ``pandas.read_csv``.

    Returns:
        A ``(file_name, preview)`` tuple: the name of the Excel file that was
        written, and the first five rows of the importance table.

    Raises:
        KeyError: If the CSV has no ``target`` column.
    """
    target_column = 'target'

    # Read uploaded file
    heart_df = pd.read_csv(file)

    # Fail early with an actionable message instead of pandas' generic
    # "not found in axis" error when the label column is missing.
    if target_column not in heart_df.columns:
        raise KeyError(
            f"Uploaded CSV must contain a '{target_column}' column; "
            f"found columns: {list(heart_df.columns)}"
        )

    # Set X and y
    X = heart_df.drop(columns=target_column)
    y = heart_df[target_column]

    # Split the data. Only the training portion is used below; the held-out
    # rows are discarded, but the split is kept so the models are fitted on
    # the same 75% subset as before.
    X_train, _, y_train, _ = train_test_split(X, y, test_size=0.25, random_state=42)

    # Column label -> model; insertion order fixes the output column order.
    models = {
        'Random Forest': RandomForestClassifier(random_state=42),
        'XGBoost': XGBClassifier(use_label_encoder=False, eval_metric='logloss', random_state=42),
        'CART': DecisionTreeClassifier(random_state=42),
    }

    # Every model reports importances in the order of X's columns, so the
    # columns can be placed side by side without merging on the feature name.
    importance_df = pd.DataFrame({'Feature': X.columns})
    for label, model in models.items():
        model.fit(X_train, y_train)
        importance_df[label] = model.feature_importances_

    # Save to Excel
    file_name = 'feature_importances.xlsx'
    importance_df.to_excel(file_name, index=False)

    return file_name, importance_df.head()
# Streamlit interface: upload a CSV, show a preview of the computed feature
# importances, and offer the full table as an Excel download.
st.title("Feature Importance Calculation")

# File upload
uploaded_file = st.file_uploader("Upload heart.csv file", type=['csv'])

if uploaded_file is not None:
    # Process the file and get results (this also writes the Excel file)
    excel_file, preview_df = calculate_importances(uploaded_file)

    # Display a preview of the DataFrame
    st.write("Feature Importances (Preview):")
    st.dataframe(preview_df)

    # Provide a button to download the Excel file
    with open(excel_file, "rb") as file:
        st.download_button(
            label="Download Excel File",
            data=file,
            file_name=excel_file,
            # The file is .xlsx (OOXML); "application/vnd.ms-excel" is the
            # media type of the legacy binary .xls format.
            mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
        )