# Streamlit front end for LEURN.
#
# Workflow: upload a training table, pick the target and categorical columns,
# train a LEURN network, then either explain predictions for rows of an
# uploaded test table or generate new samples together with explanations.
import io

import numpy as np
import openml
import pandas as pd
import streamlit as st
import torch

from DATA import split_and_processing
from LEURN import LEURN
from TRAINER import Trainer


def are_all_strings(series):
    """Return True when every item of *series* is a ``str`` instance."""
    return all(isinstance(item, str) for item in series)


def _read_table(uploaded):
    """Load an uploaded CSV/Excel file into a DataFrame.

    The format is chosen from the file name first: the browser-reported MIME
    type of a CSV upload is not always "text/csv" (for example
    "application/vnd.ms-excel"), which would otherwise route a CSV file to
    ``pd.read_excel``. The MIME check is kept as a fallback.
    """
    if uploaded.name.lower().endswith(".csv") or uploaded.type == "text/csv":
        return pd.read_csv(uploaded)
    return pd.read_excel(uploaded)


def _show_prediction(result, result_original_format, problem_type):
    """Write the model output in network format and in original format.

    For binary classification (``problem_type == 1``) a hint explaining how
    the sign of the network output relates to the predicted class is added.
    """
    # .cpu() is a no-op on CPU tensors and required when the model ran on CUDA.
    result_np = result.detach().cpu().numpy()
    st.write("Predicted Output: (Network format)")
    st.write(result_np.astype('str'))
    if problem_type == 1:
        if np.sign(result_np) > 0:
            st.write("Result here is positive; this means output class below is represented by positive sign. In the explanation dataframe, positive contributions increase class likelihood")
        else:
            st.write("Result here is negative; this means output class below is represented by negative sign. In the explanation dataframe, negative contributions increase class likelihood")
    st.write("Predicted Output: (original format)")
    st.write(result_original_format)


# Initialize or reset session states if necessary
if 'init' not in st.session_state:
    st.session_state['training_completed'] = False
    st.session_state['data_chosen'] = False
    st.session_state['init'] = True
    st.session_state['selected_row'] = False
    st.session_state['explanation_made'] = False
    st.session_state['result'] = False

# Streamlit application layout
st.title("LEURN")

# Usage
st.subheader("Usage")
for usage_step in (
    "1. Upload your training excel/csv file. The file should contain all the features and the target to be predicted",
    "2. Select target variable to predict",
    "3. Select categorical variables in your dataset",
    "4. Select neural network hyperparameters",
    "5. Press Train Neural Network",
    "6. If you want to make inference and explain decisions, go to 7, if you want to generate data, go to 10",
    "7. Upload your test excel/csv file. The file should only contain features, and not target",
    "8. Select the row you want to make inference and explain.",
    "9. Press explain button.",
    "10. Press generate button. This directly generates new samples with explanations and output.",
):
    st.write(usage_step)

# Upload csv or excel
st.subheader("File Uploader")
uploaded_file = st.file_uploader("Upload your Excel/CSV file", type=["csv", "xlsx"])

if uploaded_file is not None:
    # Reading the uploaded file
    df = _read_table(uploaded_file)
    st.write("Data Preview:")
    st.write(df.head())

    st.subheader("Categorical Feature and Target Selection")

    # Selecting the target variable
    target = st.selectbox("Select the target variable", options=df.columns)

    # Define features and target
    X = df.drop(target, axis=1)
    y = df[target]
    attribute_names = X.columns

    # Select categorical variables. Widget keys are prefixed so that a column
    # named like one of the session-state entries used below (e.g. "model",
    # "result") cannot collide with it.
    st.write("Select categorical variables:")
    categoricals = [
        st.checkbox(
            f"{col} is categorical",
            key=f"categorical_{col}",
            value=are_all_strings(X[col]),
        )
        for col in X.columns
    ]

    # User input for model parameters
    st.subheader("Model Training Parameters")
    depth = st.selectbox("Select Model Depth", options=[1, 2, 3, 4, 5], index=2)
    batch_size = st.selectbox("Select Batch Size", options=[64, 128, 256, 512, 1024, 2048, 4096], index=4)
    lr = st.selectbox("Select Learning Rate", options=[1e-4, 5e-4, 1e-3, 5e-3, 1e-2], index=3)
    epochs = st.number_input("Enter Number of Epochs", min_value=1, max_value=1000, value=300)
    droprate = st.slider("Select Dropout Rate", min_value=0.0, max_value=1.0, value=0.0, step=0.05)
    output_type = st.radio("Select Output Type (0: regression, 1: binary classification, 2: multi-class classification)", options=[0, 1, 2], index=0)

    if st.button("Train Neural Network"):
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

        # Split and process
        X_train, X_val, X_test, y_train, y_val, y_test, preprocessor = split_and_processing(
            X, y, categoricals, output_type, attribute_names
        )

        # Initialize model
        model = LEURN(preprocessor, depth=depth, droprate=droprate).to(device)

        # Train model
        model_trainer = Trainer(
            model, X_train, X_val, y_train, y_val,
            lr=lr, batch_size=batch_size, epochs=epochs,
            problem_type=output_type, verbose=False,
        )
        model_trainer.train()

        # Load best model
        model.load_state_dict(model_trainer.best_model)

        # Get performances
        st.session_state['perf_train'] = model_trainer.evaluate(X_train, y_train)
        st.session_state['perf_val'] = model_trainer.evaluate(X_val, y_val)
        st.session_state['perf_test'] = model_trainer.evaluate(X_test, y_test)

        # Save test dataset and model to explain/generate later
        X_test_inverse = preprocessor.inverse_transform_X(X_test)
        X_test_inverse.to_csv('test.csv', index=False)

        st.session_state['training_completed'] = True
        st.session_state['model'] = model
        # Remember the problem type the model was trained with, so that later
        # reruns label results correctly even if the radio button is changed.
        st.session_state['output_type'] = output_type
        # An explanation produced by a previously trained model is stale now.
        st.session_state['explanation_made'] = False

    if st.session_state['training_completed']:
        trained_output_type = st.session_state.get('output_type', output_type)

        # Print performances
        st.write("Here are performances, try different hyperparameters if not satisfied")
        if trained_output_type == 0:
            st.subheader("Training Results (MSE)")
        elif trained_output_type == 1:
            st.subheader("Training Results (ROC-AUC)")
        else:
            st.subheader("Training Results (ACC)")
        st.write(f"Training Score: {st.session_state['perf_train']:.4f}")
        st.write(f"Validation Score: {st.session_state['perf_val']:.4f}")
        st.write(f"Test Score: {st.session_state['perf_test']:.4f}")

        # File uploader for explanation
        st.subheader("Explain New Inputs")
        uploaded_file_to_explain = st.file_uploader("Upload your Excel/CSV file to explain. Uploaded file should not have the target variable.", type=["csv", "xlsx"])

        if uploaded_file_to_explain is not None:
            # Reading the uploaded file
            X_test_inverse = _read_table(uploaded_file_to_explain)
            # Save DataFrame (as JSON) so it survives Streamlit reruns
            st.session_state['X_test_inverse_df'] = X_test_inverse.to_json()
            st.session_state['data_chosen'] = True  # Flag to indicate data is chosen

        if st.session_state['data_chosen']:
            # Load DataFrame from session state. The JSON text is wrapped in
            # StringIO because passing a literal JSON string to read_json is
            # deprecated in recent pandas versions.
            X_test_inverse = pd.read_json(io.StringIO(st.session_state['X_test_inverse_df']))

            # Always display the DataFrame to ensure it's visible for selection
            st.write("Test DataFrame:")
            st.write(X_test_inverse)

            # Let users select a row, selection is dynamic and updates session state
            selected_index = st.selectbox("Select a row:", options=X_test_inverse.index, key="selected_index")
            selected_row = X_test_inverse.loc[[selected_index]]
            st.write("Selected Data for Explanation:")
            st.write(selected_row)
            st.session_state['selected_row'] = selected_row

            # Explain selected row
            if st.button("Explain"):
                model = st.session_state['model']
                # NOTE(review): the input tensor is created on CPU while the
                # model may live on CUDA -- confirm LEURN.explain handles
                # device placement.
                row_features = model.preprocessor.transform_X(st.session_state['selected_row'])
                row_tensor = torch.from_numpy(row_features.values.astype('float32'))
                Exp_df_test_sample, result, result_original_format = model.explain(
                    row_tensor, include_causal_analysis=True
                )
                st.session_state['explanation_made'] = True
                st.session_state['Exp_df_test_sample'] = Exp_df_test_sample
                st.session_state['result_original_format'] = result_original_format
                st.session_state['result'] = result

            # Print explanations
            if st.session_state['explanation_made']:
                st.write("Explanation DataFrame:")
                st.write(st.session_state['Exp_df_test_sample'])
                _show_prediction(
                    st.session_state['result'],
                    st.session_state['result_original_format'],
                    trained_output_type,
                )

        # Data generation part
        st.subheader("Generate Data From Scratch")
        if st.button("Generate"):
            model = st.session_state['model']
            generated_sample_nn_friendly, generated_sample_original_input_format, output = model.generate()
            st.write("Generated Data:")
            st.write(generated_sample_original_input_format)
            Exp_df_generated_sample, result, result_original_format = model.explain(
                generated_sample_nn_friendly, include_causal_analysis=True
            )
            st.write("Explanation DataFrame:")
            st.write(Exp_df_generated_sample)
            _show_prediction(result, result_original_format, trained_output_type)