# Spaces: Runtime error  (page status text captured alongside the source; not program code)
import numpy as np
import openml
import pandas as pd
import streamlit as st
import torch

from DATA import split_and_processing
from LEURN import LEURN
from TRAINER import Trainer
# Seed the workflow flags once per session; the 'init' key records that the
# seeding has already happened so later reruns leave the flags untouched.
if 'init' not in st.session_state:
    initial_flags = {
        'training_completed': False,
        'data_chosen': False,
        'init': True,
        'selected_row': False,
        'explanation_made': False,
        'result': False,
    }
    for flag_name, flag_value in initial_flags.items():
        st.session_state[flag_name] = flag_value
# Page title followed by the numbered usage walkthrough, one line per step.
st.title("LEURN")

st.subheader("Usage")
usage_steps = (
    "1. Upload your training excel/csv file. The file should contain all the features and the target to be predicted",
    "2. Select target variable to predict",
    "3. Select categorical variables in your dataset",
    "4. Select neural network hyperparameters",
    "5. Press Train Neural Network",
    "6. If you want to make inference and explain decisions, go to 7, if you want to generate data, go to 10",
    "7. Upload your test excel/csv file. The file should only contain features, and not target",
    "8. Select the row you want to make inference and explain.",
    "9. Press explain button.",
    "10. Press generate button. This directly generates new samples with explanations and output.",
)
for step_text in usage_steps:
    st.write(step_text)
# Upload csv or excel
st.subheader("File Uploader")
uploaded_file = st.file_uploader(
    "Upload your Excel/CSV file",
    type=["csv", "xlsx"],
)

# Every later section needs the training file, so end this script run here
# when none has been uploaded instead of nesting the rest of the page.
if uploaded_file is None:
    st.stop()
def are_all_strings(series):
    """Return True when *series* looks like a text (categorical) column.

    The result is used as the default state of a column's
    "is categorical" checkbox.

    Missing cells (``None`` or a float NaN) are skipped, so a text column
    with gaps is still recognised as text. A column that contains no string
    at all (empty, or made only of missing cells) yields False instead of
    being vacuously reported as "all strings".

    Args:
        series: Iterable of cell values, e.g. one DataFrame column.

    Returns:
        bool: True if at least one value is a ``str`` and every other value
        is either a ``str`` or missing; False otherwise.
    """
    found_string = False
    for item in series:
        if isinstance(item, str):
            found_string = True
        elif item is None or (isinstance(item, float) and item != item):
            # NaN is the only float that compares unequal to itself.
            continue
        else:
            return False
    return found_string
# Reading the uploaded file.
# The browser-reported MIME type alone is not reliable (CSV uploads can be
# reported as "application/vnd.ms-excel"), so the file name is consulted too.
# The uploader only accepts .csv and .xlsx, so anything else is read as Excel.
is_csv = (
    uploaded_file.type == "text/csv"
    or uploaded_file.name.lower().endswith(".csv")
)
if is_csv:
    df = pd.read_csv(uploaded_file)
else:
    df = pd.read_excel(uploaded_file)

st.write("Data Preview:")
st.write(df.head())

st.subheader("Categorical Feature and Target Selection")

# Selecting the target variable
target = st.selectbox("Select the target variable", options=df.columns)

# Define features and target
X = df.drop(target, axis=1)
y = df[target]
attribute_names = X.columns

# Select categorical variables: one checkbox per feature column, pre-ticked
# when the column looks like text.
st.write("Select categorical variables:")
categoricals = [
    st.checkbox(
        f"{col} is categorical",
        # Prefix the widget key so a column named like one of this app's own
        # session-state keys (e.g. 'result', 'model', 'selected_index')
        # cannot collide with it.
        key=f"categorical_{col}",
        value=are_all_strings(X[col]),
    )
    for col in X.columns
]
# User input for model parameters. Each widget's return value is the
# hyperparameter used when the network is trained.
st.subheader("Model Training Parameters")

depth_choices = [1, 2, 3, 4, 5]
batch_size_choices = [64, 128, 256, 512, 1024, 2048, 4096]
learning_rate_choices = [1e-4, 5e-4, 1e-3, 5e-3, 1e-2]
output_type_choices = [0, 1, 2]

depth = st.selectbox(
    "Select Model Depth",
    options=depth_choices,
    index=2,
)
batch_size = st.selectbox(
    "Select Batch Size",
    options=batch_size_choices,
    index=4,
)
lr = st.selectbox(
    "Select Learning Rate",
    options=learning_rate_choices,
    index=3,
)
epochs = st.number_input(
    "Enter Number of Epochs",
    min_value=1,
    max_value=1000,
    value=300,
)
droprate = st.slider(
    "Select Dropout Rate",
    min_value=0.0,
    max_value=1.0,
    value=0.0,
    step=0.05,
)
output_type = st.radio(
    "Select Output Type (0: regression, 1: binary classification, 2: multi-class classification)",
    options=output_type_choices,
    index=0,
)
# Train on demand: split/preprocess the data, fit the network, restore its
# best weights, then publish the scores and the model through session state.
if st.button("Train Neural Network"):
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Split and process
    (
        X_train, X_val, X_test,
        y_train, y_val, y_test,
        preprocessor,
    ) = split_and_processing(X, y, categoricals, output_type, attribute_names)

    # Initialize model
    model = LEURN(preprocessor, depth=depth, droprate=droprate).to(device)

    # Train model
    model_trainer = Trainer(
        model,
        X_train,
        X_val,
        y_train,
        y_val,
        lr=lr,
        batch_size=batch_size,
        epochs=epochs,
        problem_type=output_type,
        verbose=False,
    )
    model_trainer.train()

    # Load best model
    model.load_state_dict(model_trainer.best_model)

    # Get performances
    st.session_state['perf_train'] = model_trainer.evaluate(X_train, y_train)
    st.session_state['perf_val'] = model_trainer.evaluate(X_val, y_val)
    st.session_state['perf_test'] = model_trainer.evaluate(X_test, y_test)

    # Save test dataset and model to explain/generate later
    X_test_inverse = preprocessor.inverse_transform_X(X_test)
    X_test_inverse.to_csv('test.csv', index=False)
    st.session_state['model'] = model

    # An explanation kept from an earlier run was produced by the previous
    # model; hide it so it is not shown next to the new model's scores.
    st.session_state['explanation_made'] = False

    st.session_state['training_completed'] = True
# The remaining sections all rely on a trained model; end this script run
# here until training has completed.
if not st.session_state['training_completed']:
    st.stop()

# Print performances
st.write("Here are performances, try different hyperparameters if not satisfied")

# Metric shown in the heading: 0 -> MSE, 1 -> ROC-AUC, anything else -> ACC.
metric_by_output_type = {0: "MSE", 1: "ROC-AUC"}
metric_label = metric_by_output_type.get(output_type, "ACC")
st.subheader(f"Training Results ({metric_label})")

score_rows = (
    ("Training", 'perf_train'),
    ("Validation", 'perf_val'),
    ("Test", 'perf_test'),
)
for split_label, state_key in score_rows:
    st.write(f"{split_label} Score: {st.session_state[state_key]:.4f}")
# File uploader for explanation
st.subheader("Explain New Inputs")
uploaded_file_to_explain = st.file_uploader(
    "Upload your Excel/CSV file to explain. Uploaded file should not have the target variable.",
    type=["csv", "xlsx"],
)

if uploaded_file_to_explain is not None:
    # Reading the uploaded file. The file name is checked as well as the MIME
    # type because CSV uploads can be reported as "application/vnd.ms-excel".
    explain_is_csv = (
        uploaded_file_to_explain.type == "text/csv"
        or uploaded_file_to_explain.name.lower().endswith(".csv")
    )
    if explain_is_csv:
        X_test_inverse = pd.read_csv(uploaded_file_to_explain)
    else:
        X_test_inverse = pd.read_excel(uploaded_file_to_explain)

    # Keep the DataFrame itself in session state. Serialising it to JSON and
    # parsing it back can change dtypes and index labels, and session state
    # can hold the object directly.
    st.session_state['X_test_inverse_df'] = X_test_inverse
    st.session_state['data_chosen'] = True  # Flag to indicate data is chosen

if st.session_state['data_chosen']:
    # Load DataFrame from session state (it survives reruns even after the
    # uploader widget has been cleared).
    X_test_inverse = st.session_state['X_test_inverse_df']

    # Always display the DataFrame to ensure it's visible for selection
    st.write("Test DataFrame:")
    st.write(X_test_inverse)

    # Let users select a row; the widget also mirrors its value into
    # session state under the "selected_index" key.
    selected_index = st.selectbox(
        "Select a row:",
        options=X_test_inverse.index,
        key="selected_index",
    )
    # Double brackets keep the selection as a one-row DataFrame.
    selected_row = X_test_inverse.loc[[selected_index]]

    st.write("Selected Data for Explanation:")
    st.write(selected_row)
    st.session_state['selected_row'] = selected_row
# Explain selected row. The button is only offered once a row source has been
# chosen; the short-circuit keeps it from rendering before that.
if st.session_state['data_chosen'] and st.button("Explain"):
    model = st.session_state['model']

    # Preprocess the stored one-row DataFrame and hand it over as float32.
    row_features = model.preprocessor.transform_X(st.session_state['selected_row'])
    row_tensor = torch.from_numpy(row_features.values.astype('float32'))

    explanation = model.explain(row_tensor, include_causal_analysis=True)
    Exp_df_test_sample, result, result_original_format = explanation

    # Persist the outputs so they are still shown on later reruns.
    st.session_state['explanation_made'] = True
    st.session_state['Exp_df_test_sample'] = Exp_df_test_sample
    st.session_state['result_original_format'] = result_original_format
    st.session_state['result'] = result
# Print explanations stored in session state by the Explain step.
if st.session_state['explanation_made']:
    st.write("Explanation DataFrame:")
    st.write(st.session_state['Exp_df_test_sample'])

    st.write("Predicted Output: (Network format)")
    # Tensor.numpy() only works on CPU tensors; the model is moved to CUDA
    # when it is available, so copy the result to host memory first.
    result_values = st.session_state['result'].detach().cpu().numpy()
    st.write(result_values.astype('str'))

    # For binary classification, spell out which sign stands for the
    # predicted class.
    if output_type == 1:
        if np.sign(result_values) > 0:
            st.write("Result here is positive; this means output class below is represented by positive sign. In the explanation dataframe, positive contributions increase class likelihood")
        else:
            st.write("Result here is negative; this means output class below is represented by negative sign. In the explanation dataframe, negative contributions increase class likelihood")

    st.write("Predicted Output: (original format)")
    st.write(st.session_state['result_original_format'])
# Data generation part: sample a new input from the trained model, then
# explain the model's output for that sample.
st.subheader("Generate Data From Scratch")
if st.button("Generate"):
    model = st.session_state['model']

    # generate() returns the sample in network format, the same sample in the
    # original input format, and an output value that is not used here.
    generated_sample_nn_friendly, generated_sample_original_input_format, output = model.generate()
    st.write("Generated Data:")
    st.write(generated_sample_original_input_format)

    Exp_df_generated_sample, result, result_original_format = model.explain(
        generated_sample_nn_friendly,
        include_causal_analysis=True,
    )
    st.write("Explanation DataFrame:")
    st.write(Exp_df_generated_sample)

    st.write("Predicted Output: (Network format)")
    # Tensor.numpy() only works on CPU tensors; the model is moved to CUDA
    # when it is available, so copy the result to host memory first.
    result_values = result.detach().cpu().numpy()
    st.write(result_values.astype('str'))

    # For binary classification, spell out which sign stands for the
    # predicted class.
    if output_type == 1:
        if np.sign(result_values) > 0:
            st.write("Result here is positive; this means output class below is represented by positive sign. In the explanation dataframe, positive contributions increase class likelihood")
        else:
            st.write("Result here is negative; this means output class below is represented by negative sign. In the explanation dataframe, negative contributions increase class likelihood")

    st.write("Predicted Output: (original format)")
    st.write(result_original_format)