# %%writefile app.py
"""Streamlit app: predict a student's final grade and explain it with LIME.

At start-up the script
  * reads ``combined.csv`` and label-encodes every object-dtype column,
  * loads a pre-trained model from ``xgb_model.joblib``,
  * reads per-variable description lines from ``student.txt``,
and then renders one input widget per described variable, a "Predict"
button and a "Clear" button.
"""
import joblib
import pandas as pd
import numpy as np
import streamlit as st
from sklearn.preprocessing import LabelEncoder
from lime.lime_tabular import LimeTabularExplainer

# -------------------------------------------------------------------------------------
# Reference data: used to fit the label encoders and as LIME background data.
df = pd.read_csv("combined.csv", index_col=0)
df2 = df.copy()

obj_columns = df.select_dtypes(include=['object']).columns
num_columns = df.select_dtypes(include='number').columns

le_dict = {}       # column name -> fitted LabelEncoder
classes_dict = {}  # column name -> the encoder's classes_ (dropdown options)
for col in obj_columns:
    le = LabelEncoder()
    df2[col] = le.fit_transform(df[col])
    le_dict[col] = le
    classes_dict[col] = le.classes_

# pop + re-assign moves each grade column to the end, so G3 ends up last.
for grade_col in ('G1', 'G2', 'G3'):
    df2[grade_col] = df2.pop(grade_col)

X = df2.iloc[:, :-1]  # every column except the last one (G3)
y = df2.iloc[:, -1]   # the last column (G3)
allCol = X.columns

# -------------------------------------------------------------------------------------
# Load the model from the file.
# NOTE: joblib.load unpickles the file; only load model files you trust.
joblib_file = "xgb_model.joblib"
loaded_model = joblib.load(joblib_file)

# -------------------------------------------------------------------------------------
# Variable descriptions, one stripped line of student.txt per entry.
with open('student.txt', 'r', encoding='utf-8') as file:
    variableExpl = [line.strip() for line in file]

# Drop the lines that are not variable descriptions: the first line, the last
# five lines, and then the two lines sitting just before the final two.
# NOTE(review): these offsets are tied to the exact layout of student.txt
# (presumably a header, separator/comment lines and trailing notes) - confirm
# against the file if it ever changes.
del variableExpl[0]
del variableExpl[-5:]
del variableExpl[-4:-2]

# Key every description by its second whitespace-separated token.
# (presumably "<number> <variable name> - <description>" - verify against student.txt)
variableExplDict = {text.split()[1]: text for text in variableExpl}


# -------------------------------------------------------------------------------------
def predict_score(inputs):
    """Predict from one row of raw inputs and explain the prediction with LIME.

    Args:
        inputs: Sequence of raw widget values, one per column of ``allCol``
            and in the same order. Values for object-dtype columns must be
            labels known to the matching encoder in ``le_dict``.

    Returns:
        A ``str`` with a user-facing message when an input is missing or a
        numeric input cannot be converted to a number; otherwise a tuple
        ``(pred, explTable)`` where ``pred`` is the result of
        ``loaded_model.predict`` and ``explTable`` is a DataFrame indexed by
        LIME feature description with the columns ``ImpactOnPrediction`` and
        ``Positive/Negative``.
    """
    # Treat None and whitespace-only values as missing too.
    if any(value is None or str(value).strip() == '' for value in inputs):
        return "Please enter all the inputs."

    # ---------------------------------------------------------------------------------
    # Build a single-row frame; values are matched to columns by position.
    # NOTE(review): this assumes the widgets are created in the same order as
    # allCol - confirm student.txt lists the variables in CSV column order.
    input_df = pd.DataFrame(np.array(inputs).reshape(1, -1), columns=allCol)

    # ---------------------------------------------------------------------------------
    # Label encode each categorical input with the encoder fitted at start-up.
    for col in obj_columns:
        if col in input_df.columns:
            input_df[col] = le_dict[col].transform(input_df[col])

    # ---------------------------------------------------------------------------------
    # Ensure all columns are numeric BEFORE predicting: text inputs arrive as
    # strings, so the frame must be converted before it reaches the model.
    try:
        input_df = input_df.astype(float)
    except ValueError:
        return "Please enter numeric values for the numeric inputs."

    # ---------------------------------------------------------------------------------
    # Make predictions
    pred = loaded_model.predict(input_df)

    # ---------------------------------------------------------------------------------
    # Create a LIME explainer on the encoded reference data and explain the row.
    explainer = LimeTabularExplainer(
        training_data=X.values,
        mode="regression",
        feature_names=list(allCol),
        verbose=True,
    )
    exp = explainer.explain_instance(
        data_row=input_df.iloc[0].to_numpy(),
        predict_fn=loaded_model.predict,
        num_features=33,
    )

    # exp.as_list() yields (feature description, weight) pairs.
    impacts = {item[0]: item[1] for item in exp.as_list()}

    explTable = pd.DataFrame(
        {'ImpactOnPrediction': list(impacts.values())},
        index=list(impacts.keys()),
    )
    explTable['Positive/Negative'] = explTable['ImpactOnPrediction'].apply(
        lambda x: 'Negative' if x < 0 else 'Positive'
    )

    return pred, explTable


# -------------------------------------------------------------------------------------
# Streamlit app
st.title("Student's Final Grade Prediction")

# Input: one widget per described variable, in description order.
inputs = []
for variable, description in variableExplDict.items():
    st.write(description)
    if variable in obj_columns:
        # Categorical variable: dropdown restricted to the labels the encoder knows.
        value = st.selectbox(variable, classes_dict[variable], key=variable)
    else:
        value = st.text_input(variable, key=variable)
    inputs.append(value)

# Predict button
if st.button("Predict"):
    result = predict_score(inputs)
    if isinstance(result, str):
        # predict_score reports validation problems as a plain message.
        st.warning(result)
    else:
        score, explantn = result
        st.write("Prediction: ", score)
        st.write("Impact on prediction:", explantn)

# Clear button functionality
if st.button("Clear"):
    # st.experimental_rerun is deprecated in favour of st.rerun; prefer the new
    # name when this Streamlit version has it and fall back otherwise.
    # NOTE(review): a rerun alone may not reset keyed widgets, since Streamlit
    # keeps their values in session state - confirm the intended behaviour.
    rerun = getattr(st, "rerun", None) or st.experimental_rerun
    rerun()

# !streamlit run app.py