siddop committed on
Commit
847b911
1 Parent(s): 3274b50

Create app.py

Files changed (1)
  1. app.py +138 -0
app.py ADDED
@@ -0,0 +1,138 @@
import pandas as pd
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from sklearn.model_selection import train_test_split
import tensorflow as tf
from nltk.tokenize import word_tokenize
import re
from lime.lime_tabular import LimeTabularExplainer
import gradio as gr

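# word_tokenize below needs NLTK's punkt tokenizer data; download it once at startup
import nltk
nltk.download('punkt', quiet=True)
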
# Load the data and label encode object columns
le_dict = {}
df = pd.read_csv(r"C:\Users\bhati\Documents\MachineLearning\FreelanceProject\SimpleAttritionPredictionsWithSuggestions\Data.csv")
df2 = df.copy()

# Separate categorical (object) columns from numeric ones; 'Attrition' is the target
object_cols = df2.select_dtypes(include=['object']).columns
object_cols = object_cols.delete(object_cols.get_loc('Attrition'))
int_cols = df2.select_dtypes(exclude=['object']).columns

# Fit a LabelEncoder per categorical column; keep each encoder and its classes for later
classes_dict = {}
for col in object_cols:
    le = LabelEncoder()
    df2[col] = le.fit_transform(df[col])
    le_dict[col] = le
    classes_dict[col] = le.classes_

# Features and target (the target, 'Attrition', is the last column)
X = df2.iloc[:, :-1]
y = df2.iloc[:, -1]

# Input order for the Gradio interface: categorical inputs first, then numeric ones
colList = list(object_cols) + list(int_cols)

scaler = MinMaxScaler()
X_scaled = scaler.fit_transform(X)

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_scaled, y, test_size=0.2, random_state=0)

# Load the model
loaded_model = tf.keras.models.load_model('Final_NN_model.keras')

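# NOTE (assumed setup): predict_label below uses an `explainer` that this file
# never defines. This is a minimal sketch of one, a LIME tabular explainer built
# on the scaled training data, matching how it is called inside the function.
explainer = LimeTabularExplainer(
    X_train,
    feature_names=list(X.columns),
    mode='classification',
)
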
# Prediction function used by the Gradio interface
def predict_label(*args):
    if '' in args:
        return "Please fill in all inputs", pd.DataFrame([['awaiting inputs', 'awaiting inputs']], columns=["Feature", "Importance"])

    # Create empty dictionaries to hold the input data
    input_dict = {}
    input_df = {}

    # Map inputs to column names (same order as the input widgets)
    for i, col in enumerate(colList):
        input_dict[col] = args[i]

    # Rearrange columns to match the order of X
    for col in X.columns:
        input_df[col] = input_dict[col]

    # Add the input data to a single-row DataFrame
    input_df = pd.DataFrame([input_df], columns=input_df.keys())

    # Numeric inputs arrive from the Textbox widgets as strings; cast them to numbers
    for col in int_cols:
        input_df[col] = pd.to_numeric(input_df[col])

    # Encode labels of object columns
    for col in le_dict:
        input_df[col] = le_dict[col].transform(input_df[col])

    # Scale columns (the model and explainer both expect scaled features)
    input_df = scaler.transform(input_df)

    # Make a prediction with the pre-trained model loaded above (as a percentage)
    pred = round(loaded_model.predict(input_df.reshape(1, -1))[0][0], 4) * 100

    # Explain the prediction with LIME
    exp = explainer.explain_instance(input_df[0], loaded_model.predict, labels=(0,), num_features=len(X.columns))

    # Store the top 5 influencing features: keep the feature name (the last
    # alphabetic token of each LIME condition string) and its weight
    top5 = {}
    exp_list = exp.as_list(0)
    for i in range(5):
        for word in word_tokenize(exp_list[i][0]):
            if re.findall(r'[a-zA-Z]+', word):
                feature = word
        weight = round(exp_list[i][1], 2)
        top5[feature] = weight

    # Convert the dictionary to a list of tuples for the Gradio DataFrame output
    top5_table = [(key, value) for key, value in top5.items()]

    # Return the prediction; pred is a percentage, so compare against 30 and 70
    if pred <= 30:
        return f"Low probability ({pred:.2f}%) of attrition", top5_table
    elif pred <= 70:
        return f"Some probability ({pred:.2f}%) of attrition", top5_table
    else:
        return f"High probability ({pred:.2f}%) of attrition", top5_table

# Define the input widgets: dropdowns for categorical columns, textboxes for numeric ones
obj_config = [gr.Dropdown(label=name, choices=sorted(classes_dict[name].tolist())) for name in object_cols]
int_config = [gr.Textbox(label=name, placeholder='enter a number') for name in int_cols]

# Concatenate the two sets of input configurations (same order as colList)
input_config = obj_config + int_config

# Gradio interface
iface = gr.Interface(
    title="Attrition Prediction",
    description="The values shown under the top features show the influence of each feature on the prediction. A higher number indicates that the feature was more influential in determining the prediction, while a lower number indicates less influence.",
    allow_flagging='never',
    fn=predict_label,
    inputs=input_config,
    outputs=[
        gr.Textbox(label="Prediction"),
        gr.DataFrame(headers=["Feature", "Importance"], label="Top 5 features influencing the prediction")
    ],
    live=False  # Set live to True to update the prediction as inputs change
)

# Launch the Gradio interface
iface.launch(share=True)