andrewicus committed
Commit • f8b2bd0
Parent(s): a62d129

full updates
Browse files:
- imgs/ml-flow1.jpeg +0 -0
- imgs/ml-flow2.jpeg +0 -0
- imgs/ml-flow3.jpeg +0 -0
- imgs/ml-flow4.jpeg +0 -0
- pages/02 🤖 Model Prediction.py +45 -21
- pages/03 🧑‍💻 Explainable AI.py +80 -1
- pages/04 🦦 MLflow.py +15 -233
- requirements.txt +3 -1
imgs/ml-flow1.jpeg
ADDED
imgs/ml-flow2.jpeg
ADDED
imgs/ml-flow3.jpeg
ADDED
imgs/ml-flow4.jpeg
ADDED
pages/02 🤖 Model Prediction.py
CHANGED
@@ -11,7 +11,9 @@ import matplotlib.pyplot as plt
 from sklearn.linear_model import LogisticRegression
 from sklearn.metrics import classification_report
 from codecarbon import EmissionsTracker
+from sklearn.metrics import accuracy_score, precision_score, f1_score, classification_report
 import time
+from shapash.explainer.smart_explainer import SmartExplainer
 
 url = "https://upload.wikimedia.org/wikipedia/commons/6/6a/DoorDash_Logo.svg"
 st.image(url, output_format="PNG", width=300)
@@ -25,7 +27,7 @@ df['Education'] = df['Education'].astype('category').cat.codes
 df['Marital_Status'] = df['Marital_Status'].astype('category').cat.codes
 df = df.drop(["Dt_Customer"], axis = 1)
 df = df.drop(["ID"], axis = 1)
-params = st.multiselect("Select Parameters", df.columns, default = ["
+params = st.multiselect("Select Parameters", df.columns, default = ["AcceptedCmp5", "Recency", "AcceptedCmp3", "AcceptedCmp1", "NumWebVisitsMonth"])
 model = st.selectbox("Select Model", ["Logistic Regression", "K-Nearest Neighbors", "Decision Tree"])
 
 if not params:
@@ -39,29 +41,24 @@ else:
     model_start_time = time.time()
     tracker = EmissionsTracker()
     tracker.start()
+
     if(model == "Logistic Regression"):
-        model_accuracy = logmodel.predict(X_test)
+        model = LogisticRegression()
+        model.fit(X_train,y_train)
     elif(model == "K-Nearest Neighbors"):
-        model_accuracy = clf.predict(X_test)
+        numNeighbors = st.number_input('N Neighbors', 2, 10)
+        model = KNeighborsClassifier(n_neighbors = numNeighbors)
+        model.fit(X_train,y_train)
+    elif(model == "Decision Tree"):
+        maxDepth = st.number_input('Tree Depth', 2, 6)
+        model = DecisionTreeClassifier(max_depth=maxDepth)
+        model.fit(X_train,y_train)
     import graphviz
     from sklearn.tree import export_graphviz
-    # Assuming `clf` and `X` are defined somewhere in your code
     # Your code for exporting the decision tree graph
     feature_names = X.columns
     feature_cols = X.columns
-    dot_data = export_graphviz(
+    dot_data = export_graphviz(model, out_file=None,
                                feature_names=feature_cols,
                                class_names=['0', '1'],
                                filled=True, rounded=True,
@@ -70,12 +67,39 @@
     # Display the graph using streamlit_graphviz
     st.graphviz_chart(dot_data)
 
+    y_pred = model.predict(X_test)
+    st.dataframe(
+        pd.DataFrame(
+            classification_report(y_test, y_pred, output_dict=True)
+        ).transpose()
+    )
+    f1 = f1_score(y_test, y_pred)
+    precision = precision_score(y_test, y_pred, average='binary') # Use average='binary' for binary classification
+    model_accuracy = metrics.accuracy_score(y_test, y_pred)
+
     model_end_time = time.time()
     model_execution_time = model_end_time - model_start_time
 
     emissions = tracker.stop()
-    print(f"Estimated emissions for training the model: {emissions:.4f} kg of CO2")
 
-    st.
+    st.header("Key Metrics")
+
+    col1, col2, col3 = st.columns(3)
+
+    # Metric 1: Accuracy
+    col1.metric(label="Accuracy", value=str(round(model_accuracy*100, 2)) + "%")
+    col2.metric(label="F1 Score", value = str(round(f1*100, 2)) + "%")
+    col3.metric(label="Precision", value = str(round(precision*100, 2)) + "%")
+
+    col21, col22 = st.columns(2)
+    # Metric 2: Execution time
+    col21.metric(label="Execution time", value=str(round(model_execution_time, 2)) + "s")
+    # Metric 3: CO2 Emissions
+    col22.metric(label="CO2 Emissions", value=str(round(emissions, 2)) + "kg")
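For readers skimming the diff: the new flow on this page fits the user-selected model, predicts on the held-out split, renders a classification report, and then surfaces accuracy, F1, precision, runtime, and CO2 as st.metric widgets. Below is a minimal standalone sketch of that fit, predict, report, metrics sequence, assuming a synthetic binary dataset (make_classification) and a fixed DecisionTreeClassifier in place of the user-selected model; Streamlit and CodeCarbon are omitted, so this is an illustration of the pattern rather than the app itself.

# Standalone sketch (not the app): mirrors the fit -> predict -> report -> metrics
# sequence added above, on synthetic data instead of ifood-data.csv.
import time
import pandas as pd
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import classification_report, accuracy_score, precision_score, f1_score

X, y = make_classification(n_samples=500, n_features=5, random_state=42)  # hypothetical data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

start = time.time()
model = DecisionTreeClassifier(max_depth=4)  # stands in for the user-selected model
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Same report the page shows via st.dataframe(...)
report = pd.DataFrame(classification_report(y_test, y_pred, output_dict=True)).transpose()
print(report)
print("accuracy :", round(accuracy_score(y_test, y_pred) * 100, 2), "%")
print("precision:", round(precision_score(y_test, y_pred) * 100, 2), "%")
print("f1 score :", round(f1_score(y_test, y_pred) * 100, 2), "%")
print("runtime  :", round(time.time() - start, 2), "s")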
pages/03 🧑‍💻 Explainable AI.py
CHANGED
@@ -1,9 +1,88 @@
 import streamlit as st
 import pandas as pd
 from PIL import Image
+import sklearn.metrics as sk_metrics
+from sklearn.model_selection import train_test_split
+from sklearn.neighbors import KNeighborsClassifier
+from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier
+from sklearn.model_selection import train_test_split # Import train_test_split function
+from sklearn import metrics # Import scikit-learn metrics module for accuracy calculation
+import matplotlib.pyplot as plt
+from sklearn.linear_model import LogisticRegression
+from sklearn.metrics import classification_report
+from codecarbon import EmissionsTracker
+from sklearn.metrics import accuracy_score, precision_score, f1_score, classification_report
+import time
+from shapash.explainer.smart_explainer import SmartExplainer
 
 url = "https://upload.wikimedia.org/wikipedia/commons/6/6a/DoorDash_Logo.svg"
-st.image(url,
+st.image(url, output_format="PNG", width=300)
 
 st.title("Explainable AI")
 
+df_unclean = pd.read_csv("ifood-data.csv")
+df = df_unclean.dropna()
+df = df[df["Year_Birth"] > 1940]
+df['Education'] = df['Education'].astype('category').cat.codes
+df['Marital_Status'] = df['Marital_Status'].astype('category').cat.codes
+df = df.drop(["Dt_Customer"], axis = 1)
+df = df.drop(["ID"], axis = 1)
+
+params = df.drop('Response', axis = 1).columns
+
+X = df.drop(labels = ['Response'], axis = 1)
+X = df[params]
+y = df["Response"]
+X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.3, random_state = 42)
+model_start_time = time.time()
+tracker = EmissionsTracker()
+tracker.start()
+
+model = DecisionTreeClassifier(max_depth=6)
+model.fit(X_train,y_train)
+import graphviz
+from sklearn.tree import export_graphviz
+# Your code for exporting the decision tree graph
+feature_names = X.columns
+feature_cols = X.columns
+dot_data = export_graphviz(model, out_file=None,
+                           feature_names=feature_cols,
+                           class_names=['0', '1'],
+                           filled=True, rounded=True,
+                           special_characters=True)
+
+# Display the graph using streamlit_graphviz
+st.graphviz_chart(dot_data)
+
+y_pred = model.predict(X_test)
+# st.dataframe(
+#     pd.DataFrame(
+#         classification_report(y_test, y_pred, output_dict=True)
+#     ).transpose()
+# )
+f1 = f1_score(y_test, y_pred)
+precision = precision_score(y_test, y_pred, average='binary') # Use average='binary' for binary classification
+model_accuracy = metrics.accuracy_score(y_test, y_pred)
+
+model_end_time = time.time()
+model_execution_time = model_end_time - model_start_time
+
+emissions = tracker.stop()
+
+# Compile SmartExplainer
+xpl = SmartExplainer(model)
+y_pred = pd.Series(y_pred)
+X_test = X_test.reset_index(drop=True)
+xpl.compile(x=X_test, y_pred=y_pred)
+
+st.plotly_chart(xpl.plot.features_importance(), use_container_width = True)
+
+import random
+subset = random.choices(X_test.index, k =50)
+st.plotly_chart(xpl.plot.features_importance(selection=subset), use_container_width = True)
+
+paramChoice = st.selectbox("Select Parameter", params)
+
+st.plotly_chart(xpl.plot.contribution_plot(paramChoice), use_container_width = True)
pages/04 🦦 MLflow.py
CHANGED
@@ -1,245 +1,27 @@
+import streamlit as st
 import pandas as pd
+from PIL import Image
+import sklearn.metrics as sk_metrics
+from sklearn.model_selection import train_test_split
+from sklearn.neighbors import KNeighborsClassifier
 from sklearn.tree import DecisionTreeClassifier # Import Decision Tree Classifier
 from sklearn.model_selection import train_test_split # Import train_test_split function
 from sklearn import metrics # Import scikit-learn metrics module for accuracy calculation
 import matplotlib.pyplot as plt
 from sklearn.linear_model import LogisticRegression
 from sklearn.metrics import classification_report
-
-df.head()
-
-df['Education'] = df['Education'].astype('category').cat.codes
-df['Marital_Status'] = df['Marital_Status'].astype('category').cat.codes
-
-df = df.drop(["Dt_Customer"], axis = 1)
-df = df.drop(["ID"], axis = 1)
-
-df = df.dropna()
-
-df.head()
-
-plt.figure(figsize=(16, 10))
-sns.heatmap(df.corr(), annot=True)
-plt.show()
-
-X = df.drop(labels = ['Response'], axis = 1)
-y = df["Response"]
-X_train, X_test, y_train, y_test = train_test_split(X,y, test_size = 0.3, random_state = 42)
-
-logmodel = LogisticRegression()
-logmodel.fit(X_train,y_train)
-prediction = logmodel.predict(X_test)
-print(classification_report(y_test,prediction))
-
-# Create Decision Tree classifier object
-clf = DecisionTreeClassifier()
-
-# Train Decision Tree Classifier
-clf = clf.fit(X_train,y_train)
-
-# Predict the response for test dataset
-y_pred = clf.predict(X_test)
-
-print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
-
-feature_cols = X.columns
-feature_cols
-
-from sklearn.tree import export_graphviz
-feature_names = X.columns
-dot_data = export_graphviz(clf, out_file=None,
-                           feature_names=feature_cols,
-                           class_names=['0','1'],
-                           filled=True, rounded=True,
-                           special_characters=True)
-
-graph = graphviz.Source(dot_data)
-graph
-
-# Create Decision Tree classifier object
-clf = DecisionTreeClassifier(max_depth=3)
-
-# Train Decision Tree Classifier
-clf = clf.fit(X_train,y_train)
-
-# Predict the response for test dataset
-y_pred = clf.predict(X_test)
-
-# Model Accuracy, how often is the classifier correct?
-print("Accuracy:",metrics.accuracy_score(y_test, y_pred))
-
-import graphviz
-from sklearn.tree import export_graphviz
-feature_names = X.columns
-dot_data = export_graphviz(clf, out_file=None,
-                           feature_names=feature_cols,
-                           class_names=['0','1'],
-                           filled=True, rounded=True,
-                           special_characters=True)
-
-graph = graphviz.Source(dot_data)
-graph
-
+from codecarbon import EmissionsTracker
+from sklearn.metrics import accuracy_score, precision_score, f1_score, classification_report
+import time
 from shapash.explainer.smart_explainer import SmartExplainer
 
-y_pred = pd.Series(y_pred)
-X_test = X_test.reset_index(drop=True)
-xpl.compile(x=X_test, y_pred=y_pred)
-
-xpl.plot.features_importance()
-
-from sklearn.neighbors import KNeighborsClassifier
-
-knn = KNeighborsClassifier()
-knn.fit(X_train, y_train)
-results = knn.predict(X_test)
-print("Accuracy:",metrics.accuracy_score(y_test, results))
-
-# Import necessary libraries
-import numpy as np # a Python library used for working with arrays
-import pandas as pd # it allows us to analyze big data and make conclusions based on statistical theories
-
-from pycaret.datasets import get_data # allows you to easily access and load built-in datasets for machine learning experimentation
-from pycaret.classification import * # imports all the classification-related functions
-from sklearn.model_selection import train_test_split # This function is commonly used to split a dataset into training and testing subsets.
-import mlflow # MLflow is an open-source platform for managing the machine learning lifecycle
-from sklearn import metrics as sk_metrics # imports the metrics module from scikit-learn for evaluation metrics and scoring functions
-
-# Split data into training and testing sets
-loan_train, loan_test = train_test_split(df, test_size=0.2, random_state=42)
-
-# Initialize PyCaret setup with the training set
-cls1 = setup(data = loan_train, target = 'Response')
-
-# Compare all models and select top 3
-top3 = compare_models(include=['lr', 'knn', 'dt'], n_select=3)
-
-# Log each model into mlflow separately
-for i, model in enumerate(top3, 1):
-    with mlflow.start_run(run_name = f"Model: {model}"):
-        model_name = "model_" + str(i)
-
-        # Log model
-        mlflow.sklearn.log_model(model, model_name)
-
-        # Log parameters
-        params = model.get_params()
-        for key, value in params.items():
-            mlflow.log_param(key, value)
-
-        # Predict on the testing set and log metrics
-        y_pred = predict_model(model, data=loan_test.drop('Response', axis=1))
-        y_test = loan_test['Response']
-
-        # Calculate metrics
-        accuracy = sk_metrics.accuracy_score(y_test, y_pred["prediction_label"])
-        precision = sk_metrics.precision_score(y_test, y_pred["prediction_label"], average='weighted')
-        recall = sk_metrics.recall_score(y_test, y_pred["prediction_label"], average='weighted')
-        f1 = sk_metrics.f1_score(y_test, y_pred["prediction_label"], average='weighted')
-
-        # Log metrics
-        mlflow.log_metric("Accuracy", accuracy)
-        mlflow.log_metric("Precision", precision)
-        mlflow.log_metric("Recall", recall)
-        mlflow.log_metric("F1 Score", f1)
-
-    mlflow.end_run()
-
-# Split data into training and testing sets
-loan_train, loan_test = train_test_split(df, test_size=0.2, random_state=42)
-
-# Define the list of max_depth values to try
-max_depth_values = [2, 4, 6, 8, 10]
-
-# Loop over each max_depth value
-for depth in max_depth_values:
-    with mlflow.start_run(run_name=f"Decision Tree (Max Depth: {depth})"):
-        # Initialize and train the decision tree model
-        model = DecisionTreeClassifier(max_depth=depth)
-        model.fit(loan_train.drop('Response', axis=1), loan_train['Response'])
-
-        # Log model parameters
-        mlflow.log_param("max_depth", depth)
-
-        # Predict on the testing set and log metrics
-        y_pred = model.predict(loan_test.drop('Response', axis=1))
-        y_test = loan_test['Response']
-
-        # Calculate metrics
-        accuracy = sk_metrics.accuracy_score(y_test, y_pred)
-        precision = sk_metrics.precision_score(y_test, y_pred, average='weighted')
-        recall = sk_metrics.recall_score(y_test, y_pred, average='weighted')
-        f1 = sk_metrics.f1_score(y_test, y_pred, average='weighted')
-
-        # Log metrics
-        mlflow.log_metric("Accuracy", accuracy)
-        mlflow.log_metric("Precision", precision)
-        mlflow.log_metric("Recall", recall)
-        mlflow.log_metric("F1 Score", f1)
-
-        # Log the trained model
-        mlflow.sklearn.log_model(model, "decision_tree_model")
-
-    mlflow.end_run()
-
-from sklearn.neighbors import KNeighborsClassifier
-
-# Split data into training and testing sets
-loan_train, loan_test = train_test_split(df, test_size=0.2, random_state=42)
-
-# Define the list of n_neighbors values to try
-n_neighbors_values = [3, 5, 7, 9, 11]
-
-# Loop over each n_neighbors value
-for n_neighbors in n_neighbors_values:
-    with mlflow.start_run(run_name=f"KNN (n_neighbors: {n_neighbors})"):
-        # Initialize and train the KNN model
-        model = KNeighborsClassifier(n_neighbors=n_neighbors)
-        model.fit(loan_train.drop('Response', axis=1), loan_train['Response'])
-
-        # Log model parameters
-        mlflow.log_param("n_neighbors", n_neighbors)
-
-        # Predict on the testing set and log metrics
-        y_pred = model.predict(loan_test.drop('Response', axis=1))
-        y_test = loan_test['Response']
-
-        # Calculate metrics
-        accuracy = sk_metrics.accuracy_score(y_test, y_pred)
-        precision = sk_metrics.precision_score(y_test, y_pred, average='weighted')
-        recall = sk_metrics.recall_score(y_test, y_pred, average='weighted')
-        f1 = sk_metrics.f1_score(y_test, y_pred, average='weighted')
-
-        mlflow.log_metric("Accuracy", accuracy)
-        mlflow.log_metric("Precision", precision)
-        mlflow.log_metric("Recall", recall)
-        mlflow.log_metric("F1 Score", f1)
-
-    mlflow.end_run()
+url = "https://upload.wikimedia.org/wikipedia/commons/6/6a/DoorDash_Logo.svg"
+st.image(url, output_format="PNG", width=300)
 
+st.title("MLflow Dashboard")
 
+st.image('imgs/ml-flow1.jpeg', caption='MLflow Dashboard')
+st.image('imgs/ml-flow2.jpeg', caption='Comparing Models')
+st.image('imgs/ml-flow3.jpeg', caption='Models sorted by Accuracy')
+st.image('imgs/ml-flow4.jpeg', caption='Winning Model')
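The page is now a static gallery of MLflow screenshots; the run-logging loops that produced those runs were removed above. For context, here is a condensed sketch of that logging pattern on synthetic data, sweeping a single hyperparameter; the run-name format and the "decision_tree_model" artifact path follow the removed code, the dataset and sweep values are illustrative.

# Condensed sketch of the MLflow logging pattern the screenshots were generated from.
import mlflow
import mlflow.sklearn
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

X, y = make_classification(n_samples=400, n_features=6, random_state=1)  # hypothetical data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

for depth in (2, 4, 6):
    with mlflow.start_run(run_name=f"Decision Tree (Max Depth: {depth})"):
        model = DecisionTreeClassifier(max_depth=depth).fit(X_train, y_train)
        mlflow.log_param("max_depth", depth)                                   # hyperparameter
        mlflow.log_metric("Accuracy", accuracy_score(y_test, model.predict(X_test)))
        mlflow.sklearn.log_model(model, "decision_tree_model")                 # model artifact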
requirements.txt
CHANGED
@@ -7,4 +7,6 @@ tensorflow
 matplotlib
 streamlit
 seaborn
-graphviz
+graphviz
+shapash
+shapash[report]