Spaces:
Sleeping
Sleeping
Commit
•
701c14e
1
Parent(s):
09db070
New dataset
Browse files
- app.py +39 -18
- german_credit.csv +0 -0
- german_credit_from_r.csv +0 -0
- tree.png +0 -0
app.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
|
|
3 |
from sklearn.model_selection import train_test_split
|
4 |
from sklearn.pipeline import Pipeline
|
5 |
from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
@@ -10,17 +11,15 @@ from sklearn.tree import DecisionTreeClassifier, plot_tree
|
|
10 |
import matplotlib as plt
|
11 |
from sklearn import metrics
|
12 |
import graphviz as graphviz
|
|
|
|
|
|
|
13 |
|
14 |
# load data
|
15 |
-
data = pd.read_csv('
|
16 |
-
data.drop('Unnamed: 0', axis=1, inplace=True)
|
17 |
|
18 |
-
# recode
|
19 |
-
data['Credit_risk'] = data['Credit_risk'].map({
|
20 |
-
|
21 |
-
# replace missing values
|
22 |
-
data['Saving_accounts'].fillna('unknown', inplace=True)
|
23 |
-
data['Checking_account'].fillna('unknown', inplace=True)
|
24 |
|
25 |
# extract variable names
|
26 |
vars = data.columns.tolist()
|
@@ -53,10 +52,13 @@ data
|
|
53 |
#st.divider()
|
54 |
|
55 |
# header: predictors
|
|
|
|
|
|
|
|
|
56 |
st.header('Predictors')
|
57 |
st.write('Please select up to 3 predictors:')
|
58 |
-
selected = st.multiselect(
|
59 |
-
'', vars, max_selections=3)
|
60 |
#st.divider()
|
61 |
|
62 |
# header: model
|
@@ -64,13 +66,11 @@ st.header('Model')
|
|
64 |
|
65 |
X_train_f = X_train.loc[:, selected]
|
66 |
|
67 |
-
numeric_features = ["Age", "Job", "Credit_amount", "Duration"]
|
68 |
numeric_features_selected = list(set(selected) & set(numeric_features))
|
69 |
numeric_transformer = Pipeline(
|
70 |
steps=[("imputer", SimpleImputer())]
|
71 |
)
|
72 |
|
73 |
-
categorical_features = ["Sex", "Housing", "Saving_accounts", "Checking_account", "Purpose"]
|
74 |
categorical_features_selected = list(set(selected) & set(categorical_features))
|
75 |
categorical_transformer = Pipeline(
|
76 |
steps=[
|
@@ -87,20 +87,41 @@ preprocessor = ColumnTransformer(
|
|
87 |
if selected == []:
|
88 |
st.write('Please select at least 1 predictor.')
|
89 |
else:
|
90 |
-
|
|
|
91 |
pipe.fit(X_train_f, y_train)
|
92 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
93 |
st.graphviz_chart(mytree)
|
94 |
|
|
|
95 |
#st.divider()
|
96 |
|
97 |
# header: accuracy
|
98 |
st.header('Accuracy')
|
99 |
|
100 |
-
|
101 |
-
score = pipe.score(X_test, y_test)
|
102 |
-
score
|
103 |
-
except:
|
104 |
st.write('Please select at least 1 predictor.')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
105 |
|
106 |
#st.divider()
|
|
|
1 |
import streamlit as st
|
2 |
import pandas as pd
|
3 |
+
import numpy as np
|
4 |
from sklearn.model_selection import train_test_split
|
5 |
from sklearn.pipeline import Pipeline
|
6 |
from sklearn.preprocessing import StandardScaler, OneHotEncoder
|
|
|
11 |
import matplotlib as plt
|
12 |
from sklearn import metrics
|
13 |
import graphviz as graphviz
|
14 |
+
import dtreeviz
|
15 |
+
import collections
|
16 |
+
import pydotplus
|
17 |
|
18 |
# load data
|
19 |
+
data = pd.read_csv('german_credit_from_r.csv')
|
|
|
20 |
|
21 |
+
# recode credit risk
|
22 |
+
data['Credit_risk'] = data['Credit_risk'].map({'GOOD': 0, 'BAD': 1})
|
|
|
|
|
|
|
|
|
23 |
|
24 |
# extract variable names
|
25 |
vars = data.columns.tolist()
|
|
|
52 |
#st.divider()
|
53 |
|
54 |
# header: predictors
|
55 |
+
numeric_features = ["Duration", "Credit_amount", "Installment_rate", "Resident_since", "Age", "Existing_credits", "People_maintenance_for"]
|
56 |
+
categorical_features = ["Account_status", "Credit_history", "Purpose", "Savings_bonds", "Present_employment_since", "Other_debtors_guarantors", "Property", "Other_installment_plans", "Housing", "Job", "Telephone", "Foreign_worker", "Gender"]
|
57 |
+
#categorical_features = []
|
58 |
+
|
59 |
st.header('Predictors')
|
60 |
st.write('Please select up to 3 predictors:')
|
61 |
+
selected = st.multiselect('', numeric_features+categorical_features, max_selections=3)
|
|
|
62 |
#st.divider()
|
63 |
|
64 |
# header: model
|
|
|
66 |
|
67 |
X_train_f = X_train.loc[:, selected]
|
68 |
|
|
|
69 |
numeric_features_selected = list(set(selected) & set(numeric_features))
|
70 |
numeric_transformer = Pipeline(
|
71 |
steps=[("imputer", SimpleImputer())]
|
72 |
)
|
73 |
|
|
|
74 |
categorical_features_selected = list(set(selected) & set(categorical_features))
|
75 |
categorical_transformer = Pipeline(
|
76 |
steps=[
|
|
|
87 |
if selected == []:
|
88 |
st.write('Please select at least 1 predictor.')
|
89 |
else:
|
90 |
+
maxd = st.slider('Max depth', min_value=1, max_value=10, value=2, step=1)
|
91 |
+
pipe = Pipeline([("preprocessor", preprocessor), ('classifier', tree.DecisionTreeClassifier(max_depth=maxd))])
|
92 |
pipe.fit(X_train_f, y_train)
|
93 |
+
fn = pipe[:-1].get_feature_names_out()
|
94 |
+
fn = [item.replace("cat__", "").replace("num__", "") for item in fn]
|
95 |
+
labels = pipe.named_steps["classifier"].classes_
|
96 |
+
labels = [str(item) for item in labels]
|
97 |
+
mytree = tree.export_graphviz(pipe.named_steps["classifier"],
|
98 |
+
feature_names = fn,
|
99 |
+
class_names=labels,
|
100 |
+
label = 'none',
|
101 |
+
filled = True,
|
102 |
+
leaves_parallel = True,
|
103 |
+
impurity= False,
|
104 |
+
proportion = True,
|
105 |
+
rotate=False,
|
106 |
+
out_file=None)
|
107 |
+
|
108 |
st.graphviz_chart(mytree)
|
109 |
|
110 |
+
|
111 |
#st.divider()
|
112 |
|
113 |
# header: accuracy
|
114 |
st.header('Accuracy')
|
115 |
|
116 |
+
if selected == []:
|
|
|
|
|
|
|
117 |
st.write('Please select at least 1 predictor.')
|
118 |
+
else:
|
119 |
+
preds = pd.DataFrame(pipe.predict_proba(X_test))
|
120 |
+
preds.columns = ['prob_0', 'prob_1']
|
121 |
+
fpr, tpr, thresholds = metrics.roc_curve(y_test, preds["prob_1"], pos_label=1)
|
122 |
+
|
123 |
+
st.write('AUC: ', np.round(metrics.auc(fpr, tpr),3))
|
124 |
+
st.write('Precision: ', np.round(metrics.precision_score(y_test, pipe.predict(X_test)),3))
|
125 |
+
st.write('Recall: ', np.round(metrics.recall_score(y_test, pipe.predict(X_test)),3))
|
126 |
|
127 |
#st.divider()
|
german_credit.csv
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
german_credit_from_r.csv
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tree.png
ADDED
![tree.png](tree.png) |