olivermueller committed on
Commit
701c14e
1 Parent(s): 09db070

New dataset

Browse files
Files changed (4) hide show
  1. app.py +39 -18
  2. german_credit.csv +0 -0
  3. german_credit_from_r.csv +0 -0
  4. tree.png +0 -0
app.py CHANGED
@@ -1,5 +1,6 @@
1
  import streamlit as st
2
  import pandas as pd
 
3
  from sklearn.model_selection import train_test_split
4
  from sklearn.pipeline import Pipeline
5
  from sklearn.preprocessing import StandardScaler, OneHotEncoder
@@ -10,17 +11,15 @@ from sklearn.tree import DecisionTreeClassifier, plot_tree
10
  import matplotlib as plt
11
  from sklearn import metrics
12
  import graphviz as graphviz
 
 
 
13
 
14
  # load data
15
- data = pd.read_csv('german_credit.csv')
16
- data.drop('Unnamed: 0', axis=1, inplace=True)
17
 
18
- # recode label
19
- data['Credit_risk'] = data['Credit_risk'].map({1: 'low', 2: 'high'})
20
-
21
- # replace missing values
22
- data['Saving_accounts'].fillna('unknown', inplace=True)
23
- data['Checking_account'].fillna('unknown', inplace=True)
24
 
25
  # extract variable names
26
  vars = data.columns.tolist()
@@ -53,10 +52,13 @@ data
53
  #st.divider()
54
 
55
  # header: predictors
 
 
 
 
56
  st.header('Predictors')
57
  st.write('Please select up to 3 predictors:')
58
- selected = st.multiselect(
59
- '', vars, max_selections=3)
60
  #st.divider()
61
 
62
  # header: model
@@ -64,13 +66,11 @@ st.header('Model')
64
 
65
  X_train_f = X_train.loc[:, selected]
66
 
67
- numeric_features = ["Age", "Job", "Credit_amount", "Duration"]
68
  numeric_features_selected = list(set(selected) & set(numeric_features))
69
  numeric_transformer = Pipeline(
70
  steps=[("imputer", SimpleImputer())]
71
  )
72
 
73
- categorical_features = ["Sex", "Housing", "Saving_accounts", "Checking_account", "Purpose"]
74
  categorical_features_selected = list(set(selected) & set(categorical_features))
75
  categorical_transformer = Pipeline(
76
  steps=[
@@ -87,20 +87,41 @@ preprocessor = ColumnTransformer(
87
  if selected == []:
88
  st.write('Please select at least 1 predictor.')
89
  else:
90
- pipe = Pipeline([("preprocessor", preprocessor), ('classifier', tree.DecisionTreeClassifier(max_depth=2))])
 
91
  pipe.fit(X_train_f, y_train)
92
- mytree = tree.export_graphviz(pipe.named_steps["classifier"], out_file=None)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
93
  st.graphviz_chart(mytree)
94
 
 
95
  #st.divider()
96
 
97
  # header: accuracy
98
  st.header('Accuracy')
99
 
100
- try:
101
- score = pipe.score(X_test, y_test)
102
- score
103
- except:
104
  st.write('Please select at least 1 predictor.')
 
 
 
 
 
 
 
 
105
 
106
  #st.divider()
 
1
  import streamlit as st
2
  import pandas as pd
3
+ import numpy as np
4
  from sklearn.model_selection import train_test_split
5
  from sklearn.pipeline import Pipeline
6
  from sklearn.preprocessing import StandardScaler, OneHotEncoder
 
11
  import matplotlib as plt
12
  from sklearn import metrics
13
  import graphviz as graphviz
14
+ import dtreeviz
15
+ import collections
16
+ import pydotplus
17
 
18
  # load data
19
+ data = pd.read_csv('german_credit_from_r.csv')
 
20
 
21
+ # recode credit risk
22
+ data['Credit_risk'] = data['Credit_risk'].map({'GOOD': 0, 'BAD': 1})
 
 
 
 
23
 
24
  # extract variable names
25
  vars = data.columns.tolist()
 
52
  #st.divider()
53
 
54
  # header: predictors
55
+ numeric_features = ["Duration", "Credit_amount", "Installment_rate", "Resident_since", "Age", "Existing_credits", "People_maintenance_for"]
56
+ categorical_features = ["Account_status", "Credit_history", "Purpose", "Savings_bonds", "Present_employment_since", "Other_debtors_guarantors", "Property", "Other_installment_plans", "Housing", "Job", "Telephone", "Foreign_worker", "Gender"]
57
+ #categorical_features = []
58
+
59
  st.header('Predictors')
60
  st.write('Please select up to 3 predictors:')
61
+ selected = st.multiselect('', numeric_features+categorical_features, max_selections=3)
 
62
  #st.divider()
63
 
64
  # header: model
 
66
 
67
  X_train_f = X_train.loc[:, selected]
68
 
 
69
  numeric_features_selected = list(set(selected) & set(numeric_features))
70
  numeric_transformer = Pipeline(
71
  steps=[("imputer", SimpleImputer())]
72
  )
73
 
 
74
  categorical_features_selected = list(set(selected) & set(categorical_features))
75
  categorical_transformer = Pipeline(
76
  steps=[
 
87
  if selected == []:
88
  st.write('Please select at least 1 predictor.')
89
  else:
90
+ maxd = st.slider('Max depth', min_value=1, max_value=10, value=2, step=1)
91
+ pipe = Pipeline([("preprocessor", preprocessor), ('classifier', tree.DecisionTreeClassifier(max_depth=maxd))])
92
  pipe.fit(X_train_f, y_train)
93
+ fn = pipe[:-1].get_feature_names_out()
94
+ fn = [item.replace("cat__", "").replace("num__", "") for item in fn]
95
+ labels = pipe.named_steps["classifier"].classes_
96
+ labels = [str(item) for item in labels]
97
+ mytree = tree.export_graphviz(pipe.named_steps["classifier"],
98
+ feature_names = fn,
99
+ class_names=labels,
100
+ label = 'none',
101
+ filled = True,
102
+ leaves_parallel = True,
103
+ impurity= False,
104
+ proportion = True,
105
+ rotate=False,
106
+ out_file=None)
107
+
108
  st.graphviz_chart(mytree)
109
 
110
+
111
  #st.divider()
112
 
113
  # header: accuracy
114
  st.header('Accuracy')
115
 
116
+ if selected == []:
 
 
 
117
  st.write('Please select at least 1 predictor.')
118
+ else:
119
+ preds = pd.DataFrame(pipe.predict_proba(X_test))
120
+ preds.columns = ['prob_0', 'prob_1']
121
+ fpr, tpr, thresholds = metrics.roc_curve(y_test, preds["prob_1"], pos_label=1)
122
+
123
+ st.write('AUC: ', np.round(metrics.auc(fpr, tpr),3))
124
+ st.write('Precision: ', np.round(metrics.precision_score(y_test, pipe.predict(X_test)),3))
125
+ st.write('Recall: ', np.round(metrics.recall_score(y_test, pipe.predict(X_test)),3))
126
 
127
  #st.divider()
german_credit.csv CHANGED
The diff for this file is too large to render. See raw diff
 
german_credit_from_r.csv ADDED
The diff for this file is too large to render. See raw diff
 
tree.png ADDED