SoooSlooow committed on
Commit
8031b06
1 Parent(s): e8b068a

init commit

Browse files
app.py ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import subprocess
2
+ import random
3
+ from typing import Any
4
+
5
+ import gradio as gr
6
+ import joblib
7
+ import numpy as np
8
+ import pandas as pd
9
+
10
+ OUTPUT_DATA_PATH = "data/processed/app_dataset.csv"
11
+ PREDICTIONS_PATH = "models/predictions/app_predictions.csv"
12
+ UNIQUE_VALUES_PATH = "models/other/unique_column_values.pkl"
13
+
14
+
15
def predict(*args: Any) -> Any:
    """Score a single client described by the form values and build a verdict.

    The positional values are written as a one-row CSV, the
    ``src.models.make_predictions`` module is executed as a child process on
    that CSV, and the resulting predictions file is read back.

    :param args: one value per entry of the module-level ``columns`` tuple,
        in the same order (this is the order of the ``inputs`` list wired to
        the "Predict" button).
    :return: a ``(rating, message)`` pair, where ``rating`` is the first
        predicted column (``proba_0``) rounded to 3 decimals and ``message``
        is the human-readable verdict derived from the ``label`` column.
    :raises subprocess.CalledProcessError: if the prediction script exits with
        a non-zero status (previously the stale predictions file from an
        earlier run would have been returned silently).
    """
    import sys  # local import: only needed to locate the running interpreter

    model_path = "models/final_model.pkl"

    # Persist the form values as the one-row dataset the script will score.
    app_df = pd.DataFrame(data=[args], columns=columns, index=[0])
    app_df.to_csv(OUTPUT_DATA_PATH, index=False)

    # Pass the command as an argument list WITHOUT shell=True: on POSIX,
    # shell=True with a list runs only the first item ("python") and hands the
    # rest to the shell itself, so the module would never be executed.
    # sys.executable guarantees the same interpreter/virtualenv as the app.
    subprocess.run(
        [
            sys.executable,
            "-m",
            "src.models.make_predictions",
            OUTPUT_DATA_PATH,
            model_path,
            PREDICTIONS_PATH,
        ],
        check=True,
    )

    # The file has a header and a single data row: proba_0, proba_1, label.
    predictions = np.genfromtxt(PREDICTIONS_PATH, delimiter=",", skip_header=1)
    if predictions[2] == 1:
        message = "Client is considered bad. Issuance of credit is not recommended."
    else:
        message = "Client is considered good. Issuance of credit is allowed."
    return round(predictions[0], 3), message
35
+
36
+
37
# Column names of the one-row dataset built in predict(). The order matters:
# it has to match, position by position, the ``inputs`` list passed to
# ``predict_btn.click`` further down in this file.
columns = (
    "YEARS_BIRTH",          # age slider
    "CODE_GENDER",          # sex dropdown
    "AMT_INCOME_TOTAL",     # annual income slider
    "NAME_INCOME_TYPE",     # income type dropdown
    "YEARS_EMPLOYED",       # work experience slider
    "OCCUPATION_TYPE",      # occupation type dropdown
    "NAME_EDUCATION_TYPE",  # education type dropdown
    "CNT_FAM_MEMBERS",      # amount of family members slider
    "CNT_CHILDREN",         # amount of children slider
    "NAME_FAMILY_STATUS",   # family status dropdown
    "FLAG_OWN_CAR",         # having a car dropdown
    "FLAG_OWN_REALTY",      # having a realty dropdown
    "NAME_HOUSING_TYPE",    # housing type dropdown
    "FLAG_PHONE",           # having a phone dropdown
    "FLAG_WORK_PHONE",      # having a work phone dropdown
    "FLAG_EMAIL",           # having an email dropdown
)

# Object indexed by column name that yields the selectable values for each
# dropdown below (presumably a dict of lists -- confirm against the pickle).
unique_values = joblib.load(UNIQUE_VALUES_PATH)
56
+
57
def _random_dropdown(label: str, column: str) -> gr.Dropdown:
    """Create a dropdown over ``unique_values[column]`` with a random initial value.

    ``value`` is passed as a callable so that a new random choice is drawn
    whenever Gradio evaluates the component's initial value.
    """
    choices = unique_values[column]
    return gr.Dropdown(
        label=label,
        choices=choices,
        value=lambda: random.choice(choices),
    )


def _random_slider(label: str, minimum: int, maximum: int, step: int) -> gr.Slider:
    """Create a slider whose initial value is randomized by Gradio."""
    return gr.Slider(
        label=label,
        minimum=minimum,
        maximum=maximum,
        step=step,
        randomize=True,
    )


# The "Having a car" dropdown used to read the FLAG_OWN_REALTY values
# (copy-paste slip). Use the FLAG_OWN_CAR values when the pickle provides
# them; otherwise fall back to the previous behaviour so the app still starts.
_own_car_column = "FLAG_OWN_CAR" if "FLAG_OWN_CAR" in unique_values else "FLAG_OWN_REALTY"

with gr.Blocks() as demo:
    with gr.Row():
        # Left column: personal and employment data.
        with gr.Column():
            age = _random_slider("Age", 18, 90, 1)
            sex = _random_dropdown("Sex", "CODE_GENDER")
            annual_income = _random_slider("Annual income", 0, 7000000, 10000)
            income_type = _random_dropdown("Income type", "NAME_INCOME_TYPE")
            work_experience = _random_slider(
                "Work experience at current position", 0, 75, 1
            )
            occupation_type = _random_dropdown("Occupation type", "OCCUPATION_TYPE")
            education_type = _random_dropdown("Education type", "NAME_EDUCATION_TYPE")
            amount_of_family_members = _random_slider(
                "Amount of family members", 0, 12, 1
            )
            amount_of_children = _random_slider("Amount of children", 0, 10, 1)

        # Middle column: family, property and contact flags.
        with gr.Column():
            family_status = _random_dropdown("Family status", "NAME_FAMILY_STATUS")
            flag_own_car = _random_dropdown("Having a car", _own_car_column)
            flag_own_realty = _random_dropdown("Having a realty", "FLAG_OWN_REALTY")
            housing_type = _random_dropdown("Housing type", "NAME_HOUSING_TYPE")
            flag_phone = _random_dropdown("Having a phone", "FLAG_PHONE")
            flag_work_phone = _random_dropdown("Having a work phone", "FLAG_WORK_PHONE")
            flag_email = _random_dropdown("Having an email", "FLAG_EMAIL")

        # Right column: model outputs.
        with gr.Column():
            label_1 = gr.Label(label="Client rating")
            label_2 = gr.Textbox(
                label="Client verdict (client is considered bad if client rating < 0.99)"
            )
    with gr.Row():
        predict_btn = gr.Button(value="Predict")
    # The order of ``inputs`` must match the module-level ``columns`` tuple,
    # because predict() zips the received values with those column names.
    predict_btn.click(
        predict,
        inputs=[
            age,
            sex,
            annual_income,
            income_type,
            work_experience,
            occupation_type,
            education_type,
            amount_of_family_members,
            amount_of_children,
            family_status,
            flag_own_car,
            flag_own_realty,
            housing_type,
            flag_phone,
            flag_work_phone,
            flag_email,
        ],
        outputs=[label_1, label_2],
    )

demo.launch()
data/processed/app_dataset.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ YEARS_BIRTH,CODE_GENDER,AMT_INCOME_TOTAL,NAME_INCOME_TYPE,YEARS_EMPLOYED,OCCUPATION_TYPE,NAME_EDUCATION_TYPE,CNT_FAM_MEMBERS,CNT_CHILDREN,NAME_FAMILY_STATUS,FLAG_OWN_CAR,FLAG_OWN_REALTY,NAME_HOUSING_TYPE,FLAG_PHONE,FLAG_WORK_PHONE,FLAG_EMAIL
2
+ 63,M,5000000,Commercial associate,74,IT staff,Secondary / secondary special,4,5,Married,Yes,Yes,Municipal apartment,No,No,No
models/predictions/app_predictions.csv ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ proba_0,proba_1,label
2
+ 0.9910329199509368,0.008967080049063221,0.0
requirements.txt ADDED
Binary file (2.43 kB). View file
 
src/__init__.py ADDED
File without changes
src/models/__init__.py ADDED
File without changes
src/models/make_predictions.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import click
2
+ import joblib
3
+ import numpy as np
4
+ import pandas as pd
5
+
6
+
7
@click.command()
@click.argument("input_data_path", type=click.Path(exists=True))
@click.argument("input_model_path", type=click.Path(exists=True))
@click.argument("output_predictions_path", type=click.Path())
@click.option(
    "--threshold",
    type=click.FloatRange(0.0, 1.0),
    default=0.01,
    show_default=True,
    help="Class-1 probability above which an object is labelled 1.",
)
def make_predictions(
    input_data_path: str,
    input_model_path: str,
    output_predictions_path: str,
    threshold: float = 0.01,
) -> None:
    """
    Predict labels for the input data using the supplied model.

    The predictions are written to a csv file with three columns. The first
    two columns hold the probabilities of the object belonging to class 0 and
    class 1 respectively, the third one holds the predicted label obtained by
    comparing the class-1 probability with the chosen probability threshold.

    :param input_data_path: path to the data (csv); a BAD_CLIENT column, if
        present, is dropped before prediction
    :param input_model_path: path to the trained model (loaded with joblib)
    :param output_predictions_path: path of the csv file the predictions are
        written to; its parent directory is created when missing
    :param threshold: an object is labelled 1 when its class-1 probability is
        strictly greater than this value (default 0.01, the previously
        hard-coded value)
    """
    import os  # local import: only needed to prepare the output directory

    df = pd.read_csv(input_data_path)
    # The target column may or may not be present in the input; never feed it
    # to the model.
    X = df.drop(["BAD_CLIENT"], axis=1, errors="ignore")

    model = joblib.load(input_model_path)
    # Assumes a binary classifier, i.e. predict_proba returns two columns
    # (the output frame below names exactly two probability columns).
    probas = model.predict_proba(X)
    labels = (probas[:, 1] > threshold).astype(int)
    predictions = pd.DataFrame(
        data=np.column_stack([probas, labels]), columns=["proba_0", "proba_1", "label"]
    )

    # to_csv does not create missing directories, so make sure the target
    # directory exists before writing.
    output_dir = os.path.dirname(output_predictions_path)
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)
    predictions.to_csv(output_predictions_path, index=False)
34
+
35
+
36
# Example invocation:
#   python -m src.models.make_predictions processed/processed/test_dataset.csv models/final_model.pkl reports/predictions.csv
# NOTE(review): the input path above may have been meant to start with
# "data/processed/" -- confirm against the repository layout.
if __name__ == "__main__":
    # click parses sys.argv and calls the command with the parsed arguments.
    make_predictions()