adollbo committed on
Commit
a22458b
1 Parent(s): 6877f18

added comments and ran black for proper formatting

Browse files
Files changed (1) hide show
  1. utils.py +138 -61
utils.py CHANGED
@@ -8,28 +8,51 @@ import random
8
  from datetime import datetime, timedelta
9
  from babel.numbers import format_currency
10
 
11
- COL_NAMES = ['Transaction date',
12
- 'Transaction type',
13
- 'Amount transferred',
14
- 'Sender\'s initial balance',
15
- 'Sender\'s new balance',
16
- 'Recipient\'s initial balance',
17
- 'Recipient\'s new balance',
18
- "Sender exactly credited",
19
- "Receiver exactly credited",
20
- 'Large amount',
21
- 'Frequent receiver',
22
- 'Merchant receiver',
23
- 'Sender ID',
24
- 'Receiver ID']
25
-
26
- feature_texts = {0: "Date of transaction", 1: "Amount transferred", 2: "Initial balance of sender", 3: "New balance of sender",
27
- 4: "Initial balance of recipient", 5: "New balance of recipient", 6: "Sender's balance was exactly credited",
28
- 7: "Receiver's balance was exactly credited", 8: "Large amount", 9: "Frequent receiver of transactions", 10: "Receiver is merchant", 11: "Sender ID", 12: "Receiver ID",
29
- 13: "Transaction type is Cash out", 14: "Transaction type is Transfer", 15: "Transaction type is Payment", 16: "Transaction type is Cash in", 17: "Transaction type is Debit"}
30
-
31
- CATEGORIES = np.array(['CASH_OUT', 'TRANSFER', 'PAYMENT', 'CASH_IN', 'DEBIT'])
32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
  def transformation(input, categories):
34
  new_x = input
35
  cat = np.array(input[1])
@@ -38,21 +61,32 @@ def transformation(input, categories):
38
  match_index = np.where(categories == cat)[0]
39
  result_array[match_index] = 1
40
  new_x.extend(result_array.tolist())
41
- python_objects = [np_type.item() if isinstance(np_type, np.generic) else np_type for np_type in new_x]
 
 
 
42
  return python_objects
43
 
 
 
44
  def get_request_body(datapoint):
45
  data = datapoint.iloc[0].tolist()
46
  instances = [int(x) if isinstance(x, (np.int32, np.int64)) else x for x in data]
47
- request_body = {'instances': [instances]}
48
  return request_body
49
 
50
 
 
51
  def get_explainability_texts(shap_values, feature_texts):
52
  # Separate positive and negative values, keep indice as corresponds to key
53
  positive_dict = {index: val for index, val in enumerate(shap_values) if val > 0}
54
  # Sort dictionaries based on the magnitude of values
55
- sorted_positive_indices = [index for index, _ in sorted(positive_dict.items(), key=lambda item: abs(item[1]), reverse=True)]
 
 
 
 
 
56
  positive_texts = [feature_texts[x] for x in sorted_positive_indices]
57
  positive_texts = positive_texts[2:]
58
  sorted_positive_indices = sorted_positive_indices[2:]
@@ -62,37 +96,56 @@ def get_explainability_texts(shap_values, feature_texts):
62
  return positive_texts, sorted_positive_indices
63
 
64
 
 
 
65
  def random_past_date_from_last_year():
66
  one_year_ago = datetime.now() - timedelta(days=365)
67
  random_days = random.randint(0, (datetime.now() - one_year_ago).days)
68
  random_date = one_year_ago + timedelta(days=random_days)
69
- return random_date.strftime('%Y-%m-%d')
 
70
 
 
71
  def get_explainability_values(pos_indices, data):
72
- rounded_data = [round(value, 2) if isinstance(value, float) else value for value in data]
 
 
73
  transformed_data = transformation(input=rounded_data, categories=CATEGORIES)
74
  vals = []
75
  for idx in pos_indices:
76
- if idx in range(6,11) or idx in range(13,18):
77
  val = str(bool(transformed_data[idx])).capitalize()
78
  else:
79
  val = transformed_data[idx]
80
  vals.append(val)
81
  return vals
82
 
83
- def modify_datapoint(datapoint): # should return list, with correct numbers/amounts, and date
 
 
 
 
84
  data = datapoint.iloc[0].tolist()
85
  data[0] = random_past_date_from_last_year()
86
  modified_amounts = data.copy()
87
  if any(val > 12000 for val in data[2:7]):
88
- modified_amounts[2:7] = [value / 100 if value != 0 else 0 for value in data[2:7]]
 
 
89
  if any(val > 120000 for val in modified_amounts[2:7]):
90
  new_list = [value / 10 if value != 0 else 0 for value in modified_amounts[2:7]]
91
  modified_amounts[2:7] = new_list
92
- rounded_data = [round(value, 2) if isinstance(value, float) else value for value in modified_amounts]
93
- rounded_data[2:7] = [format_currency(value, 'EUR', locale='en_GB') for value in rounded_data[2:7]]
 
 
 
 
 
94
  return rounded_data
95
 
 
 
96
  def get_weights(shap_values, sorted_indices, target_sum=0.95):
97
  weights = [shap_values[x] for x in sorted_indices]
98
  total_sum = sum(weights)
@@ -100,6 +153,8 @@ def get_weights(shap_values, sorted_indices, target_sum=0.95):
100
  scaled_values = [val * (target_sum / total_sum) for val in weights]
101
  return scaled_values
102
 
 
 
103
  def get_fake_certainty():
104
  # Generate a random certainty between 75% and 99%
105
  fake_certainty = uniform(0.75, 0.99)
@@ -107,20 +162,28 @@ def get_fake_certainty():
107
  return formatted_fake_certainty
108
 
109
 
 
110
  def get_random_suspicious_transaction(data):
111
- suspicious_data=data[data["isFraud"]==1]
112
- max_n=len(suspicious_data)
113
- random_nr=randrange(max_n)
114
- suspicous_transaction = suspicious_data[random_nr-1:random_nr].drop("isFraud", axis=1)
 
 
115
  return suspicous_transaction
116
 
117
 
118
- def send_evaluation(client, deployment_id, request_log_id, prediction_log_id, evaluation_input):
 
 
 
119
  """Send evaluation to Deeploy."""
120
  try:
121
  with st.spinner("Submitting response..."):
122
  # Call the explain endpoint as it also includes the prediction
123
- client.evaluate(deployment_id, request_log_id, prediction_log_id, evaluation_input)
 
 
124
  return True
125
  except Exception as e:
126
  logging.error(e)
@@ -132,6 +195,7 @@ def send_evaluation(client, deployment_id, request_log_id, prediction_log_id, ev
132
  st.write(f"Error message: {e}")
133
 
134
 
 
135
  def get_model_url():
136
  """Get model url and retrieve workspace id and deployment id from it"""
137
  model_url = st.text_area(
@@ -148,37 +212,50 @@ def get_model_url():
148
  deployment_id = ""
149
  return model_url, workspace_id, deployment_id
150
 
 
 
151
  def get_comment_explanation(certainty, explainability_texts, explainability_values):
152
- cleaned = [x.replace(':', '') for x in explainability_texts]
153
- fi = [f'{cleaned[i]} is {x}' for i, x in enumerate(explainability_values)]
154
- fi.insert(0, 'Important suspicious features: ')
155
- result = '\n'.join(fi)
156
- comment = f"Model certainty is {certainty}" + '\n''\n' + result
157
  return comment
158
 
 
 
159
  def create_data_input_table(data, col_names):
160
  st.subheader("Transaction details")
161
  data[7:12] = [bool(value) for value in data[7:12]]
162
- rounded_list = [round(value, 2) if isinstance(value, float) else value for value in data]
163
- df = pd.DataFrame({"Feature name": col_names, "Value": rounded_list })
164
- st.dataframe(df, hide_index=True, width=475, height=35*len(df)+38) # use_container_width=True
 
 
 
 
165
 
166
- # Create a function to generate a table
 
167
  def create_table(texts, values, weights, title):
168
- df = pd.DataFrame({"Feature Explanation": texts, 'Value': values, 'Weight': weights})
169
- st.markdown(f'#### {title}') # Markdown for styling
170
- st.dataframe(df, hide_index=True, width=475, column_config={
171
- 'Weight': st.column_config.ProgressColumn(
172
- 'Weight',
173
- width='small',
174
- format="%.2f",
175
- min_value=0,
176
- max_value=1
177
- )
178
- }) #use_container_width=True
179
-
180
-
181
- def ChangeButtonColour(widget_label, font_color, background_color='transparent'):
 
 
 
 
182
  htmlstr = f"""
183
  <script>
184
  var elements = window.parent.document.querySelectorAll('button');
@@ -190,4 +267,4 @@ def ChangeButtonColour(widget_label, font_color, background_color='transparent')
190
  }}
191
  </script>
192
  """
193
- components.html(f"{htmlstr}", height=0, width=0)
 
8
  from datetime import datetime, timedelta
9
  from babel.numbers import format_currency
10
 
11
+ # Column names for data input
12
+ COL_NAMES = [
13
+ "Transaction date",
14
+ "Transaction type",
15
+ "Amount transferred",
16
+ "Sender's initial balance",
17
+ "Sender's new balance",
18
+ "Recipient's initial balance",
19
+ "Recipient's new balance",
20
+ "Sender exactly credited",
21
+ "Receiver exactly credited",
22
+ "Large amount",
23
+ "Frequent receiver",
24
+ "Merchant receiver",
25
+ "Sender ID",
26
+ "Receiver ID",
27
+ ]
 
 
 
 
28
 
29
+ # Texts for explanation
30
+ feature_texts = {
31
+ 0: "Date of transaction",
32
+ 1: "Amount transferred",
33
+ 2: "Initial balance of sender",
34
+ 3: "New balance of sender",
35
+ 4: "Initial balance of recipient",
36
+ 5: "New balance of recipient",
37
+ 6: "Sender's balance was exactly credited",
38
+ 7: "Receiver's balance was exactly credited",
39
+ 8: "Large amount",
40
+ 9: "Frequent receiver of transactions",
41
+ 10: "Receiver is merchant",
42
+ 11: "Sender ID",
43
+ 12: "Receiver ID",
44
+ 13: "Transaction type is Cash out",
45
+ 14: "Transaction type is Transfer",
46
+ 15: "Transaction type is Payment",
47
+ 16: "Transaction type is Cash in",
48
+ 17: "Transaction type is Debit",
49
+ }
50
+
51
+ # categories for one hot encoding
52
+ CATEGORIES = np.array(["CASH_OUT", "TRANSFER", "PAYMENT", "CASH_IN", "DEBIT"])
53
+
54
+
55
+ # one hot encoding
56
  def transformation(input, categories):
57
  new_x = input
58
  cat = np.array(input[1])
 
61
  match_index = np.where(categories == cat)[0]
62
  result_array[match_index] = 1
63
  new_x.extend(result_array.tolist())
64
+ python_objects = [
65
+ np_type.item() if isinstance(np_type, np.generic) else np_type
66
+ for np_type in new_x
67
+ ]
68
  return python_objects
69
 
70
+
71
+ # func to make the request body in the right format for the client
def get_request_body(datapoint):
    """Build the request payload from the first row of ``datapoint``.

    Args:
        datapoint: A pandas DataFrame; only its first row (``iloc[0]``) is read.

    Returns:
        dict: ``{"instances": [row_values]}`` where ``row_values`` is the row
        as a list in which every NumPy scalar has been replaced by the
        equivalent built-in Python object.
    """
    row_values = datapoint.iloc[0].tolist()
    # Unwrap every NumPy scalar, not only int32/int64: other widths and
    # np.bool_ would otherwise stay NumPy objects inside the payload. This
    # mirrors the np.generic -> .item() conversion used in transformation().
    instances = [
        value.item() if isinstance(value, np.generic) else value
        for value in row_values
    ]
    return {"instances": [instances]}
77
 
78
 
79
+ # func for sorting and retrieving the explanation texts
80
  def get_explainability_texts(shap_values, feature_texts):
81
  # Separate positive and negative values, keep indice as corresponds to key
82
  positive_dict = {index: val for index, val in enumerate(shap_values) if val > 0}
83
  # Sort dictionaries based on the magnitude of values
84
+ sorted_positive_indices = [
85
+ index
86
+ for index, _ in sorted(
87
+ positive_dict.items(), key=lambda item: abs(item[1]), reverse=True
88
+ )
89
+ ]
90
  positive_texts = [feature_texts[x] for x in sorted_positive_indices]
91
  positive_texts = positive_texts[2:]
92
  sorted_positive_indices = sorted_positive_indices[2:]
 
96
  return positive_texts, sorted_positive_indices
97
 
98
 
99
+ # func to generate random date from the past year to replace var "steps" with
100
+ # in the input data, to make it more understandable
def random_past_date_from_last_year():
    """Return a random date from the last 365 days as a ``YYYY-MM-DD`` string.

    The date is "365 days ago" plus a whole-day offset drawn uniformly from
    0 to 365 inclusive, so the result lies between one year ago and today.
    """
    window_days = 365
    # Read the clock once. The previous version called datetime.now() a
    # second time just to recompute this same 365-day span, which made the
    # range depend on the two clock reads being ordered.
    one_year_ago = datetime.now() - timedelta(days=window_days)
    random_days = random.randint(0, window_days)
    return (one_year_ago + timedelta(days=random_days)).strftime("%Y-%m-%d")
106
+
107
 
108
+ # func for retrieving the values for explanations, requires some data engineering
def get_explainability_values(pos_indices, data):
    """Look up the display value for each explained feature index.

    Floats in ``data`` are rounded to two decimals, the row is passed through
    ``transformation`` with the module-level ``CATEGORIES``, and the entries
    at ``pos_indices`` are collected in order.

    Args:
        pos_indices: Indices into the transformed row.
        data: The raw row values as a sequence.

    Returns:
        list: One entry per index. Indices 6-10 and 13-17 are rendered as the
        strings ``"True"`` / ``"False"``; all others are returned unchanged.
    """
    rounded = []
    for item in data:
        rounded.append(round(item, 2) if isinstance(item, float) else item)
    encoded = transformation(input=rounded, categories=CATEGORIES)

    # Positions that are shown as True/False text instead of the raw value.
    flag_positions = (*range(6, 11), *range(13, 18))
    return [
        str(bool(encoded[position])).capitalize()
        if position in flag_positions
        else encoded[position]
        for position in pos_indices
    ]
122
 
123
+
124
+ # func to modify the values of currency to make it more similar to euro
def modify_datapoint(datapoint):
    """Turn the first row of ``datapoint`` into a display-ready list.

    Slot 0 is replaced by a random date string, slots 2-6 are scaled down
    when they exceed the thresholds below and then formatted as EUR
    currency, and every remaining float is rounded to two decimals.

    Args:
        datapoint: A pandas DataFrame; only its first row (``iloc[0]``) is read.

    Returns:
        list: The modified row values.
    """
    row = datapoint.iloc[0].tolist()
    row[0] = random_past_date_from_last_year()

    # Two scaling passes over the amount slots: divide by 100 when anything
    # exceeds 12000, then by 10 when anything still exceeds 120000.
    amounts = row[2:7]
    for threshold, divisor in ((12000, 100), (120000, 10)):
        if any(amount > threshold for amount in amounts):
            amounts = [
                amount / divisor if amount != 0 else 0 for amount in amounts
            ]

    result = []
    for item in row[:2] + amounts + row[7:]:
        result.append(round(item, 2) if isinstance(item, float) else item)
    result[2:7] = [
        format_currency(amount, "EUR", locale="en_GB") for amount in result[2:7]
    ]
    return result
146
 
147
+
148
+ # func to retrieve the weights of the features to be presented as explanation
149
  def get_weights(shap_values, sorted_indices, target_sum=0.95):
150
  weights = [shap_values[x] for x in sorted_indices]
151
  total_sum = sum(weights)
 
153
  scaled_values = [val * (target_sum / total_sum) for val in weights]
154
  return scaled_values
155
 
156
+
157
+ # func to generate a fake certainty for the model to make it more realistic
158
  def get_fake_certainty():
159
  # Generate a random certainty between 75% and 99%
160
  fake_certainty = uniform(0.75, 0.99)
 
162
  return formatted_fake_certainty
163
 
164
 
165
+ # func to get a datapoint marked as fraud in the dataset to be passed to the model
def get_random_suspicious_transaction(data):
    """Pick one random row flagged as fraud and return it without its label.

    Args:
        data: A pandas DataFrame with an ``isFraud`` column; rows where it
            equals 1 are the candidates.

    Returns:
        A one-row DataFrame holding the chosen transaction, with the
        ``isFraud`` column dropped.

    Raises:
        ValueError: If ``data`` contains no row with ``isFraud == 1``.
    """
    suspicious_data = data[data["isFraud"] == 1]
    if suspicious_data.empty:
        raise ValueError("no rows with isFraud == 1 to choose from")
    position = randrange(len(suspicious_data))
    # Slice [position : position + 1]. The earlier [position - 1 : position]
    # produced an empty frame whenever position was 0 and could never select
    # the last candidate row.
    return suspicious_data.iloc[position : position + 1].drop("isFraud", axis=1)
174
 
175
 
176
+ # func to send the evaluation to Deeploy
177
+ def send_evaluation(
178
+ client, deployment_id, request_log_id, prediction_log_id, evaluation_input
179
+ ):
180
  """Send evaluation to Deeploy."""
181
  try:
182
  with st.spinner("Submitting response..."):
183
  # Call the explain endpoint as it also includes the prediction
184
+ client.evaluate(
185
+ deployment_id, request_log_id, prediction_log_id, evaluation_input
186
+ )
187
  return True
188
  except Exception as e:
189
  logging.error(e)
 
195
  st.write(f"Error message: {e}")
196
 
197
 
198
+ # func to retrieve model url and important vars for Deeploy client
199
  def get_model_url():
200
  """Get model url and retrieve workspace id and deployment id from it"""
201
  model_url = st.text_area(
 
212
  deployment_id = ""
213
  return model_url, workspace_id, deployment_id
214
 
215
+
216
+ # func to create the prefilled text for the disagree button
def get_comment_explanation(certainty, explainability_texts, explainability_values):
    """Compose the prefilled comment summarising the model's explanation.

    Args:
        certainty: Value interpolated after ``"Model certainty is "``.
        explainability_texts: Feature descriptions; any ``":"`` is removed.
        explainability_values: Values paired position-wise with the texts.

    Returns:
        str: The certainty line, a blank line, the heading
        ``"Important suspicious features: "`` and one ``"<text> is <value>"``
        line per text/value pair.
    """
    # zip pairs texts with values directly, so a surplus of values no longer
    # raises IndexError; unmatched items on either side are ignored.
    feature_lines = [
        f"{text.replace(':', '')} is {value}"
        for text, value in zip(explainability_texts, explainability_values)
    ]
    summary = "\n".join(["Important suspicious features: ", *feature_lines])
    return f"Model certainty is {certainty}\n\n{summary}"
224
 
225
+
226
+ # func to create the data input table
def create_data_input_table(data, col_names):
    """Render the transaction values as a two-column Streamlit table.

    Note that ``data`` is modified in place: slots 7-11 are converted to
    ``bool`` before the table is built.

    Args:
        data: The row values as a list.
        col_names: Labels shown in the "Feature name" column.
    """
    st.subheader("Transaction details")
    data[7:12] = list(map(bool, data[7:12]))

    display_values = []
    for item in data:
        display_values.append(round(item, 2) if isinstance(item, float) else item)

    table = pd.DataFrame({"Feature name": col_names, "Value": display_values})
    # Height grows with the row count: 35 per row plus a fixed 38.
    table_height = 35 * len(table) + 38
    st.dataframe(table, hide_index=True, width=475, height=table_height)
237
 
238
+
239
+ # func to create the explanation table
def create_table(texts, values, weights, title):
    """Render the explanation table under a level-4 markdown heading.

    Args:
        texts: Entries for the "Feature Explanation" column.
        values: Entries for the "Value" column.
        weights: Entries for the "Weight" column, shown as a progress bar
            configured for the range 0 to 1.
        title: Heading text placed above the table.
    """
    columns = {"Feature Explanation": texts, "Value": values, "Weight": weights}
    table = pd.DataFrame(columns)
    st.markdown(f"#### {title}")  # Markdown for styling

    weight_bar = st.column_config.ProgressColumn(
        "Weight", width="small", format="%.2f", min_value=0, max_value=1
    )
    st.dataframe(
        table,
        hide_index=True,
        width=475,
        column_config={"Weight": weight_bar},
    )
255
+
256
+
257
+ # func to change button colors
258
+ def ChangeButtonColour(widget_label, font_color, background_color="transparent"):
259
  htmlstr = f"""
260
  <script>
261
  var elements = window.parent.document.querySelectorAll('button');
 
267
  }}
268
  </script>
269
  """
270
+ components.html(f"{htmlstr}", height=0, width=0)