Michelle Lam committed on
Commit 37d1f1c
1 Parent(s): 51bb6f7

Sets default scaffolding method to 'personal'; adjusts topic selection with new preds_df columns; removes print and log statements
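The thread running through these changes is that unconditional `print`/`console.log` calls are either deleted outright or gated behind a `debug` keyword argument. A minimal sketch of that gating pattern (the function and data here are illustrative, not from the repo):

```python
import time

def train_model(train_df, debug=False):
    # Illustrative only: time a step, but report it only when asked.
    start = time.time()
    model = {"n_rows": len(train_df)}  # stand-in for the real training step
    if debug:
        print("train took", time.time() - start, "seconds")
    return model

train_model([1, 2, 3])              # silent by default
train_model([1, 2, 3], debug=True)  # prints the timing line
```

In server.py the flag defaults to a module-level `DEBUG` constant (already used by `get_personalized_model`), so one switch enables or silences logging for every route at once.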

audit_utils.py CHANGED
@@ -115,8 +115,6 @@ readable_to_internal = {
 }
 internal_to_readable = {v: k for k, v in readable_to_internal.items()}
 
-def get_system_preds_df():
-    return system_preds_df
 
 ########################################
 # Data storage helper functions
@@ -455,7 +453,7 @@ def get_predictions_by_user_and_item(predictions):
 # - model: trained model
 # - user_ids: list of user IDs to compute predictions for
 # - sys_eval_df: dataframe of system eval labels (pre-computed)
-def get_preds_df(model, user_ids, sys_eval_df=sys_eval_df, bins=BINS):
+def get_preds_df(model, user_ids, sys_eval_df=sys_eval_df, bins=BINS, debug=False):
     # Prep dataframe for all predictions we'd like to request
     start = time.time()
     sys_eval_comment_ids = sys_eval_df.item_id.unique().tolist()
@@ -464,7 +462,8 @@ def get_preds_df(model, user_ids, sys_eval_df=sys_eval_df, bins=BINS):
     for user_id in user_ids:
         empty_ratings_rows.extend([[user_id, c_id, 0] for c_id in sys_eval_comment_ids])
     empty_ratings_df = pd.DataFrame(empty_ratings_rows, columns=["user_id", "item_id", "rating"])
-    print("setup", time.time() - start)
+    if debug:
+        print("setup", time.time() - start)
 
     # Evaluate model to get predictions
     start = time.time()
@@ -472,7 +471,8 @@ def get_preds_df(model, user_ids, sys_eval_df=sys_eval_df, bins=BINS):
     eval_set_data = Dataset.load_from_df(empty_ratings_df, reader)
     _, testset = train_test_split(eval_set_data, test_size=1.)
     predictions = model.test(testset)
-    print("train_test_split", time.time() - start)
+    if debug:
+        print("train_test_split", time.time() - start)
 
     # Update dataframe with predictions
     start = time.time()
@@ -513,7 +513,7 @@ def train_user_model(ratings_df, train_df=train_df, model_eval_df=model_eval_df,
 # - train_df: dataframe of training labels
 # - model_eval_df: dataframe of model eval labels (validation set)
 # - model_type: type of model to train
-def train_model(train_df, model_eval_df, model_type="SVD", sim_type=None, user_based=True):
+def train_model(train_df, model_eval_df, model_type="SVD", sim_type=None, user_based=True, debug=False):
     # Train model
     reader = Reader(rating_scale=(0, 4))
     train_data = Dataset.load_from_df(train_df, reader)
@@ -542,7 +542,8 @@ def train_model(train_df, model_eval_df, model_type="SVD", sim_type=None, user_b
     mae = accuracy.mae(predictions)
     mse = accuracy.mse(predictions)
 
-    print(f"MAE: {mae}, MSE: {mse}, RMSE: {rmse}, FCP: {fcp}")
+    if debug:
+        print(f"MAE: {mae}, MSE: {mse}, RMSE: {rmse}, FCP: {fcp}")
     perf = [mae, mse, rmse, fcp]
 
     return algo, perf
@@ -1038,7 +1039,7 @@ def plot_overall_vis_cluster(cur_user, preds_df, error_type, n_comments=None, bi
 
     return final_plot, df
 
-def get_cluster_comments(df, error_type, threshold=TOXIC_THRESHOLD, sys_col="rating_sys", use_model=True):
+def get_cluster_comments(df, error_type, threshold=TOXIC_THRESHOLD, sys_col="rating_sys", use_model=True, debug=False):
    df["user_color"] = [get_user_color(user, threshold) for user in df["pred"].tolist()] # get cell colors
    df["system_color"] = [get_user_color(sys, threshold) for sys in df[sys_col].tolist()] # get cell colors
    df["error_color"] = [get_system_color(sys, user, threshold) for sys, user in zip(df[sys_col].tolist(), df["pred"].tolist())] # get cell colors
@@ -1049,7 +1050,8 @@ def get_cluster_comments(df, error_type, threshold=TOXIC_THRESHOLD, sys_col="rat
    if use_model:
        df = df.sort_values(by=["error_amt"], ascending=False) # surface largest errors first
    else:
-        print("get_cluster_comments; not using model")
+        if debug:
+            print("get_cluster_comments; not using model")
        df = df.sort_values(by=[sys_col], ascending=True)

    df["id"] = df["item_id"]
indie_label_svelte/src/ClusterResults.svelte CHANGED
@@ -55,12 +55,10 @@
     //your code goes here on location change
     let cur_url = window.location.href;
     let cur_url_elems = cur_url.split("#");
-    // console.log(cur_url_elems)
     if (cur_url_elems.length > 0) {
         let path = cur_url_elems[2];
         if (path == "comment") {
             let comment_id = cur_url_elems[1].split("/")[0];
-            console.log("comment_id", comment_id)
             selected_comment_id = parseInt(comment_id);
             let table_ind = null;
             for (let i = 0; i < items.length; i++) {
@@ -130,7 +128,6 @@
         items = data["cluster_comments"];
         set_length = items.length;
     }
-    // console.log(set_length);

     let cur_open_evidence;
     open_evidence.subscribe(value => {
indie_label_svelte/src/Explore.svelte CHANGED
@@ -48,7 +48,6 @@
         const text = await response.text();
         const data = JSON.parse(text);
         cur_examples = JSON.parse(data["examples"]);
-        console.log(cur_examples); // TEMP
         return true;
     }
 </script>
indie_label_svelte/src/HypothesisPanel.svelte CHANGED
@@ -35,14 +35,11 @@
     // Handle routing
     let searchParams = new URLSearchParams(window.location.search);
     let scaffold_method = searchParams.get("scaffold");
+    if (scaffold_method == null) {
+        scaffold_method = "personal"; // Default to personalized model scaffold
+    }
     let topic_vis_method = searchParams.get("topic_vis_method");

-    // TODO: connect to selected["error_type"] so changes on main panel affect report panel
-    // let cur_error_type;
-    // error_type.subscribe(value => {
-    //     cur_error_type = value;
-    // });
-
     // Handle drawer
     let open = false;
     let selected = null;
indie_label_svelte/src/KeywordSearch.svelte CHANGED
@@ -36,7 +36,6 @@
         keyword: keyword,
         error_type: cur_error_type,
     };
-    console.log("topic_df_ids", topic_df_ids);
     let params = new URLSearchParams(req_params).toString();
     const response = await fetch("./get_cluster_results?" + params);
     const text = await response.text();
indie_label_svelte/src/Labeling.svelte CHANGED
@@ -93,7 +93,6 @@
         const response = await fetch("./get_group_model?" + params);
         const text = await response.text();
         const data = JSON.parse(text);
-        console.log("getGroupModel", data);
         return data
     }

indie_label_svelte/src/TopicTraining.svelte CHANGED
@@ -75,7 +75,6 @@
         topic: topic,
     };

-    console.log("topic training model name", model_name);
     let params = new URLSearchParams(req_params).toString();
     const response = await fetch("./get_personalized_model_topic?" + params); // TODO
     const text = await response.text();
@@ -84,7 +83,6 @@
     model_name = data["new_model_name"];
     model_chosen.update((value) => model_name);

-    console.log("topicTraining", data);
     return data;
 }
 </script>
server.py CHANGED
@@ -203,7 +203,7 @@ def get_group_size():
 ########################################
 # ROUTE: /GET_GROUP_MODEL
 @app.route("/get_group_model")
-def get_group_model():
+def get_group_model(debug=DEBUG):
     # Fetch info for initial labeling component
     model_name = request.args.get("model_name")
     user = request.args.get("user")
@@ -236,7 +236,8 @@ def get_group_model():
     mae, mse, rmse, avg_diff, ratings_prev = utils.train_updated_model(model_name, ratings_grp, user)

     duration = time.time() - start
-    print("Time to train/cache:", duration)
+    if debug:
+        print("Time to train/cache:", duration)

     context = {
         "group_size": group_size,
@@ -360,13 +361,14 @@ def get_personalized_model(debug=DEBUG):
 ########################################
 # ROUTE: /GET_PERSONALIZED_MODEL_TOPIC
 @app.route("/get_personalized_model_topic")
-def get_personalized_model_topic():
+def get_personalized_model_topic(debug=DEBUG):
     model_name = request.args.get("model_name")
     ratings_json = request.args.get("ratings")
     user = request.args.get("user")
     ratings = json.loads(ratings_json)
     topic = request.args.get("topic")
-    print(ratings)
+    if debug:
+        print(ratings)
     start = time.time()

     # Modify model name
@@ -375,14 +377,13 @@ def get_personalized_model_topic():

     # Handle existing or new model cases
     # Train model and cache predictions using new labels
-    print("get_personalized_model_topic train")
+    if debug:
+        print("get_personalized_model_topic train")
     mae, mse, rmse, avg_diff, ratings_prev = utils.train_updated_model(model_name, ratings, user, topic=topic)

-    duration = time.time() - start
-    print("Time to train/cache:", duration)
-
-    def round_metric(x):
-        return np.round(abs(x), 3)
+    if debug:
+        duration = time.time() - start
+        print("Time to train/cache:", duration)

     results = {
         "success": "success",
@@ -499,8 +500,8 @@ def get_topic_errors(df, topic_vis_method, threshold=2):
     topic_errors = {}
     for topic in topics:
         t_df = df[df["topic"] == topic]
-        y_true = t_df["pred"].to_numpy()
-        y_pred = t_df["rating"].to_numpy()
+        y_true = t_df["pred"].to_numpy()  # Predicted user rating (treated as ground truth)
+        y_pred = t_df["rating_sys"].to_numpy()  # System rating (which we're auditing)
        if topic_vis_method == "mae":
            t_err = mean_absolute_error(y_true, y_pred)
        elif topic_vis_method == "mse":
@@ -508,8 +509,8 @@ def get_topic_errors(df, topic_vis_method, threshold=2):
        elif topic_vis_method == "avg_diff":
            t_err = np.mean(y_true - y_pred)
        elif topic_vis_method == "fp_proportion":
-            y_true = [0 if rating < threshold else 1 for rating in t_df["pred"].tolist()]
-            y_pred = [0 if rating < threshold else 1 for rating in t_df["rating"].tolist()]
+            y_true = [0 if rating < threshold else 1 for rating in y_true]
+            y_pred = [0 if rating < threshold else 1 for rating in y_pred]
            try:
                tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
            except:
@@ -517,8 +518,8 @@ def get_topic_errors(df, topic_vis_method, threshold=2):
            total = float(len(y_true))
            t_err = fp / total
        elif topic_vis_method == "fn_proportion":
-            y_true = [0 if rating < threshold else 1 for rating in t_df["pred"].tolist()]
-            y_pred = [0 if rating < threshold else 1 for rating in t_df["rating"].tolist()]
+            y_true = [0 if rating < threshold else 1 for rating in y_true]
+            y_pred = [0 if rating < threshold else 1 for rating in y_pred]
            try:
                tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
            except:
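In the `fp_proportion`/`fn_proportion` branches above, both rating vectors are binarized at the toxicity threshold before the confusion matrix is taken. A worked toy example of that computation (data illustrative; `threshold=2` as in the function signature):

```python
import numpy as np
from sklearn.metrics import confusion_matrix

threshold = 2
y_true = np.array([0, 1, 3, 4, 2])  # user's (predicted) ratings, treated as ground truth
y_pred = np.array([3, 1, 1, 4, 2])  # system ratings being audited

# Binarize at the toxicity threshold: 1 = toxic, 0 = non-toxic
y_true_bin = [0 if r < threshold else 1 for r in y_true]
y_pred_bin = [0 if r < threshold else 1 for r in y_pred]

# For binary labels, ravel() yields (tn, fp, fn, tp)
tn, fp, fn, tp = confusion_matrix(y_true_bin, y_pred_bin).ravel()
total = float(len(y_true_bin))
print("fp_proportion:", fp / total)  # system flags toxicity the user doesn't see (over-sensitive)
print("fn_proportion:", fn / total)  # system misses toxicity the user perceives (under-sensitive)
```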
@@ -529,16 +530,14 @@ def get_topic_errors(df, topic_vis_method, threshold=2):

     return topic_errors

-def get_personal_scaffold(cur_user, model, topic_vis_method, n_topics=200, n=5):
+def get_personal_scaffold(cur_user, model, topic_vis_method, n_topics=200, n=5, debug=DEBUG):
     threshold = utils.get_toxic_threshold()

     # Get topics with greatest amount of error
     preds_file = utils.get_preds_file(cur_user, model)
     with open(preds_file, "rb") as f:
         preds_df = pickle.load(f)
-    system_preds_df = utils.get_system_preds_df()
-    preds_df_mod = preds_df.merge(system_preds_df, on="item_id", how="left", suffixes=('', '_sys'))
-    preds_df_mod = preds_df_mod[preds_df_mod["user_id"] == cur_user].sort_values(by=["item_id"]).reset_index()
+    preds_df_mod = preds_df[preds_df["user_id"] == cur_user].sort_values(by=["item_id"]).reset_index()
     preds_df_mod = preds_df_mod[preds_df_mod["topic_id"] < n_topics]

     if topic_vis_method == "median":
@@ -557,11 +556,12 @@ def get_personal_scaffold(cur_user, model, topic_vis_method, n_topics=200, n=5):
     df = preds_df_mod.groupby(["topic", "user_id"]).mean().reset_index()

     # Get system error
-    df = df[(df["topic"] != "53_maiareficco_kallystas_dyisisitmanila_tractorsazi") & (df["topic"] != "79_idiot_dumb_stupid_dumber")]
+    junk_topics = ["53_maiareficco_kallystas_dyisisitmanila_tractorsazi", "-1_dude_bullshit_fight_ain"]
+    df = df[~df["topic"].isin(junk_topics)]  # Exclude known "junk topics"

     if topic_vis_method == "median" or topic_vis_method == "mean":
-        df["error_magnitude"] = [utils.get_error_magnitude(sys, user, threshold) for sys, user in zip(df["rating"].tolist(), df["pred"].tolist())]
-        df["error_type"] = [utils.get_error_type_radio(sys, user, threshold) for sys, user in zip(df["rating"].tolist(), df["pred"].tolist())]
+        df["error_magnitude"] = [utils.get_error_magnitude(sys, user, threshold) for sys, user in zip(df["rating_sys"].tolist(), df["pred"].tolist())]
+        df["error_type"] = [utils.get_error_type_radio(sys, user, threshold) for sys, user in zip(df["rating_sys"].tolist(), df["pred"].tolist())]

        df_under = df[df["error_type"] == "System is under-sensitive"]
        df_under = df_under.sort_values(by=["error_magnitude"], ascending=False).head(n) # surface largest errors first
@@ -577,17 +577,21 @@ def get_personal_scaffold(cur_user, model, topic_vis_method, n_topics=200, n=5):
    elif topic_vis_method == "fp_fn":
        df_under = df.sort_values(by=["fn_proportion"], ascending=False).head(n)
        df_under = df_under[df_under["fn_proportion"] > 0]
+        if debug:
+            print(df_under[["topic", "fn_proportion"]])
        report_under = [get_empty_report(row["topic"], "System is under-sensitive") for _, row in df_under.iterrows()]

        df_over = df.sort_values(by=["fp_proportion"], ascending=False).head(n)
        df_over = df_over[df_over["fp_proportion"] > 0]
+        if debug:
+            print(df_over[["topic", "fp_proportion"]])
        report_over = [get_empty_report(row["topic"], "System is over-sensitive") for _, row in df_over.iterrows()]

        reports = (report_under + report_over)
        random.shuffle(reports)
    else:
        df = df.sort_values(by=[topic_vis_method], ascending=False).head(n * 2)
-        df["error_type"] = [utils.get_error_type_radio(sys, user, threshold) for sys, user in zip(df["rating"].tolist(), df["pred"].tolist())]
+        df["error_type"] = [utils.get_error_type_radio(sys, user, threshold) for sys, user in zip(df["rating_sys"].tolist(), df["pred"].tolist())]
        reports = [get_empty_report(row["topic"], row["error_type"]) for _, row in df.iterrows()]

    return reports
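After this commit, `get_personal_scaffold` assumes the cached `preds_df` already carries the system's rating in a `rating_sys` column, which is why the merge against `utils.get_system_preds_df()` (and that helper itself, removed from audit_utils.py) is gone. A toy sketch of the topic-level selection under that schema (column names from the diff; the data and the `error_type` helper, a hypothetical stand-in for `utils.get_error_type_radio`, are illustrative):

```python
import pandas as pd

# Toy preds_df with the new schema: per-item user prediction ("pred")
# and pre-joined system rating ("rating_sys").
preds_df = pd.DataFrame({
    "user_id": ["u1"] * 6,
    "item_id": [1, 2, 3, 4, 5, 6],
    "topic": ["a", "a", "b", "b", "c", "c"],
    "pred": [4, 3, 0, 1, 2, 2],
    "rating_sys": [1, 0, 3, 4, 2, 2],
})

# Aggregate to topic level, as get_personal_scaffold does
df = preds_df.groupby(["topic", "user_id"]).mean(numeric_only=True).reset_index()

threshold = 2
def error_type(sys, user):
    # Hypothetical stand-in: compare system rating vs. user prediction
    if sys < threshold <= user:
        return "System is under-sensitive"
    if user < threshold <= sys:
        return "System is over-sensitive"
    return "No error"

df["error_type"] = [error_type(s, u) for s, u in zip(df["rating_sys"], df["pred"])]
print(df[["topic", "pred", "rating_sys", "error_type"]])
# Topic "a" surfaces as under-sensitive, "b" as over-sensitive, "c" as no error.
```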