Commit: 37d1f1c
Michelle Lam committed
Parent(s): 51bb6f7

Sets default scaffolding method to 'personal' method; adjusts topic selection with new preds_df columns; removes print and log statements

Files changed:
- audit_utils.py +11 -9
- indie_label_svelte/src/ClusterResults.svelte +0 -3
- indie_label_svelte/src/Explore.svelte +0 -1
- indie_label_svelte/src/HypothesisPanel.svelte +3 -6
- indie_label_svelte/src/KeywordSearch.svelte +0 -1
- indie_label_svelte/src/Labeling.svelte +0 -1
- indie_label_svelte/src/TopicTraining.svelte +0 -2
- server.py +28 -24
audit_utils.py
CHANGED
@@ -115,8 +115,6 @@ readable_to_internal = {
 }
 internal_to_readable = {v: k for k, v in readable_to_internal.items()}
 
-def get_system_preds_df():
-    return system_preds_df
 
 ########################################
 # Data storage helper functions
@@ -455,7 +453,7 @@ def get_predictions_by_user_and_item(predictions):
 # - model: trained model
 # - user_ids: list of user IDs to compute predictions for
 # - sys_eval_df: dataframe of system eval labels (pre-computed)
-def get_preds_df(model, user_ids, sys_eval_df=sys_eval_df, bins=BINS):
+def get_preds_df(model, user_ids, sys_eval_df=sys_eval_df, bins=BINS, debug=False):
     # Prep dataframe for all predictions we'd like to request
     start = time.time()
     sys_eval_comment_ids = sys_eval_df.item_id.unique().tolist()
@@ -464,7 +462,8 @@ def get_preds_df(model, user_ids, sys_eval_df=sys_eval_df, bins=BINS):
     for user_id in user_ids:
         empty_ratings_rows.extend([[user_id, c_id, 0] for c_id in sys_eval_comment_ids])
     empty_ratings_df = pd.DataFrame(empty_ratings_rows, columns=["user_id", "item_id", "rating"])
-
+    if debug:
+        print("setup", time.time() - start)
 
     # Evaluate model to get predictions
     start = time.time()
@@ -472,7 +471,8 @@ def get_preds_df(model, user_ids, sys_eval_df=sys_eval_df, bins=BINS):
     eval_set_data = Dataset.load_from_df(empty_ratings_df, reader)
     _, testset = train_test_split(eval_set_data, test_size=1.)
     predictions = model.test(testset)
-
+    if debug:
+        print("train_test_split", time.time() - start)
 
     # Update dataframe with predictions
     start = time.time()
@@ -513,7 +513,7 @@ def train_user_model(ratings_df, train_df=train_df, model_eval_df=model_eval_df,
 # - train_df: dataframe of training labels
 # - model_eval_df: dataframe of model eval labels (validation set)
 # - model_type: type of model to train
-def train_model(train_df, model_eval_df, model_type="SVD", sim_type=None, user_based=True):
+def train_model(train_df, model_eval_df, model_type="SVD", sim_type=None, user_based=True, debug=False):
     # Train model
     reader = Reader(rating_scale=(0, 4))
     train_data = Dataset.load_from_df(train_df, reader)
@@ -542,7 +542,8 @@ def train_model(train_df, model_eval_df, model_type="SVD", sim_type=None, user_b
     mae = accuracy.mae(predictions)
     mse = accuracy.mse(predictions)
 
-
+    if debug:
+        print(f"MAE: {mae}, MSE: {mse}, RMSE: {rmse}, FCP: {fcp}")
     perf = [mae, mse, rmse, fcp]
 
     return algo, perf
@@ -1038,7 +1039,7 @@ def plot_overall_vis_cluster(cur_user, preds_df, error_type, n_comments=None, bi
 
     return final_plot, df
 
-def get_cluster_comments(df, error_type, threshold=TOXIC_THRESHOLD, sys_col="rating_sys", use_model=True):
+def get_cluster_comments(df, error_type, threshold=TOXIC_THRESHOLD, sys_col="rating_sys", use_model=True, debug=False):
     df["user_color"] = [get_user_color(user, threshold) for user in df["pred"].tolist()]  # get cell colors
     df["system_color"] = [get_user_color(sys, threshold) for sys in df[sys_col].tolist()]  # get cell colors
     df["error_color"] = [get_system_color(sys, user, threshold) for sys, user in zip(df[sys_col].tolist(), df["pred"].tolist())]  # get cell colors
@@ -1049,7 +1050,8 @@ def get_cluster_comments(df, error_type, threshold=TOXIC_THRESHOLD, sys_col="rat
     if use_model:
         df = df.sort_values(by=["error_amt"], ascending=False)  # surface largest errors first
     else:
-
+        if debug:
+            print("get_cluster_comments; not using model")
        df = df.sort_values(by=[sys_col], ascending=True)
 
     df["id"] = df["item_id"]
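Every change in this file follows one pattern: the function gains a debug keyword (defaulting to False), and its timing or metric prints fire only when the caller opts in. A minimal runnable sketch of that gating pattern, with a stand-in workload rather than this repo's code:

    import time

    def expensive_step(debug=False):
        # Diagnostics are opt-in: silent by default, printed on request.
        start = time.time()
        result = sum(i * i for i in range(100_000))  # stand-in workload
        if debug:
            print("setup", time.time() - start)
        return result

    expensive_step()            # production path stays quiet
    expensive_step(debug=True)  # prints elapsed time while debugging

Because the keyword defaults to False, existing callers keep their behavior and the server output stays clean.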
indie_label_svelte/src/ClusterResults.svelte
CHANGED
@@ -55,12 +55,10 @@
         //your code goes here on location change
         let cur_url = window.location.href;
         let cur_url_elems = cur_url.split("#");
-        // console.log(cur_url_elems)
         if (cur_url_elems.length > 0) {
             let path = cur_url_elems[2];
             if (path == "comment") {
                 let comment_id = cur_url_elems[1].split("/")[0];
-                console.log("comment_id", comment_id)
                 selected_comment_id = parseInt(comment_id);
                 let table_ind = null;
                 for (let i = 0; i < items.length; i++) {
@@ -130,7 +128,6 @@
         items = data["cluster_comments"];
         set_length = items.length;
     }
-    // console.log(set_length);
 
     let cur_open_evidence;
     open_evidence.subscribe(value => {
indie_label_svelte/src/Explore.svelte
CHANGED
@@ -48,7 +48,6 @@
         const text = await response.text();
         const data = JSON.parse(text);
         cur_examples = JSON.parse(data["examples"]);
-        console.log(cur_examples); // TEMP
         return true;
     }
 </script>
indie_label_svelte/src/HypothesisPanel.svelte
CHANGED
@@ -35,14 +35,11 @@
     // Handle routing
     let searchParams = new URLSearchParams(window.location.search);
     let scaffold_method = searchParams.get("scaffold");
+    if (scaffold_method == null) {
+        scaffold_method = "personal"; // Default to personalized model scaffold
+    }
     let topic_vis_method = searchParams.get("topic_vis_method");
 
-    // TODO: connect to selected["error_type"] so changes on main panel affect report panel
-    // let cur_error_type;
-    // error_type.subscribe(value => {
-    //     cur_error_type = value;
-    // });
-
     // Handle drawer
     let open = false;
     let selected = null;
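This hunk is the commit's headline change: when the page URL carries no ?scaffold= query parameter, the panel now falls back to the "personal" (personalized model) scaffold instead of leaving scaffold_method null. The same fallback, sketched in Python for illustration (the URL and helper name are hypothetical, not part of the app):

    from urllib.parse import parse_qs, urlparse

    def get_scaffold_method(url):
        # parse_qs maps each query key to a list of values;
        # fall back to "personal" when ?scaffold= is absent.
        params = parse_qs(urlparse(url).query)
        return params.get("scaffold", ["personal"])[0]

    assert get_scaffold_method("https://example.test/audit?scaffold=prompts") == "prompts"
    assert get_scaffold_method("https://example.test/audit") == "personal"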
indie_label_svelte/src/KeywordSearch.svelte
CHANGED
@@ -36,7 +36,6 @@
             keyword: keyword,
             error_type: cur_error_type,
         };
-        console.log("topic_df_ids", topic_df_ids);
         let params = new URLSearchParams(req_params).toString();
         const response = await fetch("./get_cluster_results?" + params);
         const text = await response.text();
indie_label_svelte/src/Labeling.svelte
CHANGED
@@ -93,7 +93,6 @@
         const response = await fetch("./get_group_model?" + params);
         const text = await response.text();
         const data = JSON.parse(text);
-        console.log("getGroupModel", data);
         return data
     }
 
indie_label_svelte/src/TopicTraining.svelte
CHANGED
@@ -75,7 +75,6 @@
             topic: topic,
         };
 
-        console.log("topic training model name", model_name);
         let params = new URLSearchParams(req_params).toString();
         const response = await fetch("./get_personalized_model_topic?" + params); // TODO
         const text = await response.text();
@@ -84,7 +83,6 @@
         model_name = data["new_model_name"];
         model_chosen.update((value) => model_name);
 
-        console.log("topicTraining", data);
         return data;
     }
 </script>
server.py
CHANGED
@@ -203,7 +203,7 @@ def get_group_size():
 ########################################
 # ROUTE: /GET_GROUP_MODEL
 @app.route("/get_group_model")
-def get_group_model():
+def get_group_model(debug=DEBUG):
     # Fetch info for initial labeling component
     model_name = request.args.get("model_name")
     user = request.args.get("user")
@@ -236,7 +236,8 @@ def get_group_model():
     mae, mse, rmse, avg_diff, ratings_prev = utils.train_updated_model(model_name, ratings_grp, user)
 
     duration = time.time() - start
-
+    if debug:
+        print("Time to train/cache:", duration)
 
     context = {
         "group_size": group_size,
@@ -360,13 +361,14 @@ def get_personalized_model(debug=DEBUG):
 ########################################
 # ROUTE: /GET_PERSONALIZED_MODEL_TOPIC
 @app.route("/get_personalized_model_topic")
-def get_personalized_model_topic():
+def get_personalized_model_topic(debug=DEBUG):
     model_name = request.args.get("model_name")
     ratings_json = request.args.get("ratings")
     user = request.args.get("user")
     ratings = json.loads(ratings_json)
     topic = request.args.get("topic")
-
+    if debug:
+        print(ratings)
     start = time.time()
 
     # Modify model name
@@ -375,14 +377,13 @@ def get_personalized_model_topic():
 
     # Handle existing or new model cases
     # Train model and cache predictions using new labels
-
+    if debug:
+        print("get_personalized_model_topic train")
     mae, mse, rmse, avg_diff, ratings_prev = utils.train_updated_model(model_name, ratings, user, topic=topic)
 
-
-
-
-    def round_metric(x):
-        return np.round(abs(x), 3)
+    if debug:
+        duration = time.time() - start
+        print("Time to train/cache:", duration)
 
     results = {
         "success": "success",
@@ -499,8 +500,8 @@ def get_topic_errors(df, topic_vis_method, threshold=2):
     topic_errors = {}
     for topic in topics:
         t_df = df[df["topic"] == topic]
-        y_true = t_df["pred"].to_numpy()
-        y_pred = t_df["
+        y_true = t_df["pred"].to_numpy()  # Predicted user rating (treated as ground truth)
+        y_pred = t_df["rating_sys"].to_numpy()  # System rating (which we're auditing)
         if topic_vis_method == "mae":
             t_err = mean_absolute_error(y_true, y_pred)
         elif topic_vis_method == "mse":
@@ -508,8 +509,8 @@ def get_topic_errors(df, topic_vis_method, threshold=2):
         elif topic_vis_method == "avg_diff":
             t_err = np.mean(y_true - y_pred)
         elif topic_vis_method == "fp_proportion":
-            y_true = [0 if rating < threshold else 1 for rating in
-            y_pred = [0 if rating < threshold else 1 for rating in
+            y_true = [0 if rating < threshold else 1 for rating in y_true]
+            y_pred = [0 if rating < threshold else 1 for rating in y_pred]
             try:
                 tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
             except:
@@ -517,8 +518,8 @@ def get_topic_errors(df, topic_vis_method, threshold=2):
             total = float(len(y_true))
             t_err = fp / total
         elif topic_vis_method == "fn_proportion":
-            y_true = [0 if rating < threshold else 1 for rating in
-            y_pred = [0 if rating < threshold else 1 for rating in
+            y_true = [0 if rating < threshold else 1 for rating in y_true]
+            y_pred = [0 if rating < threshold else 1 for rating in y_pred]
             try:
                 tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
             except:
@@ -529,16 +530,14 @@ def get_topic_errors(df, topic_vis_method, threshold=2):
 
     return topic_errors
 
-def get_personal_scaffold(cur_user, model, topic_vis_method, n_topics=200, n=5):
+def get_personal_scaffold(cur_user, model, topic_vis_method, n_topics=200, n=5, debug=DEBUG):
     threshold = utils.get_toxic_threshold()
 
     # Get topics with greatest amount of error
     preds_file = utils.get_preds_file(cur_user, model)
     with open(preds_file, "rb") as f:
         preds_df = pickle.load(f)
-
-    preds_df_mod = preds_df.merge(system_preds_df, on="item_id", how="left", suffixes=('', '_sys'))
-    preds_df_mod = preds_df_mod[preds_df_mod["user_id"] == cur_user].sort_values(by=["item_id"]).reset_index()
+    preds_df_mod = preds_df[preds_df["user_id"] == cur_user].sort_values(by=["item_id"]).reset_index()
     preds_df_mod = preds_df_mod[preds_df_mod["topic_id"] < n_topics]
 
     if topic_vis_method == "median":
@@ -557,11 +556,12 @@ def get_personal_scaffold(cur_user, model, topic_vis_method, n_topics=200, n=5):
     df = preds_df_mod.groupby(["topic", "user_id"]).mean().reset_index()
 
     # Get system error
-
+    junk_topics = ["53_maiareficco_kallystas_dyisisitmanila_tractorsazi", "-1_dude_bullshit_fight_ain"]
+    df = df[~df["topic"].isin(junk_topics)]  # Exclude known "junk topics"
 
     if topic_vis_method == "median" or topic_vis_method == "mean":
-        df["error_magnitude"] = [utils.get_error_magnitude(sys, user, threshold) for sys, user in zip(df["
-        df["error_type"] = [utils.get_error_type_radio(sys, user, threshold) for sys, user in zip(df["
+        df["error_magnitude"] = [utils.get_error_magnitude(sys, user, threshold) for sys, user in zip(df["rating_sys"].tolist(), df["pred"].tolist())]
+        df["error_type"] = [utils.get_error_type_radio(sys, user, threshold) for sys, user in zip(df["rating_sys"].tolist(), df["pred"].tolist())]
 
         df_under = df[df["error_type"] == "System is under-sensitive"]
         df_under = df_under.sort_values(by=["error_magnitude"], ascending=False).head(n)  # surface largest errors first
@@ -577,17 +577,21 @@ def get_personal_scaffold(cur_user, model, topic_vis_method, n_topics=200, n=5):
     elif topic_vis_method == "fp_fn":
         df_under = df.sort_values(by=["fn_proportion"], ascending=False).head(n)
         df_under = df_under[df_under["fn_proportion"] > 0]
+        if debug:
+            print(df_under[["topic", "fn_proportion"]])
         report_under = [get_empty_report(row["topic"], "System is under-sensitive") for _, row in df_under.iterrows()]
 
        df_over = df.sort_values(by=["fp_proportion"], ascending=False).head(n)
         df_over = df_over[df_over["fp_proportion"] > 0]
+        if debug:
+            print(df_over[["topic", "fp_proportion"]])
         report_over = [get_empty_report(row["topic"], "System is over-sensitive") for _, row in df_over.iterrows()]
 
         reports = (report_under + report_over)
         random.shuffle(reports)
     else:
         df = df.sort_values(by=[topic_vis_method], ascending=False).head(n * 2)
-        df["error_type"] = [utils.get_error_type_radio(sys, user, threshold) for sys, user in zip(df["
+        df["error_type"] = [utils.get_error_type_radio(sys, user, threshold) for sys, user in zip(df["rating_sys"].tolist(), df["pred"].tolist())]
         reports = [get_empty_report(row["topic"], row["error_type"]) for _, row in df.iterrows()]
 
     return reports
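The get_topic_errors hunks show how the fp_proportion and fn_proportion methods work: both rating vectors are binarized at the toxicity threshold, then false positives and false negatives are read off a confusion matrix. A self-contained sketch with made-up ratings (in the real code, y_true comes from the personalized model's predictions and y_pred from the system's ratings):

    import numpy as np
    from sklearn.metrics import confusion_matrix

    threshold = 2
    y_true_raw = np.array([0.5, 3.2, 2.8, 1.0])  # user's predicted ratings (treated as ground truth)
    y_pred_raw = np.array([2.5, 1.9, 3.0, 0.4])  # system ratings being audited

    y_true = [0 if r < threshold else 1 for r in y_true_raw]
    y_pred = [0 if r < threshold else 1 for r in y_pred_raw]

    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()
    total = float(len(y_true))
    print("fp_proportion:", fp / total)  # system flags what the user would not (over-sensitive)
    print("fn_proportion:", fn / total)  # system misses what the user would flag (under-sensitive)

get_personal_scaffold then ranks topics by these proportions and seeds empty reports for the top-n under- and over-sensitive topics, which is why the junk-topic filter and the new rating_sys/pred columns in preds_df matter upstream.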