# Hugging Face Space header residue (kept for provenance): author "djindjin",
# commit f798a78 ("sample size note").
import gradio as gr
import pandas as pd
import numpy as np
import random
# Three-class demo: one benign class plus two condition classes.
num_labels = 3
headers = ["Benign","C1","C2"]

# Default severity weights: penalty grows with label distance |i - j|,
# and correct classifications (the diagonal) carry zero severity.
default_weights = pd.DataFrame([[0, 1, 2], [1, 0, 1], [2, 1, 0]], columns=headers)

# Example confusion matrices offered in the UI (rows/columns follow `headers`).
_example_counts = [
    [[80, 10, 0], [20, 80, 20], [0, 10, 80]],
    [[80, 10, 10], [10, 80, 10], [10, 10, 80]],
    [[80, 10, 20], [0, 80, 0], [20, 10, 80]],
    [[800, 100, 100], [100, 800, 100], [100, 100, 800]],
    [[800, 100, 200], [0, 800, 0], [200, 100, 800]],
]
example_conf_mats = [pd.DataFrame(counts, columns=headers) for counts in _example_counts]
def submit_vals(*argv):
    """Build the error-severity matrix from the off-diagonal slider values.

    Slider values arrive in row-major order, one per (i, j) cell with i != j;
    diagonal cells (correct classifications) stay at zero severity.
    """
    vals = iter(argv)
    weights = np.zeros((num_labels, num_labels))
    for row in range(num_labels):
        for col in range(num_labels):
            if row != col:
                weights[row][col] = next(vals)
    return pd.DataFrame(weights, columns=headers)
def get_severity(input_df, weights_df):
    """Return the average severity weight over the misclassified samples.

    Computes sum(counts * weights) / (number of off-diagonal samples), with
    cells matched positionally. The previous version forced alignment by
    assigning ``weights_df.columns = input_df.columns`` — mutating the
    caller's DataFrame as a side effect; working on plain arrays avoids that.

    Args:
        input_df: confusion matrix of counts (DataFrame).
        weights_df: severity weight matrix of the same shape.

    Returns:
        0 when all mass is on the diagonal (no misclassifications),
        otherwise the weighted severity per misclassified sample.
    """
    counts = np.asarray(input_df, dtype=float)
    weights = np.asarray(weights_df, dtype=float)
    total = counts.sum()
    diag_total = np.trace(counts)
    non_diag_total = total - diag_total
    if non_diag_total == 0:
        return 0
    # Diagonal weights are zero by construction (see default_weights /
    # submit_vals), so this sum only accumulates misclassification cells.
    return (counts * weights).sum() / non_diag_total
def get_acc(input_df, weights_df):
    """Return (accuracy percentage, error-severity index) for a confusion matrix.

    Cells are coerced to int first (presumably because the UI can hand them
    over as strings — the original astype suggests so). An all-zero matrix
    now yields 0 accuracy instead of raising ZeroDivisionError.
    """
    input_df = input_df.astype(int)
    counts = np.asarray(input_df)
    total = counts.sum()
    accuracy = 0 if total == 0 else 100 * np.trace(counts) / total
    return accuracy, get_severity(input_df, weights_df)
def flatten(df):
    """Return the DataFrame's cells as a flat 1-D numpy array (row-major)."""
    return np.asarray(df).flatten()
def counts_to_df(sampled_vals, num_labels):
    """Fold flat row-major cell indices back into a confusion-matrix DataFrame.

    Each value v in ``sampled_vals`` addresses cell (v // num_labels,
    v % num_labels); the result counts how often each cell was drawn.
    """
    counts = np.bincount(sampled_vals, minlength=num_labels ** 2)
    return pd.DataFrame(counts.reshape(num_labels, num_labels))
def bootstrap_sample(conf_mat, k):
    """Draw one bootstrap resample of k classifications from the matrix.

    Cells are sampled with replacement, weighted by their observed counts,
    and re-assembled into a confusion matrix of the same shape.
    """
    n = len(conf_mat.columns)
    draws = random.choices(population=range(n ** 2), weights=flatten(conf_mat), k=k)
    return counts_to_df(draws, n)
def bootstrap_ci(conf_mat, weights_df, iters, k, percentile=95):
    """Bootstrap a confidence interval for the error-severity index.

    Args:
        conf_mat: observed confusion matrix (DataFrame of counts).
        weights_df: error-severity weight matrix.
        iters: number of bootstrap resamples (textbox value; coerced to int).
        k: sample size per resample (textbox value; coerced to int).
        percentile: interval width, e.g. 95 -> [2.5th, 97.5th] percentiles.

    Returns:
        [lower, upper] percentile bounds of the resampled severity values.
    """
    # Textbox inputs arrive as strings, hence the explicit coercion.
    iters, k = int(iters), int(k)
    # The original printed a blank line every iteration (leftover debug
    # output); removed.
    vals = [get_severity(bootstrap_sample(conf_mat, k), weights_df)
            for _ in range(iters)]
    tail = (100 - percentile) / 2
    return [np.percentile(vals, tail), np.percentile(vals, 100 - tail)]
# --- Gradio UI wiring -------------------------------------------------------
with gr.Blocks() as demo:
    # Tab 1: set per-error severity weights via sliders (one per off-diagonal
    # cell), mirrored into a read-only DataFrame that the other tab reuses.
    with gr.Tab("Error severity matrix"):
        with gr.Row():
            with gr.Column():
                sliders = []
                # Default slider value |i - j| matches default_weights.
                for i in range(num_labels):
                    for j in range(num_labels):
                        if i != j:
                            sliders.append(gr.Slider(1, 5, value=np.abs(i-j), step=1, label="Impact of misclassifying "+ headers[j] + " as " + headers[i]))
                submit_btn = gr.Button("Submit")
            with gr.Column():
                # Non-interactive view of the weight matrix; also used as an
                # input to get_acc/bootstrap_ci on the second tab.
                output_err_mat = gr.Dataframe(value = default_weights, datatype = "number", row_count = (num_labels, "fixed"), col_count=(num_labels,"fixed"), label="Error Severity Matrix", interactive=0, headers=headers)
        submit_btn.click(submit_vals, inputs=sliders, outputs=output_err_mat)
    # Tab 2: enter (or pick an example) confusion matrix, then compute
    # accuracy + severity, and optionally a bootstrap confidence interval.
    with gr.Tab("Calculate accuracy and Error Severity"):
        with gr.Row():
            with gr.Column():
                conf_df = gr.Dataframe(datatype = "number", row_count = (num_labels, "fixed"), col_count=(num_labels,"fixed"), label="Confusion Matrix", interactive=1, headers=headers)
                submit_btn = gr.Button("Submit")
                examples = gr.Examples(examples=example_conf_mats, inputs=[conf_df])
            with gr.Column():
                outputs = [gr.Textbox(label="Accuracy"), gr.Textbox(label="Error Severity")]
        submit_btn.click(fn=get_acc, inputs=[conf_df,output_err_mat], outputs=outputs)
        with gr.Row():
            with gr.Column():
                gr.Markdown("Use bootstrapping to compute the 95% confidence interval for the Error Severity Index above. As a rule of thumb, sample size can be set to the sum of values in the confusion matrix.")
                # Textboxes (not Number inputs): bootstrap_ci coerces with int().
                ci_inputs = [gr.Textbox(label="Iterations", value=100), gr.Textbox(label="Sample size", value=300)]
                submit_btn2 = gr.Button("Submit")
            with gr.Column():
                output_ci = gr.Textbox(label="95% Confidence Interval")
        submit_btn2.click(fn=bootstrap_ci, inputs=[conf_df,output_err_mat, ci_inputs[0], ci_inputs[1]], outputs = output_ci)
demo.launch()