"""Gradio demo that scores a confusion matrix by accuracy and error severity.

The app has two tabs:

* "Error severity matrix" -- sliders set the cost of each kind of
  misclassification; the result is shown as a weight matrix.
* "Calculate accuracy and Error Severity" -- a confusion matrix is scored
  against that weight matrix, and a bootstrap confidence interval for the
  severity score can be computed.
"""

import random

import gradio as gr
import numpy as np
import pandas as pd

num_labels = 3
headers = ["Benign", "C1", "C2"]

# Default cost of each cell: zero on the diagonal, |row - column| elsewhere.
default_weights = pd.DataFrame(
    [[0, 1, 2], [1, 0, 1], [2, 1, 0]],
    columns=headers,
)

# Confusion matrices offered as one-click examples in the UI.
example_conf_mats = [
    pd.DataFrame([[80, 10, 0], [20, 80, 20], [0, 10, 80]], columns=headers),
    pd.DataFrame([[80, 10, 10], [10, 80, 10], [10, 10, 80]], columns=headers),
    pd.DataFrame([[80, 10, 20], [0, 80, 0], [20, 10, 80]], columns=headers),
    pd.DataFrame(
        [[800, 100, 100], [100, 800, 100], [100, 100, 800]],
        columns=headers,
    ),
    pd.DataFrame(
        [[800, 100, 200], [0, 800, 0], [200, 100, 800]],
        columns=headers,
    ),
]


def submit_vals(*argv) -> pd.DataFrame:
    """Build the error severity matrix from the slider values.

    Args:
        *argv: One value per off-diagonal cell, in row-major order (the
            same order in which the sliders are created). Values beyond
            the ``num_labels * (num_labels - 1)`` needed are ignored.

    Returns:
        A ``num_labels`` x ``num_labels`` DataFrame with ``headers`` as
        columns, zeros on the diagonal and the given values elsewhere.

    Raises:
        ValueError: If fewer values than off-diagonal cells are supplied.
    """
    needed = num_labels * (num_labels - 1)
    values = list(argv)
    if len(values) < needed:
        raise ValueError(
            f"Expected {needed} severity values, got {len(values)}."
        )
    weights = np.zeros((num_labels, num_labels))
    # Boolean-mask assignment fills the selected cells in row-major order,
    # which matches the order the sliders were appended in.
    off_diagonal = ~np.eye(num_labels, dtype=bool)
    weights[off_diagonal] = values[:needed]
    return pd.DataFrame(weights, columns=headers)


def get_severity(input_df, weights_df) -> float:
    """Return the average severity weight per misclassified sample.

    The score is the sum of ``count * weight`` over every cell divided by
    the total of the off-diagonal counts. Cells are paired by position, so
    the two frames do not need matching column labels and neither argument
    is modified.

    Args:
        input_df: Square confusion matrix of counts.
        weights_df: Severity weights with the same shape as ``input_df``.

    Returns:
        The severity score, or ``0.0`` when there are no off-diagonal
        counts.
    """
    counts = np.asarray(input_df, dtype=float)
    weights = np.asarray(weights_df, dtype=float)
    non_diag_total = counts.sum() - np.trace(counts)
    if non_diag_total == 0:
        return 0.0
    return float((counts * weights).sum() / non_diag_total)


def get_acc(input_df, weights_df):
    """Return ``(accuracy_percent, severity)`` for a confusion matrix.

    Args:
        input_df: Square confusion matrix; its entries are cast to ``int``
            before scoring.
        weights_df: Severity weights passed through to ``get_severity``.

    Returns:
        A 2-tuple of the accuracy in percent (diagonal total over grand
        total) and the error severity. The accuracy is ``0.0`` for an
        all-zero matrix rather than ``nan``.
    """
    input_df = input_df.astype(int)
    counts = input_df.to_numpy()
    total = counts.sum()
    accuracy = 100 * float(np.trace(counts)) / float(total) if total else 0.0
    return accuracy, get_severity(input_df, weights_df)


def flatten(df):
    """Return the values of ``df`` as a one-dimensional array (row-major)."""
    return df.to_numpy().flatten()


def counts_to_df(sampled_vals, num_labels) -> pd.DataFrame:
    """Tally sampled flat cell indices into a square count matrix.

    Args:
        sampled_vals: Iterable of flat cell indices in
            ``range(num_labels ** 2)``; index ``v`` maps to row
            ``v // num_labels`` and column ``v % num_labels``.
        num_labels: Side length of the resulting matrix.

    Returns:
        A ``num_labels`` x ``num_labels`` DataFrame of integer counts with
        default (positional) row and column labels.
    """
    # An explicit integer dtype keeps an empty sample from becoming a float
    # array, which np.bincount would reject.
    cells = np.asarray(list(sampled_vals), dtype=np.intp)
    flat_counts = np.bincount(cells, minlength=num_labels**2)
    matrix = flat_counts.reshape(num_labels, num_labels).astype(int)
    return pd.DataFrame(matrix)


def bootstrap_sample(conf_mat, k) -> pd.DataFrame:
    """Draw ``k`` samples from ``conf_mat`` and return their count matrix.

    Each cell is drawn with probability proportional to its count (sampling
    with replacement, via ``random.choices``).

    Args:
        conf_mat: Square confusion matrix of non-negative counts.
        k: Number of samples to draw.

    Returns:
        A resampled confusion matrix as produced by ``counts_to_df``.
    """
    n = len(conf_mat.columns)
    # Cast so the counts are usable as numeric weights whatever dtype the
    # frame arrived with.
    cell_weights = flatten(conf_mat).astype(float).tolist()
    drawn = random.choices(range(n**2), weights=cell_weights, k=k)
    return counts_to_df(drawn, n)


def bootstrap_ci(conf_mat, weights_df, iters, k, percentile=95):
    """Estimate a bootstrap confidence interval for the error severity.

    Args:
        conf_mat: Square confusion matrix to resample from.
        weights_df: Severity weights with the same shape as ``conf_mat``.
        iters: Number of bootstrap resamples; converted with ``int``.
        k: Samples drawn per resample; converted with ``int``.
        percentile: Central coverage of the interval, in percent.

    Returns:
        ``[lower, upper]`` as plain Python floats.

    Raises:
        ValueError: If ``iters`` or ``k`` is less than 1.
    """
    iters, k = int(iters), int(k)
    if iters < 1 or k < 1:
        raise ValueError("Iterations and sample size must both be at least 1.")
    vals = [
        get_severity(bootstrap_sample(conf_mat, k), weights_df)
        for _ in range(iters)
    ]
    dif = (100 - percentile) / 2
    lower, upper = np.percentile(vals, [dif, 100 - dif])
    # Plain floats so the interval renders as "[a, b]" in the textbox rather
    # than as a list of NumPy scalar reprs.
    return [float(lower), float(upper)]


with gr.Blocks() as demo:
    with gr.Tab("Error severity matrix"):
        with gr.Row():
            with gr.Column():
                # One slider per off-diagonal cell, created in row-major
                # order; submit_vals relies on this order.
                sliders = []
                for i in range(num_labels):
                    for j in range(num_labels):
                        if i != j:
                            sliders.append(
                                gr.Slider(
                                    1,
                                    5,
                                    value=abs(i - j),
                                    step=1,
                                    label=(
                                        "Impact of misclassifying "
                                        f"{headers[j]} as {headers[i]}"
                                    ),
                                )
                            )
                submit_btn = gr.Button("Submit")
            with gr.Column():
                output_err_mat = gr.Dataframe(
                    value=default_weights,
                    datatype="number",
                    row_count=(num_labels, "fixed"),
                    col_count=(num_labels, "fixed"),
                    label="Error Severity Matrix",
                    interactive=False,
                    headers=headers,
                )
        submit_btn.click(submit_vals, inputs=sliders, outputs=output_err_mat)

    with gr.Tab("Calculate accuracy and Error Severity"):
        with gr.Row():
            with gr.Column():
                conf_df = gr.Dataframe(
                    datatype="number",
                    row_count=(num_labels, "fixed"),
                    col_count=(num_labels, "fixed"),
                    label="Confusion Matrix",
                    interactive=True,
                    headers=headers,
                )
                submit_btn = gr.Button("Submit")
                examples = gr.Examples(
                    examples=example_conf_mats,
                    inputs=[conf_df],
                )
            with gr.Column():
                outputs = [
                    gr.Textbox(label="Accuracy"),
                    gr.Textbox(label="Error Severity"),
                ]
        submit_btn.click(
            fn=get_acc,
            inputs=[conf_df, output_err_mat],
            outputs=outputs,
        )

        with gr.Row():
            with gr.Column():
                gr.Markdown("Use bootstrapping to compute the 95% confidence interval for the Error Severity Index above. As a rule of thumb, sample size can be set to the sum of values in the confusion matrix.")
                ci_inputs = [
                    gr.Textbox(label="Iterations", value=100),
                    gr.Textbox(label="Sample size", value=300),
                ]
                submit_btn2 = gr.Button("Submit")
            with gr.Column():
                output_ci = gr.Textbox(label="95% Confidence Interval")
        submit_btn2.click(
            fn=bootstrap_ci,
            inputs=[conf_df, output_err_mat, ci_inputs[0], ci_inputs[1]],
            outputs=output_ci,
        )

# Start the server only when run as a script, so importing this module
# (e.g. to reuse the scoring functions) does not launch the app.
if __name__ == "__main__":
    demo.launch()