# erceguder's picture
# initial version
# 522b959
from PIL import Image
import gradio as gr
import numpy as np
from datasets import load_dataset
import os
import tempfile
# Load the "erceguder/histocan-test" dataset once, at import time. The access
# token is read from the HF_TOKEN environment variable; if that variable is
# unset, the os.environ lookup raises KeyError before the app starts.
dataset = load_dataset("erceguder/histocan-test", token=os.environ["HF_TOKEN"])
# RGB colour that encodes each class in an uploaded segmentation mask.
# Insertion order matters: evaluate() enumerates this mapping and compares the
# position of each entry against the integer labels of the ground truth.
COLOR_PALETTE = {
    "others": (0, 0, 0),
    "t-g1": (0, 192, 0),
    "t-g2": (255, 224, 32),
    "t-g3": (255, 0, 0),
    "normal-mucosa": (0, 32, 255),
}
def files_uploaded(paths):
    """Validate a freshly uploaded batch of segmentation masks.

    The upload is accepted only when it consists of exactly 16 files named
    ``test0000.png`` through ``test0015.png`` (directory part ignored).

    Args:
        paths: The uploaded files. Each item may be an object exposing the
            file path as ``.name`` or a plain path string.

    Raises:
        gr.Error: If the number of files is not 16, or if any expected file
            name is missing from the upload.
    """
    expected_count = 16

    if paths is None or len(paths) != expected_count:
        raise gr.Error("16 segmentation masks are needed.")

    # getattr() falls back to the item itself so that plain string paths are
    # handled as well as file-like objects carrying the path in ``.name``.
    uploaded_names = {os.path.basename(getattr(item, "name", item)) for item in paths}
    expected_names = {f"test{index:04d}.png" for index in range(expected_count)}

    if not expected_names <= uploaded_names:
        raise gr.Error("Uploaded file names are not recognized.")
# Report layout. The blank lines around the Markdown inside the <div> are
# deliberate: without them a CommonMark renderer treats everything up to the
# closing tag as raw HTML and the heading/table are not rendered.
_RESULT_TEMPLATE = """
<div align="center">

# Results

| | Others | T-G1 | T-G2 | T-G3 | Normal mucosa |
|-----------|--------|------|------|------|---------------|
| Precision | {:.2f} |{:.2f}|{:.2f}|{:.2f}| {:.2f} |
| Recall | {:.2f} |{:.2f}|{:.2f}|{:.2f}| {:.2f} |
| Dice | {:.2f} |{:.2f}|{:.2f}|{:.2f}| {:.2f} |

### mPrecision: {:.4f}
### mRecall: {:.4f}
### mDice: {:.4f}

</div>
"""

# Column order of the report table; must match the header row of the template.
_REPORT_CLASSES = ("others", "t-g1", "t-g2", "t-g3", "normal-mucosa")
_REPORT_METRICS = ("precision", "recall", "f1")


def _load_prediction(path, name):
    """Read the uploaded mask at *path* into an array, reporting failures to the UI."""
    try:
        with Image.open(path) as image:
            return np.array(image)
    except OSError as err:
        raise gr.Error(f"Could not read {name} as an image.") from err


def _check_shapes(pred, gt, name):
    """Raise gr.Error unless *pred* is H x W x 3 and *gt* is a matching H x W map."""
    if gt.ndim != 2:
        raise gr.Error(f"Ground truth for {name} is not a 2-D label map.")
    if pred.ndim != 3 or pred.shape[-1] != 3:
        raise gr.Error(
            f"{name} must be a 3-channel RGB image, got an array of shape {pred.shape}."
        )
    if gt.shape != pred.shape[:-1]:
        raise gr.Error(
            f"{name} has size {pred.shape[:-1]} but the ground truth has size {gt.shape}."
        )


def _count_class_pixels(pred, gt, color, class_index):
    """Return (tp, fp, fn) pixel counts for one class in one image."""
    predicted = (pred == color).all(axis=-1)
    actual = gt == class_index
    tp = int(np.count_nonzero(predicted & actual))
    fp = int(np.count_nonzero(predicted & ~actual))
    fn = int(np.count_nonzero(~predicted & actual))
    return tp, fp, fn


def _ratio(numerator, denominator):
    """Return numerator / denominator, or 0.0 when the denominator is not positive."""
    return float(numerator / denominator) if denominator > 0 else 0.0


def _render_results(scores, means):
    """Fill the Markdown report with per-class scores followed by the class means."""
    cells = [
        scores[class_][metric]
        for metric in _REPORT_METRICS
        for class_ in _REPORT_CLASSES
    ]
    cells.extend(means[metric] for metric in _REPORT_METRICS)
    return _RESULT_TEMPLATE.format(*cells)


def evaluate(paths):
    """Score the uploaded segmentation masks against the test-set annotations.

    For every item of ``dataset["test"]`` the uploaded mask with the same file
    name is decoded through ``COLOR_PALETTE`` and compared pixel-wise with the
    item's annotation. True-positive, false-positive and false-negative counts
    are accumulated per class over all images; precision, recall and Dice (F1)
    are derived from the accumulated counts and then averaged over the classes.

    Args:
        paths: The uploaded files. Each item may be an object exposing the
            file path as ``.name`` or a plain path string.

    Returns:
        A ``gr.Markdown`` component whose value is the formatted report.

    Raises:
        gr.Error: If nothing was uploaded, a mask for a test item is missing
            or unreadable, or a mask's shape does not match its annotation.
    """
    if not paths:
        raise gr.Error("Upload segmentation masks first!")

    # Read the uploads where they already are instead of moving them into a
    # scratch directory: moving them makes a second evaluation of the same
    # upload fail (the files are gone) and os.rename cannot cross filesystems.
    uploaded = {}
    for item in paths:
        location = getattr(item, "name", item)
        uploaded[os.path.basename(location)] = location

    # True negatives are not tracked: none of the reported scores use them.
    totals = {class_: {"tp": 0, "fp": 0, "fn": 0} for class_ in COLOR_PALETTE}

    for item in dataset["test"]:
        # presumably item["name"] is the mask file name (e.g. "test0000.png")
        # and item["annotation"] a 2-D map of class indices - verify against
        # the dataset schema.
        name = item["name"]
        if name not in uploaded:
            raise gr.Error(f"No segmentation mask was uploaded for {name}.")

        pred = _load_prediction(uploaded[name], name)
        gt = np.array(item["annotation"])
        _check_shapes(pred, gt, name)

        # A class's index in the ground truth is its position in COLOR_PALETTE.
        for class_index, (class_, color) in enumerate(COLOR_PALETTE.items()):
            tp, fp, fn = _count_class_pixels(pred, gt, color, class_index)
            totals[class_]["tp"] += tp
            totals[class_]["fp"] += fp
            totals[class_]["fn"] += fn

    scores = {}
    for class_, counts in totals.items():
        precision = _ratio(counts["tp"], counts["tp"] + counts["fp"])
        recall = _ratio(counts["tp"], counts["tp"] + counts["fn"])
        scores[class_] = {
            "recall": recall,
            "precision": precision,
            "f1": _ratio(2 * precision * recall, precision + recall),
        }

    class_count = len(COLOR_PALETTE)
    means = {
        metric: sum(per_class[metric] for per_class in scores.values()) / class_count
        for metric in _REPORT_METRICS
    }

    return gr.Markdown(value=_render_results(scores, means))
if __name__ == "__main__":
    # Single-page UI: a multi-file upload, a button that triggers the
    # evaluation, and a Markdown area that receives the report.
    with gr.Blocks() as demo:
        gr.Markdown("# Histocan Test Set Evaluation Page")

        mask_files = gr.File(
            label="Upload your segmentation masks for the test set",
            file_count="multiple",
            file_types=["image"],
        )
        evaluate_button = gr.Button(value="Evaluate!")
        report = gr.Markdown(value="")

        # Check the batch as soon as it is uploaded; this handler has no outputs.
        mask_files.upload(fn=files_uploaded, inputs=mask_files, outputs=[])
        # Run the scoring on demand and show the result in the report area.
        evaluate_button.click(fn=evaluate, inputs=mask_files, outputs=[report])

    # Enable the request queue with max_size=1, then start the app.
    demo.queue(max_size=1)
    demo.launch()