'''
Use this command in a terminal to run the evaluation script:

torchrun --master-port 8888 --nproc_per_node 1 eval_scripts/model_evaluation.py --cfg-path eval_configs/minigptv2_benchmark_evaluation.yaml --dataset <dataset>

where <dataset> is a comma-separated list drawn from:
mimic_cxr, radvqa, nlst, rsna, detect_mimic, SLAKE
'''
import sys
sys.path.append('.')
import os
import json
from collections import defaultdict

from tqdm import tqdm
from torch.utils.data import DataLoader

from minigpt4.common.config import Config
from minigpt4.common.eval_utils import prepare_texts, init_model, eval_parser
from minigpt4.conversation.conversation import CONV_VISION_minigptv2
from minigpt4.datasets.datasets.mimic_cxr_dataset import evalMIMICDataset, evalDetectMimicDataset
from minigpt4.datasets.datasets.radvqa_dataset import evalRadVQADataset
from minigpt4.datasets.datasets.nlst_dataset import eval_NLST_Dataset
from minigpt4.datasets.datasets.rsna_dataset import evalRSNADataset
from minigpt4.datasets.datasets.SLAKE_dataset import evalSLAKEDataset

# JSON-cleaning helpers and metrics
from eval_scripts.clean_json import clean_mimic_json, clean_vqa_json, clean_detection_json
from eval_scripts.metrics import MIMIC_BERT_Sim, VQA_BERT_Sim, average_iou
def list_of_str(arg):
    return list(map(str, arg.split(',')))
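
# e.g. --dataset mimic_cxr,radvqa is parsed into ["mimic_cxr", "radvqa"]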
parser = eval_parser()
parser.add_argument("--dataset", type=list_of_str, help="comma-separated list of datasets to evaluate")
args = parser.parse_args()
cfg = Config(args)

model, vis_processor = init_model(args)
model.eval()

# use the MiniGPT-v2 conversation template with an empty system message
CONV_VISION = CONV_VISION_minigptv2
conv_temp = CONV_VISION.copy()
conv_temp.system = ""

save_path = cfg.run_cfg.save_path
def process_mimic_dataset():
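    """Report generation on MIMIC-CXR: run inference, clean the predictions, and report the average BERT similarity."""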
    eval_file_path = cfg.evaluation_datasets_cfg[dataset]["eval_file_path"]
    img_path = cfg.evaluation_datasets_cfg[dataset]["img_path"]
    batch_size = cfg.evaluation_datasets_cfg[dataset]["batch_size"]
    max_new_tokens = cfg.evaluation_datasets_cfg[dataset]["max_new_tokens"]

    with open(eval_file_path, 'r') as f:
        mimic = json.load(f)

    data = evalMIMICDataset(mimic, vis_processor, img_path)
    eval_dataloader = DataLoader(data, batch_size=batch_size, shuffle=False)

    minigpt4_predict = defaultdict(list)
    for images, questions, img_ids in tqdm(eval_dataloader):
        texts = prepare_texts(questions, conv_temp)  # wrap the texts with the conversation template
        answers = model.generate(images, texts, max_new_tokens=max_new_tokens, do_sample=False)
        for answer, img_id, question in zip(answers, img_ids, questions):
            minigpt4_predict[img_id].append(answer)

    file_save_path = os.path.join(save_path, "MIMIC_inference_results_stage3.json")
    with open(file_save_path, 'w') as f:
        json.dump(minigpt4_predict, f)
    clean_mimic_json(file_save_path, file_save_path)

    # CSV path (hardcoded) for the per-case BERT similarity scores
    output_csv_path = '/miniGPT-Med/metric_results/bert_similarity_scores.csv'
    # MIMIC_BERT_Sim takes the ground-truth path first, then the inference-result path
    average_similarity = MIMIC_BERT_Sim(eval_file_path, file_save_path, output_csv_path)
    print("Average BERT Similarity:", average_similarity)
def process_vqa_dataset():
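    """VQA on RadVQA: run inference, clean the predictions, and score them with BERT similarity."""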
    eval_file_path = cfg.evaluation_datasets_cfg[dataset]["eval_file_path"]
    img_path = cfg.evaluation_datasets_cfg[dataset]["img_path"]
    batch_size = cfg.evaluation_datasets_cfg[dataset]["batch_size"]
    max_new_tokens = cfg.evaluation_datasets_cfg[dataset]["max_new_tokens"]

    with open(eval_file_path, 'r') as f:
        radVQA = json.load(f)

    data = evalRadVQADataset(radVQA, vis_processor, img_path)
    eval_dataloader = DataLoader(data, batch_size=batch_size, shuffle=False)

    minigpt4_predict = defaultdict(list)
    for images, questions, img_ids in tqdm(eval_dataloader):
        texts = prepare_texts(questions, conv_temp)  # wrap the texts with the conversation template
        answers = model.generate(images, texts, max_new_tokens=max_new_tokens, do_sample=False)
        for answer, img_id, question in zip(answers, img_ids, questions):
            minigpt4_predict[img_id].append({
                "key": img_id,  # was img_ids (the whole batch); record the single image id
                "question": question.replace("[vqa]", "").strip(),
                "answer": answer,
            })

    file_save_path = os.path.join(save_path, "radVQA_inference_results.json")
    # CSV path (hardcoded) for the per-case BERT similarity scores
    output_csv_path = '/miniGPT-Med/BERT_Sim_results/vqa_bert_similarity_scores.csv'
    with open(file_save_path, 'w') as f:
        json.dump(minigpt4_predict, f)
    clean_vqa_json(file_save_path, file_save_path)
    VQA_BERT_Sim(eval_file_path, file_save_path, output_csv_path)
def process_nlst_dataset():
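    """Detection on NLST: run inference, clean the bounding-box predictions, and compute the average IoU."""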
    eval_file_path = cfg.evaluation_datasets_cfg[dataset]["eval_file_path"]
    img_path = cfg.evaluation_datasets_cfg[dataset]["img_path"]
    batch_size = cfg.evaluation_datasets_cfg[dataset]["batch_size"]
    max_new_tokens = cfg.evaluation_datasets_cfg[dataset]["max_new_tokens"]

    with open(eval_file_path, 'r') as f:
        nlst = json.load(f)

    data = eval_NLST_Dataset(nlst, vis_processor, img_path)
    eval_dataloader = DataLoader(data, batch_size=batch_size, shuffle=False)

    minigpt4_predict = defaultdict(list)
    for images, questions, img_ids in tqdm(eval_dataloader):
        texts = prepare_texts(questions, conv_temp)
        answers = model.generate(images, texts, max_new_tokens=max_new_tokens, do_sample=False)
        for answer, img_id, question in zip(answers, img_ids, questions):
            minigpt4_predict[img_id].append(answer)

    file_save_path = os.path.join(save_path, "NLST_inference_result.json")
    with open(file_save_path, 'w') as f:
        json.dump(minigpt4_predict, f)

    csv_pth = os.path.join(save_path, "NLST_IoU_results.csv")
    clean_detection_json(file_save_path, file_save_path)
    average_iou(eval_file_path, file_save_path, 512, 100, "NLST", csv_pth)
def process_rsna_dataset():
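    """Detection on RSNA: run inference, clean the bounding-box predictions, and compute the average IoU."""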
    eval_file_path = cfg.evaluation_datasets_cfg[dataset]["eval_file_path"]
    img_path = cfg.evaluation_datasets_cfg[dataset]["img_path"]
    batch_size = cfg.evaluation_datasets_cfg[dataset]["batch_size"]
    max_new_tokens = cfg.evaluation_datasets_cfg[dataset]["max_new_tokens"]

    with open(eval_file_path, 'r') as f:
        rsna = json.load(f)

    data = evalRSNADataset(rsna, vis_processor, img_path)
    eval_dataloader = DataLoader(data, batch_size=batch_size, shuffle=False)

    minigpt4_predict = defaultdict(list)
    for images, questions, img_ids in tqdm(eval_dataloader):
        texts = prepare_texts(questions, conv_temp)
        answers = model.generate(images, texts, max_new_tokens=max_new_tokens, do_sample=False)
        for answer, img_id, question in zip(answers, img_ids, questions):
            minigpt4_predict[img_id].append(answer)

    file_save_path = os.path.join(save_path, "RSNA_inference_result.json")
    with open(file_save_path, 'w') as f:
        json.dump(minigpt4_predict, f)

    csv_pth = os.path.join(save_path, "RSNA_IoU_results.csv")
    clean_detection_json(file_save_path, file_save_path)
    average_iou(eval_file_path, file_save_path, 1024, 100, "rsna", csv_pth)
def process_detect_mimic():
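    """Detection on MIMIC-CXR: run inference, clean the bounding-box predictions, and compute the average IoU."""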
    eval_file_path = cfg.evaluation_datasets_cfg[dataset]["eval_file_path"]
    img_path = cfg.evaluation_datasets_cfg[dataset]["img_path"]
    batch_size = cfg.evaluation_datasets_cfg[dataset]["batch_size"]
    max_new_tokens = cfg.evaluation_datasets_cfg[dataset]["max_new_tokens"]

    with open(eval_file_path, 'r') as f:
        detect_mimic = json.load(f)

    data = evalDetectMimicDataset(detect_mimic, vis_processor, img_path)
    eval_dataloader = DataLoader(data, batch_size=batch_size, shuffle=False)

    minigpt4_predict = defaultdict(list)
    for images, questions, img_ids in tqdm(eval_dataloader):
        texts = prepare_texts(questions, conv_temp)
        answers = model.generate(images, texts, max_new_tokens=max_new_tokens, do_sample=False)
        for answer, img_id, question in zip(answers, img_ids, questions):
            minigpt4_predict[img_id].append(answer)

    file_save_path = os.path.join(save_path, "Detect_MIMIC_inference_result.json")
    with open(file_save_path, 'w') as f:
        json.dump(minigpt4_predict, f)

    csv_pth = os.path.join(save_path, "MIMIC_IoU_results.csv")
    clean_detection_json(file_save_path, file_save_path)
    # TODO: the image-size argument is still a placeholder in the original script
    average_iou(eval_file_path, file_save_path, "to be specified soon", 100, "MIMIC", csv_pth)
def process_SLAKE_dataset():
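    """SLAKE evaluation: run inference, clean the predictions, and compute the average IoU."""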
    eval_file_path = cfg.evaluation_datasets_cfg[dataset]["eval_file_path"]
    img_path = cfg.evaluation_datasets_cfg[dataset]["img_path"]
    batch_size = cfg.evaluation_datasets_cfg[dataset]["batch_size"]
    max_new_tokens = cfg.evaluation_datasets_cfg[dataset]["max_new_tokens"]

    with open(eval_file_path, 'r') as f:
        SLAKE = json.load(f)

    data = evalSLAKEDataset(SLAKE, vis_processor, img_path)
    eval_dataloader = DataLoader(data, batch_size=batch_size, shuffle=False)

    minigpt4_predict = defaultdict(list)
    for images, questions, img_ids in tqdm(eval_dataloader):
        texts = prepare_texts(questions, conv_temp)
        answers = model.generate(images, texts, max_new_tokens=max_new_tokens, do_sample=False)
        for answer, img_id, question in zip(answers, img_ids, questions):
            minigpt4_predict[img_id].append(answer)

    file_save_path = os.path.join(save_path, "SLAKE_inference_result.json")
    with open(file_save_path, 'w') as f:
        json.dump(minigpt4_predict, f)

    csv_pth = os.path.join(save_path, "SLAKE_IoU_results.csv")
    clean_detection_json(file_save_path, file_save_path)
    average_iou(eval_file_path, file_save_path, 100, 100, "SLAKE", csv_pth)
############################################################################
# each process_* function reads the module-level `dataset` name set by this loop
for dataset in args.dataset:
    if dataset == 'mimic_cxr':
        process_mimic_dataset()
    elif dataset == 'radvqa':
        process_vqa_dataset()
    elif dataset == 'nlst':
        process_nlst_dataset()
    elif dataset == 'rsna':
        process_rsna_dataset()
    elif dataset == 'detect_mimic':
        process_detect_mimic()
    elif dataset == 'SLAKE':
        process_SLAKE_dataset()
    else:
        print(f"Dataset '{dataset}' is not supported.")