import argparse
from time import time

import torch
import torch.nn.functional as F
from accelerate import Accelerator
from accelerate.utils import gather_object
from mmdet.datasets import RefCocoDataset
from mmdet.datasets.transforms import LoadAnnotations
from mmdet.evaluation import RefSegMetric
from mmdet.structures.mask import BitmapMasks
from mmengine.config import Config
from tqdm import tqdm

from xtuner.model.utils import guess_load_checkpoint
from xtuner.registry import BUILDER
from xtuner.utils.constants import DEFAULT_IMAGE_TOKEN

from projects.f_llm.datasets.transforms import PILLoadImageFromFile, RefCOCO2PNG

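# Multi-GPU evaluation of referring-expression segmentation on the
# RefCOCO / RefCOCO+ / RefCOCOg benchmarks, reporting cIoU and mIoU.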
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        formatter_class=argparse.RawTextHelpFormatter)
    parser.add_argument('config', help='config file path.')
    parser.add_argument('--checkpoint', default=None, type=str,
                        help='checkpoint to load on top of the built model.')
    parser.add_argument('--debug', action='store_true',
                        help='only evaluate the first 100 samples per subset.')
    parser.add_argument('--ceph', action='store_true',
                        help='read images from a petrel (ceph) backend.')
    parser.add_argument('--concat', action='store_true',
                        help='predict all referring expressions of a sample '
                             'in one call instead of one at a time.')
    args = parser.parse_args()
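
    # Initialise the distributed state and run a small all-gather smoke test
    # so that launch problems surface before the expensive model build.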
    accelerator = Accelerator()

    message = [f"Hello this is GPU {accelerator.process_index}"]
    messages = gather_object(message)
    accelerator.print(messages)

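    # Build the model and input-processing components from the config.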
    cfg = Config.fromfile(args.config)
    prompt_template = cfg.prompt_template
    tokenizer = cfg.tokenizer
    image_processor = cfg.image_processor
    prompt = cfg.get('prompt', None)
    model = BUILDER.build(cfg.model)

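    # Load extra weights on top of the config-built model. strict=False is
    # used because the checkpoint may cover only a subset of the parameters,
    # so both missing and unexpected keys are printed for inspection.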
    if args.checkpoint is not None:
        state_dict = guess_load_checkpoint(args.checkpoint)
        missing, unexpected = model.load_state_dict(state_dict, strict=False)
        accelerator.print(f"Missing parameters: {missing}")
        accelerator.print(f"Unexpected parameters: {unexpected}")

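    # Move the model to this rank's device; plain print() (rather than
    # accelerator.print) so every rank reports its own timing.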
    print(f"Start moving model to device: {accelerator.device}", flush=True)
    tik = time()
    model = model.to(device=accelerator.device)
    print(f"Finished moving model to device: {accelerator.device}, "
          f"time used: {time() - tik}", flush=True)
    model.eval()

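    # Optionally read the COCO train2014 images through a petrel (ceph)
    # client instead of the local filesystem.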
    if args.ceph:
        backend_args = dict(
            backend='petrel',
            path_mapping={
                'data/coco/train2014/':
                'openmmlab:s3://openmmlab/datasets/detection/coco/train2014/'
            })
    else:
        backend_args = None

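    # Shared arguments for the RefCOCO2PNG transform, which converts a
    # RefCOCO sample (image + referring expression) into model inputs.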
    refcoco2png_params = dict(
        type=RefCOCO2PNG,
        image_processor=image_processor,
        tokenizer=tokenizer,
        prompt_template=prompt_template,
        concat=args.concat,
        image2tensor=cfg.get('image2tensor', True),
        add_image_token=cfg.get('add_image_token', False),
        image_token=cfg.get('image_token', DEFAULT_IMAGE_TOKEN))
    accelerator.print(f"Do concatenation? {args.concat}")
    if prompt is not None:
        refcoco2png_params.update(prompt=prompt)

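    # mmdet-style test pipeline: load the image as PIL, load only the GT
    # masks from the annotations, then convert everything to model inputs.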
    test_pipeline = [
        dict(type=PILLoadImageFromFile, backend_args=backend_args),
        dict(
            type=LoadAnnotations,
            with_mask=True,
            with_bbox=False,
            with_seg=False,
            with_label=False),
        refcoco2png_params
    ]

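    # Standard evaluation subsets: RefCOCO and RefCOCO+ use the UNC split
    # files (val/testA/testB), RefCOCOg uses the UMD split files (val/test).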
    refcoco_subsets = dict()
    for split in ['val', 'testA', 'testB']:
        refcoco_subsets[f'refcoco_{split}'] = dict(
            ann_file='refcoco/instances.json',
            split_file='refcoco/refs(unc).p',
            split=split)

    for split in ['val', 'testA', 'testB']:
        refcoco_subsets[f'refcoco+_{split}'] = dict(
            ann_file='refcoco+/instances.json',
            split_file='refcoco+/refs(unc).p',
            split=split)

    for split in ['val', 'test']:
        refcoco_subsets[f'refcocog_{split}'] = dict(
            ann_file='refcocog/instances.json',
            split_file='refcocog/refs(umd).p',
            split=split)

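    # Evaluate each subset in turn; text_mode='select_first' keeps a single
    # referring expression per instance.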
    for name, subset in refcoco_subsets.items():
        accelerator.print(f"Start evaluating {name}")
        dataset = RefCocoDataset(
            data_root='data/coco/',
            data_prefix=dict(img_path='train2014/'),
            text_mode='select_first',
            pipeline=test_pipeline,
            **subset)

        accelerator.wait_for_everyone()

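        # Enumerate sample indices; --debug restricts each subset to the
        # first 100 samples for a quick sanity run.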
        data_ids = list(range(len(dataset)))
        if args.debug:
            data_ids = data_ids[:100]

        results = []

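        # Shard the indices across ranks; each rank predicts its own slice
        # and the results are gathered on the main process afterwards.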
        with accelerator.split_between_processes(data_ids) as sub_ids:
            for idx in tqdm(sub_ids, disable=not accelerator.is_main_process):
                data_sample = dataset[idx]
                if args.concat:
                    with torch.no_grad():
                        pred_mask_logits = model.predict(data_sample)

                    gt_masks = data_sample['gt_masks'].numpy() > 0
                    pred_masks = F.interpolate(
                        pred_mask_logits[None].float().sigmoid(),
                        size=gt_masks.shape[-2:], mode='bilinear')[0].cpu()
                    pred_masks = pred_masks > 0.5

                    assert len(pred_masks) == len(gt_masks)

                    results.append(dict(
                        pred_instances=dict(masks=pred_masks),
                        gt_masks=BitmapMasks(masks=gt_masks,
                                             height=gt_masks.shape[1],
                                             width=gt_masks.shape[2])))
                else:
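                    # Without --concat the dataset yields one sub-sample per
                    # referring expression; predict each one individually.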
                    for sub_data_sample in data_sample:
                        with torch.no_grad():
                            pred_mask_logits = model.predict(sub_data_sample)

                        gt_masks = sub_data_sample['gt_masks'].numpy() > 0
                        pred_masks = F.interpolate(
                            pred_mask_logits[None].float().sigmoid(),
                            size=gt_masks.shape[-2:], mode='bilinear')[0].cpu()
                        pred_masks = pred_masks > 0.5

                        assert len(pred_masks) == len(gt_masks)
                        mask_cnt = pred_masks.shape[0]
                        assert mask_cnt == 1

                        results.append(dict(
                            pred_instances=dict(masks=pred_masks),
                            gt_masks=BitmapMasks(masks=gt_masks,
                                                 height=gt_masks.shape[1],
                                                 width=gt_masks.shape[2])))

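        # Collect the per-rank results on the main process and score them
        # with mmdet's RefSegMetric (cIoU and mIoU).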
        results = gather_object(results)
        if accelerator.is_main_process:
            accelerator.print(
                f"Collected {len(results)} result samples from all gpus")
            evaluator = RefSegMetric(metric=['cIoU', 'mIoU'])
            evaluator.process(data_batch=dict(), data_samples=results)
            metrics = evaluator.compute_metrics(evaluator.results)
            accelerator.print(f"Evaluation results on {name}: {metrics}")
        accelerator.print(f"Finished evaluating {name}")