Spaces: Running on Zero
| from ola_vlm.eval.mmstar.smp import * | |
| from copy import deepcopy | |
def _prediction_matches(predict, answer):
    """Return True if a normalized prediction string selects option `answer`.

    Accepts the bare option letter, a parenthesized letter '(a) ...',
    'option a ...', or 'the answer is a ...'. Both arguments are assumed
    to already be lower-cased and whitespace-normalized.
    """
    if not predict:
        # Empty prediction can never match (the original swallowed the
        # resulting IndexError and counted the sample as wrong).
        return False
    if answer == predict[0]:
        return True
    if predict[0] == '(' and len(predict) > 1 and answer == predict[1]:
        return True
    if predict[0:7] == 'option ' and len(predict) > 7 and answer == predict[7]:
        return True
    if predict[0:14] == 'the answer is ' and len(predict) > 14 and answer == predict[14]:
        return True
    return False


def MMStar_eval(eval_file):
    """Score MMStar predictions and write a per-category score report.

    Loads an evaluation file whose records carry 'prediction', 'answer',
    'category' and 'l2_category' fields, matches each prediction against
    the gold answer with a few lenient prefix rules, aggregates accuracy
    per L2 sub-category and per top-level category, dumps the scores to
    `<eval_file stem>_score.json`, logs them, and returns the score dict.

    Args:
        eval_file: Path to the `.jsonl` results file produced by inference.

    Returns:
        dict mapping 'final score', each top-level category, and each
        'category(l2_category)' key to a float accuracy.
    """
    # Correct-answer counters: MMStar defines 6 coarse categories,
    # each with 3 L2 sub-categories.
    MMStar_score_l2 = {
        'coarse perception': {
            'image scene and topic': 0,
            'image style & quality': 0,
            'image emotion': 0
        },
        'fine-grained perception': {
            'object counting': 0,
            'recognition': 0,
            'localization': 0
        },
        'instance reasoning': {
            'single-instance reasoning': 0,
            'cross-instance attribute reasoning': 0,
            'cross-instance relation reasoning': 0
        },
        'logical reasoning': {
            'code & sequence reasoning': 0,
            'diagram reasoning': 0,
            'common reasoning': 0
        },
        'science & technology': {
            'biology & chemistry & physics': 0,
            'electronics & energy & mechanical eng.': 0,
            'geography & earth science & agriculture': 0
        },
        'math': {
            'geometry': 0,
            'numeric commonsense and calculation': 0,
            'statistical reasoning': 0
        },
    }
    # Same structure, but counts how many samples land in each bucket.
    MMStar_counter = deepcopy(MMStar_score_l2)
    logger = get_logger('Evaluation')

    data = load(eval_file)
    lines = [data[i] for i in range(len(data))]
    for line in tqdm(lines):
        # Normalize both sides: lower-case, strip, flatten newlines.
        predict = str(line['prediction']).lower().strip().replace('\n', ' ')
        answer = str(line['answer']).lower().strip().replace('\n', ' ')
        category = str(line['category'])
        l2_category = str(line['l2_category'])
        MMStar_counter[category][l2_category] += 1
        if _prediction_matches(predict, answer):
            MMStar_score_l2[category][l2_category] += 1

    MMStar_score = {}
    MMStar_score['final score'] = 0
    for k, v in MMStar_score_l2.items():
        MMStar_score[k] = 0
        for l2_k, l2_v in v.items():
            # Guard empty buckets so a partial eval file does not crash
            # with ZeroDivisionError; an unseen sub-category scores 0.
            seen = MMStar_counter[k][l2_k]
            MMStar_score[f'{k}({l2_k})'] = float(l2_v) / float(seen) if seen else 0.0
            MMStar_score[k] += l2_v
        MMStar_score['final score'] += MMStar_score[k]
        # Official MMStar denominators: exactly 250 samples per category
        # (1500 total). Fixed on purpose so scores match the released
        # benchmark protocol even if the eval file is incomplete.
        MMStar_score[k] = float(MMStar_score[k]) / 250.0
    MMStar_score['final score'] = float(MMStar_score['final score']) / 1500.0

    score_pth = eval_file.replace('.jsonl', '_score.json')
    dump(MMStar_score, score_pth)
    logger.info(
        f'MMStar_eval successfully finished evaluating {eval_file}, results saved in {score_pth}')
    logger.info('Score: ')
    for key, value in MMStar_score.items():
        logger.info('{}:{}'.format(key, value))
    return MMStar_score