# Page-viewer header captured with this file (not part of the program):
#   Spaces: Running
#   File size: 3,517 Bytes
#   89040ed  (presumably the revision id shown by the viewer)
# The viewer's line-number gutter (1-117) was extraction residue and is omitted.
import os
from tqdm import tqdm
import numpy as np
from evaluation.evaluate_audioset import AudioSetEvaluator
from evaluation.evaluate_audiocaps import AudioCapsEvaluator
from evaluation.evaluate_vggsound import VGGSoundEvaluator
from evaluation.evaluate_music import MUSICEvaluator
from evaluation.evaluate_esc50 import ESC50Evaluator
from evaluation.evaluate_clotho import ClothoEvaluator
from models.clap_encoder import CLAP_Encoder
from utils import (
load_ss_model,
calculate_sdr,
calculate_sisdr,
parse_yaml,
get_mean_sdr_from_dict,
)
def eval(checkpoint_path: str, config_yaml: str = 'config/audiosep_base.yaml') -> None:
    """Benchmark a checkpoint on six datasets and log the averaged scores.

    Builds the model from ``config_yaml`` and ``checkpoint_path``, runs the
    VGGSound, MUSIC, ESC-50, AudioSet, AudioCaps and Clotho evaluators on it,
    prints one summary line per dataset and writes the same lines to
    ``eval_logs/eval_results.txt`` (the directory is created if missing).

    Each dataset is evaluated exactly once. Clotho used to be evaluated twice,
    with the first result overwritten before it was logged; only the pass
    whose result reaches the log file is kept.

    NOTE: the function name shadows the builtin ``eval``; it is kept because
    it is this module's public entry point.

    Args:
        checkpoint_path: Checkpoint path forwarded to ``load_ss_model``.
        config_yaml: Path of the YAML configuration read by ``parse_yaml``.
    """
    log_dir = 'eval_logs'
    os.makedirs(log_dir, exist_ok=True)

    # NOTE(review): the device is hard-coded, so a CUDA device is required.
    device = "cuda"

    configs = parse_yaml(config_yaml)

    # One evaluator per benchmark dataset.
    audioset_evaluator = AudioSetEvaluator()
    audiocaps_evaluator = AudioCapsEvaluator()
    vggsound_evaluator = VGGSoundEvaluator()
    clotho_evaluator = ClothoEvaluator()
    music_evaluator = MUSICEvaluator()
    esc50_evaluator = ESC50Evaluator()

    # Load model
    query_encoder = CLAP_Encoder().eval()
    pl_model = load_ss_model(
        configs=configs,
        checkpoint_path=checkpoint_path,
        query_encoder=query_encoder,
    ).to(device)

    def report(dataset_name, sdri, sisdr):
        """Print and return the summary line for one dataset."""
        msg = "{} Avg SDRi: {:.3f}, SISDR: {:.3f}".format(dataset_name, sdri, sisdr)
        print(msg)
        return msg

    print('------- Start Evaluation -------')

    # These evaluators return the pair in (SISDR, SDRi) order.
    SISDR, SDRi = vggsound_evaluator(pl_model)
    msg_vgg = report("VGGSound", SDRi, SISDR)

    SISDR, SDRi = music_evaluator(pl_model)
    msg_music = report("MUSIC", SDRi, SISDR)

    SISDR, SDRi = esc50_evaluator(pl_model)
    msg_esc50 = report("ESC-50", SDRi, SISDR)

    # The AudioSet evaluator returns per-class score collections instead of a
    # pair: take the NaN-ignoring median of each class, then average them.
    stats_dict = audioset_evaluator(pl_model=pl_model)
    sdris_by_class = stats_dict["sdris_dict"]
    sisdrs_by_class = stats_dict["sisdrs_dict"]
    # NOTE(review): 527 is presumably the number of AudioSet classes -- confirm.
    class_ids = range(527)
    median_sdris = {
        class_id: np.nanmedian(sdris_by_class[class_id]) for class_id in class_ids
    }
    median_sisdrs = {
        class_id: np.nanmedian(sisdrs_by_class[class_id]) for class_id in class_ids
    }
    SDRi = get_mean_sdr_from_dict(median_sdris)
    SISDR = get_mean_sdr_from_dict(median_sisdrs)
    msg_audioset = report("AudioSet", SDRi, SISDR)

    SISDR, SDRi = audiocaps_evaluator(pl_model)
    msg_audiocaps = report("AudioCaps", SDRi, SISDR)

    SISDR, SDRi = clotho_evaluator(pl_model)
    msg_clotho = report("Clotho", SDRi, SISDR)

    # The order of the lines in the log file is independent of run order.
    msgs = [msg_audioset, msg_vgg, msg_audiocaps, msg_clotho, msg_music, msg_esc50]

    log_path = os.path.join(log_dir, 'eval_results.txt')
    with open(log_path, 'w', encoding='utf-8') as fp:
        fp.writelines(msg + '\n' for msg in msgs)

    print(f'Eval log is written to {log_path} ...')
    print('------------------------- Done ---------------------------')
if __name__ == "__main__":
    # Script entry point: benchmark the checkpoint at the path below using
    # the function's default config file.
    default_checkpoint = 'checkpoint/audiosep_base.ckpt'
    eval(checkpoint_path=default_checkpoint)
# (stray "|" delimiter from the page extraction removed)