#%% imports
# Benchmark script: run an ASR model over a test manifest, save the
# hypotheses to a CSV, then compute word error rate from that CSV.
import os

from benchmark_utils import ASRmanifest, wer_from_csv

#%% setup paths
# '~' is expanded here because os.path.join and ordinary file APIs treat it
# as a literal directory name rather than the user's home directory.

# root path where audio files are, inserted in place of $DATAROOT in manifest
corpora_root = os.path.expanduser('~/corpora/forSAGA/')
# path to dir containing data manifest csvs
manif_root = os.path.expanduser('~/corpora/forSAGA/data_manifests/')
# where to save ASR output
output_dir = './ASR_output/'

manifest = 'LEVI_LoFi_v2_TEST_punc+cased'  # name of test manifest
model_name = 'LEVI_whisper_medium.en'  # name of model you want to evaluate
hf_org = 'levicu'
model_path = f'{hf_org}/{model_name}'

#%% generate paths
manifest_csv = os.path.join(manif_root, f'{manifest}.csv')
out_csv = os.path.join(output_dir, f'{model_name}_on_{manifest}.csv')

# Make sure the output directory exists before inference tries to write to it.
# NOTE(review): ASRmanifest may already create it; exist_ok makes this harmless.
os.makedirs(output_dir, exist_ok=True)

#%% Inference
# Runs the model at model_path over the manifest; results go to out_csv,
# which the evaluation cell below reads back.
ASRmanifest(
    manifest_csv=manifest_csv,
    out_csv=out_csv,
    corpora_root=corpora_root,
    model_path=model_path,
)

#%% Evaluation
print(f'reading results from {out_csv}')
print(f'{model_name} on {manifest}')
wer_meas = wer_from_csv(
    out_csv,
    refcol='transcript',  # column used as the reference text
    hypcol='asr',  # column used as the hypothesis text
    printout=True,
    text_norm_method='levi',  # 'whisper','levi','none'
)