rosyvs committed on
Commit
61c613f
1 Parent(s): 4f9e433

Upload https://huggingface.co/levicu/LEVI_whisper_medium.en/tree/main with huggingface_hub

Browse files
https:/huggingface.co/levicu/LEVI_whisper_medium.en/tree/main ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#%% imports
import os

from benchmark_utils import ASRmanifest, wer_from_csv


#%% setup paths
# Default configuration: the model is referenced as '<hf_org>/<model_name>'.
corpora_root = '/shared/corpora/forSAGA/'  # root path where audio files are, inserted in place of $DATAROOT in manifest
manif_root = '/shared/corpora/forSAGA/data_manifests/'  # path to dir containing data manifest csvs
output_dir = './ASR_output/'  # where to save ASR output
manifest = 'LEVI_LoFi_v2_TEST_norm_wer_isat'  # name of test manifest
model_name = 'LEVI_whisper_medium.en'  # name of save directory of model you want to evaluate
hf_org = 'levicu'
model_path = f'{hf_org}/{model_name}'

#%% setup paths for Rosy TESTING:
# NOTE: when the file is run top to bottom, every value in this cell replaces
# the one set in the cell above. In interactive (cell-by-cell) use, run only
# one of the two setup cells.
corpora_root = '/shared/corpora/'  # root path where audio files are, inserted in place of $DATAROOT in manifest
manif_root = '/shared/corpora/data_manifests/ASR/'  # path to dir containing data manifest csvs
output_dir = '/home/rosy/whisat-output/'  # where to save ASR output
manifest = 'LEVI_LoFi_v2_TEST_punc+cased'  # name of test manifest
model_name = 'LEVI_LoFi_v2_MediumEN_Lora_Int8'  # name of save directory of model you want to evaluate
model_path = '/shared/models/LEVI_LoFi_v2_MediumEN_Lora_Int8/final/'
# NOTE(review): this second assignment overrides the local checkpoint path
# above, while model_name (used in the output file name) still names the LoRA
# model - confirm this is intended before trusting the output file label.
# The Hub id for the OpenAI checkpoint is spelled with a hyphen
# ("whisper-medium.en"), not an underscore.
model_path = 'openai/whisper-medium.en'

#%%
# generate paths
manifest_csv = os.path.join(manif_root, f'{manifest}.csv')
# The output file name records which model was run on which manifest.
out_csv = os.path.join(output_dir, f'{model_name}_on_{manifest}.csv')

#%% Inference
# Presumably transcribes every entry of manifest_csv with the model at
# model_path and writes the results to out_csv - verify against benchmark_utils.
ASRmanifest(
    manifest_csv=manifest_csv,
    out_csv=out_csv,
    corpora_root=corpora_root,
    model_path=model_path,
)

#%% Evaluation
print(f'reading results from {out_csv}')
print(f'{model_name} on {manifest}')
# Reference text is read from the 'transcript' column and the hypothesis from
# the 'asr' column of out_csv; the result is kept in wer_meas for later cells.
wer_meas = wer_from_csv(
    out_csv,
    refcol='transcript',
    hypcol='asr',
    printout=True,
    text_norm_method='levi',
)