File size: 2,856 Bytes
f78ed8b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
from utils import int2float
def _error_rate(counts, prefix, unit):
    """Return one error rate computed from a judge's edit-operation counts.

    Args:
        counts: Mapping returned by a versa ``*_levenshtein_metric`` call; it
            must contain ``{prefix}_{unit}_delete``, ``..._insert``,
            ``..._replace`` and ``..._equal``.
        prefix: Key prefix of the judge, e.g. ``"espnet"``.
        unit: ``"wer"`` for word-level or ``"cer"`` for character-level counts.

    Returns:
        ``(delete + insert + replace) / (insert + replace + equal)`` as a
        fraction (not a percentage). When the denominator is zero the result
        is ``0.0`` if there were no errors either, otherwise ``float("inf")``.
    """
    deletions = counts[f"{prefix}_{unit}_delete"]
    insertions = counts[f"{prefix}_{unit}_insert"]
    substitutions = counts[f"{prefix}_{unit}_replace"]
    matches = counts[f"{prefix}_{unit}_equal"]

    errors = deletions + insertions + substitutions
    # NOTE(review): the normaliser deliberately mirrors the original formula
    # (insert + replace + equal, i.e. it excludes deletions). That looks like
    # "length of the judge's own transcript" if versa aligns
    # (ASR_transcript -> judge output) -- confirm against versa before
    # changing it.
    length = insertions + substitutions + matches
    if length == 0:
        # Nothing to normalise by (e.g. the judge recognised no text at all).
        # The unguarded division used to raise ZeroDivisionError here.
        return 0.0 if errors == 0 else float("inf")
    return errors / length


def handle_espnet_ASR_WER(ASR_audio_output, ASR_transcript) -> str:
    """Score a transcript against three judge recognisers and report WER/CER.

    For each judge (ESPnet, OWSM, Whisper, in that order) the matching versa
    setup function is called with ``model_tag="default"``, ``beam_size=1``,
    ``text_cleaner="whisper_en"`` and ``use_gpu=True``; the matching
    Levenshtein metric is then run on the audio and ``ASR_transcript``, and
    the returned edit counts are turned into a word and a character error
    rate.

    Args:
        ASR_audio_output: Indexable pair. Element ``1`` is passed through
            ``int2float`` and used as the audio; element ``0`` is forwarded
            as the metric's last positional argument (presumably the sample
            rate -- TODO confirm against the caller).
        ASR_transcript: Text handed to each metric as the transcript to score.

    Returns:
        Six newline-separated lines, ``"<Judge> WER: <pct>"`` followed by
        ``"<Judge> CER: <pct>"`` for each judge, percentages printed with two
        decimals. A rate that cannot be normalised prints as ``0.00`` (no
        errors) or ``inf``.

    Raises:
        KeyError: If a metric result lacks one of the expected count keys.
    """
    # Imported inside the function, as in the original, so that importing this
    # module does not itself require versa.
    from versa import (
        espnet_levenshtein_metric,
        espnet_wer_setup,
        owsm_levenshtein_metric,
        owsm_wer_setup,
        whisper_levenshtein_metric,
        whisper_wer_setup,
    )

    # (display label, result-key prefix, setup function, metric function)
    judges = (
        ("ESPnet", "espnet", espnet_wer_setup, espnet_levenshtein_metric),
        ("OWSM", "owsm", owsm_wer_setup, owsm_levenshtein_metric),
        ("Whisper", "whisper", whisper_wer_setup, whisper_levenshtein_metric),
    )

    report_lines = []
    for label, prefix, setup, metric in judges:
        judge_args = setup(
            model_tag="default",
            beam_size=1,
            text_cleaner="whisper_en",
            use_gpu=True,
        )
        # The audio is converted afresh for every judge, exactly as the
        # original did, so no judge ever sees an array another one touched.
        counts = metric(
            judge_args,
            int2float(ASR_audio_output[1]),
            ASR_transcript,
            ASR_audio_output[0],
        )
        word_rate = _error_rate(counts, prefix, "wer")
        char_rate = _error_rate(counts, prefix, "cer")
        report_lines.append(f"{label} WER: {word_rate * 100:.2f}")
        report_lines.append(f"{label} CER: {char_rate * 100:.2f}")

    return "\n".join(report_lines)