File size: 2,582 Bytes
59619e5
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
import gradio as gr
import pandas as pd
from css_html_js import custom_css

# Page heading, passed to gr.HTML below. The Malaysian flag is the regional
# indicator pair "M" + "Y"; it had been stored as mojibake (its UTF-8 bytes
# decoded through a legacy single-byte code page) and is restored here.
TITLE = """<h1 align="center" id="space-title">🇲🇾 Malaysian Speech-to-Text Leaderboard</h1>"""

# Markdown shown under the title, describing the benchmark datasets and the
# text normalisation applied before scoring.
# The emoji were stored as mojibake (UTF-8 bytes decoded through a legacy
# code page) and are restored here.
# NOTE(review): the first emoji had lost its final byte, so it could not be
# recovered exactly; the triangular ruler is a best guess - confirm.
INTRODUCTION_TEXT = """
📐 The 🇲🇾 Malaysian Speech-to-Text Leaderboard aims to track, rank and evaluate Malaysian Speech-to-Text models. All notebooks at https://github.com/mesolitica/malaysian-stt-benchmarks

## Dataset

📈 We evaluate models based on 2 datasets,

1. Fleurs MY-MS test set, Malay language, https://huggingface.co/datasets/malaysia-ai/fleurs-my-ms
2. IMDA TTS first 700 audio files, English language but with Manglish slang, https://huggingface.co/datasets/mesolitica/IMDA-TTS

During test we,

1. Lowercase.
2. Remove punctuations.
"""

# Metric column names, in the order the values are listed in each row below.
_METRIC_COLUMNS = (
    'Fleurs MY-MS CER',
    'Fleurs MY-MS WER',
    'IMDA TTS CER',
    'IMDA TTS WER',
)

# (model id, metric values) pairs. An empty tuple means the row carries only
# the model name, so no metric keys are produced for it.
_OPEN_SOURCE_RESULTS = (
    (
        'openai/whisper-large-v3',
        (0.027414635425413655, 0.0912705436045907,
         0.016648493852990828, 0.0386282289139432),
    ),
    (
        'openai/whisper-medium',
        (0.045260198639505075, 0.14913723876746685,
         0.02065587879424904, 0.047277690563404855),
    ),
    (
        'openai/whisper-small',
        (0.07028889922090295, 0.2327510905228186,
         0.024812471688517194, 0.058901277294134434),
    ),
    (
        'openai/whisper-base',
        (0.24820848114299138, 0.5164123884823085,
         0.03914533450681607, 0.08951682444539587),
    ),
    ('mesolitica/malaysian-whisper-medium', ()),
    (
        'mesolitica/malaysian-whisper-small',
        (0.03596621199151582, 0.12024457480764372,
         0.024228721439634855, 0.05546294182008469),
    ),
    (
        'mesolitica/malaysian-whisper-base',
        (0.07478803508650385, 0.21823941044294087,
         0.03982418421412676, 0.08917690642690643),
    ),
    ('mesolitica/malaysian-whisper-tiny', ()),
)

# One dict per model: the 'model' key first, then whichever metrics the row
# provides (zip stops at the shorter input, so empty rows add nothing).
open_source = [
    {'model': model_id, **dict(zip(_METRIC_COLUMNS, scores))}
    for model_id, scores in _OPEN_SOURCE_RESULTS
]

# Tabulate the results. Rows that list only a model name get NaN in the
# metric columns, since pandas fills keys missing from a record.
data = pd.DataFrame(data=open_source)

# Assemble the page: heading, introduction, then the results table.
with gr.Blocks(css=custom_css) as demo:
    gr.HTML(value=TITLE)
    gr.Markdown(value=INTRODUCTION_TEXT, elem_classes="markdown-text")
    gr.DataFrame(value=data)

# Start the Gradio server for the assembled app.
demo.launch()