huseinzol05 committed on
Commit
59619e5
•
1 Parent(s): b6054b8

added scores

Browse files
Files changed (3) hide show
  1. .gitignore +1 -0
  2. app.py +82 -0
  3. css_html_js.py +111 -0
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ __pycache__
app.py ADDED
@@ -0,0 +1,82 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import pandas as pd
3
+ from css_html_js import custom_css
4
+
5
+ TITLE = """<h1 align="center" id="space-title">🇲🇾 Malaysian Speech-to-Text Leaderboard</h1>"""
6
+
7
+ INTRODUCTION_TEXT = """
8
+ πŸ“ The πŸ‡²πŸ‡Ύ Malaysian Speech-to-Text Leaderboard aims to track, rank and evaluate Malaysian Speech-to-Text models. All notebooks at https://github.com/mesolitica/malaysian-stt-benchmarks
9
+
10
+ ## Dataset
11
+
12
+ 📈 We evaluate models based on 2 datasets,
13
+
14
+ 1. Fleurs MY-MS test set, Malay language, https://huggingface.co/datasets/malaysia-ai/fleurs-my-ms
15
+ 2. IMDA TTS first 700 audio files, English language but with Manglish slang, https://huggingface.co/datasets/mesolitica/IMDA-TTS
16
+
17
+ During test we,
18
+
19
+ 1. Lowercase.
20
+ 2. Remove punctuations.
21
+ """
22
+
23
+ open_source = [
24
+ {
25
+ 'model': 'openai/whisper-large-v3',
26
+ 'Fleurs MY-MS CER': 0.027414635425413655,
27
+ 'Fleurs MY-MS WER': 0.0912705436045907,
28
+ 'IMDA TTS CER': 0.016648493852990828,
29
+ 'IMDA TTS WER': 0.0386282289139432,
30
+ },
31
+ {
32
+ 'model': 'openai/whisper-medium',
33
+ 'Fleurs MY-MS CER': 0.045260198639505075,
34
+ 'Fleurs MY-MS WER': 0.14913723876746685,
35
+ 'IMDA TTS CER': 0.02065587879424904,
36
+ 'IMDA TTS WER': 0.047277690563404855,
37
+ },
38
+ {
39
+ 'model': 'openai/whisper-small',
40
+ 'Fleurs MY-MS CER': 0.07028889922090295,
41
+ 'Fleurs MY-MS WER': 0.2327510905228186,
42
+ 'IMDA TTS CER': 0.024812471688517194,
43
+ 'IMDA TTS WER': 0.058901277294134434,
44
+ },
45
+ {
46
+ 'model': 'openai/whisper-base',
47
+ 'Fleurs MY-MS CER': 0.24820848114299138,
48
+ 'Fleurs MY-MS WER': 0.5164123884823085,
49
+ 'IMDA TTS CER': 0.03914533450681607,
50
+ 'IMDA TTS WER': 0.08951682444539587,
51
+ },
52
+ {
53
+ 'model': 'mesolitica/malaysian-whisper-medium',
54
+ },
55
+ {
56
+ 'model': 'mesolitica/malaysian-whisper-small',
57
+ 'Fleurs MY-MS CER': 0.03596621199151582,
58
+ 'Fleurs MY-MS WER': 0.12024457480764372,
59
+ 'IMDA TTS CER': 0.024228721439634855,
60
+ 'IMDA TTS WER': 0.05546294182008469,
61
+ },
62
+ {
63
+ 'model': 'mesolitica/malaysian-whisper-base',
64
+ 'Fleurs MY-MS CER': 0.07478803508650385,
65
+ 'Fleurs MY-MS WER': 0.21823941044294087,
66
+ 'IMDA TTS CER': 0.03982418421412676,
67
+ 'IMDA TTS WER': 0.08917690642690643,
68
+ },
69
+ {
70
+ 'model': 'mesolitica/malaysian-whisper-tiny',
71
+ },
72
+ ]
73
+
74
+ data = pd.DataFrame(open_source)
75
+
76
+ demo = gr.Blocks(css=custom_css)
77
+ with demo:
78
+ gr.HTML(TITLE)
79
+ gr.Markdown(INTRODUCTION_TEXT, elem_classes="markdown-text")
80
+ gr.DataFrame(data)
81
+
82
+ demo.launch()
css_html_js.py ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ custom_css = """
2
+
3
+ .markdown-text {
4
+ font-size: 16px !important;
5
+ }
6
+
7
+ #models-to-add-text {
8
+ font-size: 18px !important;
9
+ }
10
+
11
+ #citation-button span {
12
+ font-size: 16px !important;
13
+ }
14
+
15
+ #citation-button textarea {
16
+ font-size: 16px !important;
17
+ }
18
+
19
+ #citation-button > label > button {
20
+ margin: 6px;
21
+ transform: scale(1.3);
22
+ }
23
+
24
+ #leaderboard-table {
25
+ margin-top: 15px
26
+ }
27
+
28
+ #leaderboard-table-lite {
29
+ margin-top: 15px
30
+ }
31
+
32
+ #search-bar-table-box > div:first-child {
33
+ background: none;
34
+ border: none;
35
+ }
36
+
37
+ #search-bar {
38
+ padding: 0px;
39
+ }
40
+
41
+ /* Hides the final AutoEvalColumn */
42
+ #llm-benchmark-tab-table table td:last-child,
43
+ #llm-benchmark-tab-table table th:last-child {
44
+ display: none;
45
+ }
46
+
47
+ /* Limit the width of the first AutoEvalColumn so that names don't expand too much */
48
+ table td:first-child,
49
+ table th:first-child {
50
+ max-width: 400px;
51
+ overflow: auto;
52
+ white-space: nowrap;
53
+ }
54
+
55
+ .tab-buttons button {
56
+ font-size: 20px;
57
+ }
58
+
59
+ #scale-logo {
60
+ border-style: none !important;
61
+ box-shadow: none;
62
+ display: block;
63
+ margin-left: auto;
64
+ margin-right: auto;
65
+ max-width: 600px;
66
+ }
67
+
68
+ #scale-logo .download {
69
+ display: none;
70
+ }
71
+ #filter_type{
72
+ border: 0;
73
+ padding-left: 0;
74
+ padding-top: 0;
75
+ }
76
+ #filter_type label {
77
+ display: flex;
78
+ }
79
+ #filter_type label > span{
80
+ margin-top: var(--spacing-lg);
81
+ margin-right: 0.5em;
82
+ }
83
+ #filter_type label > .wrap{
84
+ width: 103px;
85
+ }
86
+ #filter_type label > .wrap .wrap-inner{
87
+ padding: 2px;
88
+ }
89
+ #filter_type label > .wrap .wrap-inner input{
90
+ width: 1px
91
+ }
92
+ #filter-columns-type{
93
+ border:0;
94
+ padding:0.5;
95
+ }
96
+ #filter-columns-size{
97
+ border:0;
98
+ padding:0.5;
99
+ }
100
+ #box-filter > .form{
101
+ border: 0
102
+ }
103
+ """
104
+
105
+ get_window_url_params = """
106
+ function(url_params) {
107
+ const params = new URLSearchParams(window.location.search);
108
+ url_params = Object.fromEntries(params);
109
+ return url_params;
110
+ }
111
+ """