Spaces: Running

ardaatahan committed
Commit • ad25137
Parent(s): 99e6f55

filter out old data and add english and multilingual wer to performance tab
Browse files
- constants.py +2 -0
- dashboard_data/device_map.json +3 -2
- dashboard_data/multilingual_confusion_matrices.json +9 -9
- dashboard_data/multilingual_results.csv +16 -16
- dashboard_data/performance_data.json +0 -0
- dashboard_data/support_data.csv +23 -23
- dashboard_data/version.json +1 -0
- main.py +31 -31
- multilingual_generate.py +1 -0
- performance_generate.py +18 -15
- utils.py +12 -4
constants.py
CHANGED
@@ -137,6 +137,8 @@ COL_NAMES = {
     "device": "Device",
     "os": "OS",
     "parity": "Parity %",
+    "english_wer": "English WER",
+    "multilingual_wer": "Multilingual WER",
 }
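Note: the two new COL_NAMES entries only add display labels for the columns wired up in main.py and utils.py further down. As a rough illustration of how such a mapping is typically consumed when preparing the dashboard table (the rename call below is an assumption for illustration, not code from this commit, and the data is placeholder):

import pandas as pd

COL_NAMES = {"device": "Device", "os": "OS", "english_wer": "English WER", "multilingual_wer": "Multilingual WER"}

# Hypothetical frame using the internal column keys.
df = pd.DataFrame({"device": ["Apple M3 Max"], "os": ["macOS 15.2"], "english_wer": [12.3], "multilingual_wer": ["45.6"]})

# Internal keys are swapped for their human-readable labels before display.
display_df = df.rename(columns=COL_NAMES)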
dashboard_data/device_map.json
CHANGED
@@ -14,5 +14,6 @@
     "iPhone14,7": "iPhone 14",
     "iPhone16,1": "iPhone 15 Pro",
     "iPhone16,2": "iPhone 15 Pro Max",
-    "iPhone17,1": "iPhone 16 Pro"
-}
+    "iPhone17,1": "iPhone 16 Pro",
+    "iPhone17,3": "iPhone 16"
+}
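The new entry extends the identifier-to-marketing-name map. get_device_name in performance_generate.py (visible as unchanged context below) resolves device identifiers through this file, falling back to the raw identifier when a device is missing. A self-contained sketch of that lookup, with the map loaded inline for brevity (the real helper presumably gets device_map from elsewhere in the module):

import json

def get_device_name(device: str) -> str:
    # dashboard_data/device_map.json maps identifiers like "iPhone17,3" to names like "iPhone 16".
    with open("dashboard_data/device_map.json", "r") as f:
        device_map = json.load(f)
    # Unknown identifiers fall back to themselves; spaces become underscores for use in keys and paths.
    return device_map.get(device, device).replace(" ", "_")

get_device_name("iPhone17,3")  # -> "iPhone_16"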
dashboard_data/multilingual_confusion_matrices.json
CHANGED
@@ -1,5 +1,5 @@
 {
-    "
+    "openai/whisper-large-v3-v20240930": {
         "not_forced": {
             "matrix": [
                 [
@@ -11109,7 +11109,7 @@
             ]
         }
     },
-    "
+    "openai/whisper-tiny": {
         "not_forced": {
             "matrix": [
                 [
@@ -22219,7 +22219,7 @@
             ]
         }
     },
-    "
+    "openai/whisper-small": {
         "not_forced": {
             "matrix": [
                 [
@@ -33035,7 +33035,7 @@
             ]
         }
     },
-    "
+    "openai/whisper-large-v3": {
         "not_forced": {
             "matrix": [
                 [
@@ -43565,7 +43565,7 @@
             ]
         }
     },
-    "
+    "openai/whisper-large-v3-v20240930/626MB": {
         "not_forced": {
             "matrix": [
                 [
@@ -54675,7 +54675,7 @@
             ]
         }
     },
-    "
+    "openai/whisper-large-v3-v20240930/547MB": {
         "not_forced": {
             "matrix": [
                 [
@@ -65637,7 +65637,7 @@
             ]
         }
     },
-    "
+    "openai/whisper-large-v2": {
         "not_forced": {
             "matrix": [
                 [
@@ -76599,7 +76599,7 @@
             ]
         }
     },
-    "
+    "openai/whisper-base": {
         "not_forced": {
             "matrix": [
                 [
@@ -87709,4 +87709,4 @@
             ]
         }
     }
-}
+}
dashboard_data/multilingual_results.csv
CHANGED
@@ -1,17 +1,17 @@
 Model,Forced Tokens,Average WER,WER_sl,WER_sk,WER_ur,WER_sw,WER_uz,WER_pl,WER_vi,WER_sq,WER_sv,WER_he,WER_mt,WER_hy,WER_am,WER_nn,WER_be,WER_da,WER_mr,WER_kk,WER_mn,WER_ja,WER_el,WER_lv,WER_oc,WER_it,WER_ca,WER_cs,WER_te,WER_ru,WER_tk,WER_ro,WER_yo,WER_yue,WER_yi,WER_pt,WER_ps,WER_zh,WER_uk,WER_sr,WER_pa,WER_ml,WER_mk,WER_ba,WER_ha,WER_ar,WER_gl,WER_hu,WER_nl,WER_bg,WER_bn,WER_ne,WER_af,WER_hi,WER_ka,WER_de,WER_as,WER_az,WER_br,WER_ko,WER_fi,WER_id,WER_fr,WER_es,WER_et,WER_en,WER_fa,WER_lt,WER_cy,WER_eu,WER_lo,WER_tt,WER_ta,WER_th,WER_tr
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+openai/whisper-large-v3-v20240930,False,51.57,36.9,29.71,46.48,64.04,110.02,14.74,14.89,69.25,18.09,29.11,86.41,74.32,145.83,50.03,79.08,19.43,67.19,43.57,116.51,26.33,21.75,32.92,73.51,14.39,20.36,14.41,140.14,15.81,112.64,15.2,95.06,51.16,103.7,16.37,111.73,27.24,24.08,62.2,104.28,121.81,48.07,102.63,104.87,40.62,18.12,16.39,11.46,21.95,98.71,86.28,37.8,43.31,137.87,14.01,103.2,38.1,100.68,20.79,16.62,12.28,16.31,5.94,32.21,12.54,60.73,35.6,57.45,42.35,103.14,98.21,44.83,31.0,31.24
+openai/whisper-large-v3-v20240930,True,46.09,27.13,24.61,25.59,61.29,98.84,12.12,16.92,65.69,12.97,26.85,84.04,73.95,128.9,39.97,61.51,17.63,48.26,41.87,97.08,21.97,17.73,30.78,71.01,12.83,18.25,12.85,75.43,13.28,104.35,11.41,89.71,64.28,100.0,14.93,95.78,25.34,19.14,54.07,120.4,112.94,34.52,100.0,96.64,31.45,15.0,15.3,8.91,20.42,79.7,63.89,36.54,26.14,132.26,12.26,105.14,33.33,95.96,20.75,15.42,11.11,15.51,6.1,31.51,12.13,55.96,32.84,54.92,40.65,114.11,98.39,41.54,23.3,24.29
+openai/whisper-tiny,False,105.22,121.79,133.13,113.57,119.78,118.34,103.44,99.27,119.73,82.19,122.66,112.51,132.53,120.31,103.18,115.1,99.88,101.13,125.86,114.12,82.61,130.16,112.75,100.2,89.71,82.85,125.93,113.15,109.31,117.09,118.71,109.16,88.8,120.37,81.94,115.21,79.77,115.73,114.84,103.05,105.04,117.77,116.19,109.58,159.43,78.61,129.6,76.43,122.17,100.32,104.44,118.43,102.01,140.29,79.95,100.86,146.83,110.82,110.13,93.9,124.4,67.17,68.41,113.76,33.14,122.06,133.54,112.59,132.9,106.52,123.61,100.96,110.41,125.91
+openai/whisper-tiny,True,86.1,81.42,92.88,70.33,112.75,122.16,56.82,50.52,99.17,64.45,72.15,103.81,133.47,140.93,102.1,98.9,79.55,102.76,179.77,128.57,53.32,66.33,89.27,93.19,60.12,59.02,81.79,133.22,58.43,124.62,66.43,111.99,90.36,102.78,65.71,105.43,65.2,69.07,80.42,104.57,133.29,83.84,110.69,97.86,97.63,54.0,85.5,54.06,83.5,106.27,103.34,93.39,102.17,140.86,49.53,112.36,90.67,102.29,62.34,72.56,54.08,59.57,34.99,101.75,33.4,130.66,101.05,93.62,97.05,113.15,107.44,80.94,42.42,66.07
+openai/whisper-small,False,96.89,116.31,109.59,106.84,110.05,117.73,69.24,74.63,110.83,52.47,97.58,109.76,138.13,118.51,93.33,113.81,66.04,101.28,127.1,115.44,89.93,120.81,101.47,88.06,67.04,51.78,117.66,120.07,92.0,116.14,86.09,106.41,48.07,105.56,37.94,105.77,117.15,101.7,108.27,101.54,102.57,115.41,118.11,104.62,151.92,65.17,66.19,60.49,118.61,100.46,102.89,99.37,100.76,141.61,49.17,100.37,121.83,107.77,137.18,68.85,104.92,54.88,47.03,96.83,18.2,119.04,121.42,111.48,116.31,118.1,120.3,100.63,115.33,108.8
+openai/whisper-small,True,69.14,49.09,51.74,40.93,96.1,115.21,23.74,25.43,89.94,23.97,43.29,96.2,120.55,130.06,164.5,78.06,37.18,89.86,82.95,262.79,30.25,31.49,62.47,114.14,25.02,30.35,37.7,311.76,26.09,174.55,26.99,161.95,48.61,100.0,35.7,94.66,42.22,40.02,60.5,160.3,115.92,50.81,118.57,97.46,47.01,30.45,44.66,19.94,49.16,129.67,107.33,71.02,45.58,,23.87,131.95,62.3,,34.7,30.07,23.81,27.11,11.94,72.39,17.35,97.5,75.61,67.42,77.08,102.07,103.03,42.18,21.52,33.32
+openai/whisper-large-v3,False,54.77,41.01,32.74,44.39,66.07,110.74,17.82,14.19,64.45,13.59,36.31,96.11,70.79,134.23,52.71,85.41,16.63,60.48,58.79,122.59,33.66,28.76,27.08,78.1,13.55,17.32,20.67,123.88,16.13,107.71,10.99,110.75,53.95,105.56,14.51,103.6,42.68,32.28,64.23,101.2,102.62,68.19,100.46,99.97,36.03,23.57,13.44,12.17,30.2,98.44,101.1,40.79,75.87,149.84,14.75,100.54,35.32,106.08,20.94,15.9,11.86,15.51,6.36,30.06,12.7,62.68,32.37,51.06,45.38,104.29,100.73,81.72,38.07,26.73
+openai/whisper-large-v3,True,34.23,18.87,18.44,21.24,58.02,90.52,10.13,12.32,53.97,9.81,23.79,78.78,54.56,,29.37,45.53,13.89,42.37,48.61,87.75,20.38,12.35,21.06,65.39,11.11,14.69,12.04,61.25,13.0,99.39,5.39,97.25,14.27,101.85,13.75,88.95,25.41,15.59,41.4,57.1,107.34,20.59,99.25,91.39,23.08,13.06,12.44,7.03,17.37,,52.77,36.38,20.33,,9.89,,21.43,86.38,20.37,10.32,9.47,13.67,4.93,28.43,12.21,45.43,27.63,35.05,40.65,102.76,90.45,28.97,6.11,17.88
+openai/whisper-large-v3-v20240930/626MB,False,52.29,39.68,29.99,49.08,66.59,107.43,15.31,15.95,71.18,17.19,32.01,88.37,79.06,135.02,51.08,80.09,20.74,71.26,47.37,105.47,25.78,22.21,34.77,74.12,15.26,20.99,15.98,139.45,16.29,106.18,16.59,95.23,51.42,101.85,16.46,107.76,29.67,27.49,64.5,103.61,115.9,47.55,100.79,103.61,38.22,19.62,17.52,11.63,24.46,98.93,85.04,39.69,47.4,133.75,14.69,104.02,38.49,101.45,22.74,16.62,12.28,17.04,6.02,34.3,13.39,62.2,38.5,60.13,45.51,103.6,98.12,48.42,35.09,31.2
+openai/whisper-large-v3-v20240930/626MB,True,47.64,30.62,25.67,26.93,62.36,97.82,13.11,17.36,67.47,12.72,29.16,84.89,77.31,111.23,39.77,63.57,18.94,50.51,45.76,97.71,22.33,18.71,31.72,72.64,13.16,19.39,14.49,84.78,14.37,102.51,12.24,93.42,66.1,100.0,14.85,95.84,27.18,21.48,56.17,134.5,123.72,36.74,98.12,95.73,32.09,15.48,17.05,9.05,22.25,81.89,63.49,40.0,27.3,128.28,13.21,102.9,34.52,96.28,22.01,15.17,12.66,15.68,5.97,33.98,13.03,56.96,35.97,57.0,43.62,121.47,99.17,42.74,23.09,24.11
+openai/whisper-large-v3-v20240930/547MB,False,61.3,56.47,43.16,61.91,88.9,109.85,24.17,22.93,88.74,26.09,45.97,96.7,107.38,134.76,57.25,85.1,27.18,70.85,71.7,109.68,30.21,32.19,50.95,80.91,20.97,30.91,27.66,137.02,20.13,112.37,27.76,99.08,65.2,116.67,21.24,103.29,38.97,40.09,73.1,103.36,116.51,67.78,109.58,103.61,53.89,27.16,30.42,17.39,39.13,102.58,86.68,51.97,58.32,132.81,19.31,103.2,59.92,103.96,26.14,23.24,18.05,23.09,8.1,47.03,16.56,84.54,55.51,75.58,63.11,111.89,103.54,61.24,58.17,42.66
+openai/whisper-large-v3-v20240930/547MB,True,54.61,40.12,35.54,35.4,78.38,102.05,19.6,25.97,81.39,19.25,38.72,89.86,109.32,146.41,46.71,69.67,25.3,60.25,66.07,101.55,25.79,26.23,45.55,75.4,18.77,27.16,23.73,106.23,18.63,108.87,18.26,97.33,74.97,101.85,18.91,95.65,34.74,30.15,63.01,,120.76,47.96,104.64,100.75,41.63,20.54,27.11,14.18,33.31,96.72,74.91,48.5,38.49,129.01,17.62,101.41,47.22,99.32,26.25,20.31,17.25,22.0,7.84,45.12,16.12,77.07,49.49,71.63,57.09,114.95,101.79,53.56,39.58,33.54
+openai/whisper-large-v2,False,94.09,119.27,112.44,106.95,110.77,122.16,75.3,61.28,112.19,43.62,91.08,112.01,137.54,118.3,90.82,118.11,25.33,100.79,152.88,115.67,79.34,113.99,69.07,91.12,50.45,40.5,112.0,113.84,99.36,123.07,96.59,110.1,52.67,100.0,47.38,106.7,125.66,95.49,,101.13,102.34,118.31,124.76,105.15,143.76,63.7,44.82,48.04,119.44,100.18,102.64,101.1,100.38,154.52,65.08,100.59,85.71,105.79,97.02,48.57,92.39,31.95,46.74,99.3,13.74,116.06,137.91,74.42,107.11,111.27,131.83,100.18,119.35,113.86
+openai/whisper-large-v2,True,47.14,25.76,25.84,25.24,67.14,100.99,12.51,17.69,65.57,12.16,24.01,83.34,62.4,176.79,47.12,49.05,16.72,48.12,58.01,136.6,22.6,15.04,28.69,72.69,14.34,16.2,17.14,165.74,15.11,115.56,7.86,95.93,53.06,105.56,15.23,99.75,36.59,20.95,43.09,105.46,114.5,25.32,107.07,115.23,26.39,16.27,16.72,8.93,21.52,103.9,62.0,47.24,25.92,150.19,11.7,107.19,29.37,106.33,24.84,13.13,12.2,16.21,6.93,35.96,12.7,53.38,38.94,32.85,49.09,103.76,105.51,28.08,8.76,19.55
+openai/whisper-base,False,104.18,125.5,143.16,112.16,113.79,122.43,99.57,99.07,123.02,75.03,98.01,114.12,138.03,114.66,99.56,122.48,81.21,101.78,137.07,115.04,91.47,131.68,117.3,96.63,78.16,69.85,128.78,132.18,103.59,125.21,114.33,106.87,72.69,125.93,59.81,114.59,74.81,113.57,119.31,103.18,105.38,123.9,123.21,109.11,160.77,70.55,110.22,80.49,122.28,100.7,104.89,108.98,101.32,143.37,61.29,100.7,134.52,111.44,136.38,102.17,126.88,58.74,58.79,115.46,25.71,122.69,149.93,110.1,126.44,116.79,132.15,101.37,112.61,122.57
+openai/whisper-base,True,79.92,72.07,76.57,59.1,106.54,171.77,43.44,40.15,100.62,45.53,61.22,102.6,208.4,165.98,83.98,92.45,61.96,103.31,99.05,201.0,42.73,55.22,81.5,82.95,46.45,48.59,67.24,117.99,44.21,151.1,54.19,105.42,70.49,111.11,48.98,98.51,53.88,58.31,76.9,100.38,119.75,74.21,134.5,116.11,72.17,47.63,71.07,37.01,73.54,100.79,101.9,87.4,102.24,117.93,38.09,109.8,84.13,106.76,48.87,56.32,43.04,45.09,24.55,91.31,25.11,104.21,91.07,87.41,98.64,106.13,108.77,60.25,32.91,51.87
dashboard_data/performance_data.json
CHANGED
The diff for this file is too large to render.
See raw diff
dashboard_data/support_data.csv
CHANGED
@@ -1,23 +1,23 @@
-,Model,Apple
-distil-whisper_distil-large-v3,distil-whisper_distil-large-v3,✅ macOS 15.
-distil-whisper_distil-large-v3_594MB,distil-whisper_distil-large-v3_594MB,✅ macOS 15.
-distil-whisper_distil-large-v3_turbo,distil-whisper_distil-large-v3_turbo,✅ macOS 15.
-distil-whisper_distil-large-v3_turbo_600MB,distil-whisper_distil-large-v3_turbo_600MB
-openai_whisper-base,openai_whisper-base
-openai_whisper-base.en,openai_whisper-base.en,✅ macOS 15.
-openai_whisper-large-v2,openai_whisper-large-v2
-openai_whisper-large-v2_949MB,openai_whisper-large-v2_949MB
-openai_whisper-large-v2_turbo,openai_whisper-large-v2_turbo,✅ macOS 15.
-openai_whisper-large-v2_turbo_955MB,openai_whisper-large-v2_turbo_955MB,✅ macOS 15.
-openai_whisper-large-v3,openai_whisper-large-v3,✅ macOS 15.
-openai_whisper-large-v3-v20240930,openai_whisper-large-v3-v20240930,✅ macOS 15.
-openai_whisper-large-v3-v20240930_626MB,openai_whisper-large-v3-v20240930_626MB,✅ macOS 15.
-openai_whisper-large-v3-v20240930_turbo,openai_whisper-large-v3-v20240930_turbo,✅ macOS 15.
-openai_whisper-large-v3-v20240930_turbo_632MB,openai_whisper-large-v3-v20240930_turbo_632MB,✅ macOS 15.
-openai_whisper-large-v3_947MB,openai_whisper-large-v3_947MB,✅ macOS 15.
-openai_whisper-large-v3_turbo,openai_whisper-large-v3_turbo,✅ macOS 15.
-openai_whisper-large-v3_turbo_954MB,openai_whisper-large-v3_turbo_954MB,✅ macOS 15.
-openai_whisper-small,openai_whisper-small,✅ macOS 15.
-openai_whisper-small.en,openai_whisper-small.en
-openai_whisper-tiny,openai_whisper-tiny,✅ macOS 15.
-openai_whisper-tiny.en,openai_whisper-tiny.en,✅ macOS 15.
+,Model,Apple M3 Max
+distil-whisper_distil-large-v3,distil-whisper_distil-large-v3,✅ macOS 15.2
+distil-whisper_distil-large-v3_594MB,distil-whisper_distil-large-v3_594MB,✅ macOS 15.2
+distil-whisper_distil-large-v3_turbo,distil-whisper_distil-large-v3_turbo,✅ macOS 15.2
+distil-whisper_distil-large-v3_turbo_600MB,distil-whisper_distil-large-v3_turbo_600MB,✅ macOS 15.2
+openai_whisper-base,openai_whisper-base,✅ macOS 15.2
+openai_whisper-base.en,openai_whisper-base.en,✅ macOS 15.2
+openai_whisper-large-v2,openai_whisper-large-v2,✅ macOS 15.2
+openai_whisper-large-v2_949MB,openai_whisper-large-v2_949MB,✅ macOS 15.2
+openai_whisper-large-v2_turbo,openai_whisper-large-v2_turbo,✅ macOS 15.2
+openai_whisper-large-v2_turbo_955MB,openai_whisper-large-v2_turbo_955MB,✅ macOS 15.2
+openai_whisper-large-v3,openai_whisper-large-v3,✅ macOS 15.2
+openai_whisper-large-v3-v20240930,openai_whisper-large-v3-v20240930,✅ macOS 15.2
+openai_whisper-large-v3-v20240930_626MB,openai_whisper-large-v3-v20240930_626MB,✅ macOS 15.2
+openai_whisper-large-v3-v20240930_turbo,openai_whisper-large-v3-v20240930_turbo,✅ macOS 15.2
+openai_whisper-large-v3-v20240930_turbo_632MB,openai_whisper-large-v3-v20240930_turbo_632MB,✅ macOS 15.2
+openai_whisper-large-v3_947MB,openai_whisper-large-v3_947MB,✅ macOS 15.2
+openai_whisper-large-v3_turbo,openai_whisper-large-v3_turbo,✅ macOS 15.2
+openai_whisper-large-v3_turbo_954MB,openai_whisper-large-v3_turbo_954MB,✅ macOS 15.2
+openai_whisper-small,openai_whisper-small,✅ macOS 15.2
+openai_whisper-small.en,openai_whisper-small.en,✅ macOS 15.2
+openai_whisper-tiny,openai_whisper-tiny,✅ macOS 15.2
+openai_whisper-tiny.en,openai_whisper-tiny.en,✅ macOS 15.2
dashboard_data/version.json
ADDED
@@ -0,0 +1 @@
+{"last_modified": "", "sha": "", "releases": ["9f493bc", "a9b92c4"]}
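This new file anchors the "filter out old data" half of the commit: it lists the benchmark release commit hashes, and both performance_generate.py and utils.py below load it and keep only data produced by those commits. A minimal sketch of the gating pattern (the is_release helper is illustrative, not part of the repo):

import json

with open("dashboard_data/version.json", "r") as f:
    version = json.load(f)
releases = set(version["releases"])  # {"9f493bc", "a9b92c4"}

def is_release(commit_hash: str) -> bool:
    # Data from any commit outside the release set is skipped downstream.
    return commit_hash in releases

is_release("9f493bc")  # True
is_release("ad25137")  # False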
main.py
CHANGED
@@ -87,6 +87,35 @@ sorted_quality_df = (
     .reset_index(drop=True)
 )

+multilingual_df = pd.read_csv("dashboard_data/multilingual_results.csv")
+multilingual_models_df = multilingual_df[["Model"]].drop_duplicates()
+multilingual_models_buttons = []
+for model in multilingual_models_df["Model"]:
+    elem_id = (
+        f"{model}".replace(" ", "_").replace('"', "").replace("'", "").replace(",", "")
+    )
+    multilingual_models_buttons.append(
+        gr.Button(value=model, elem_id=elem_id, visible=False)
+    )
+multilingual_models_df["Model"] = multilingual_models_df["Model"].apply(
+    lambda x: make_multilingual_model_clickable_link(x)
+)
+
+with open("dashboard_data/multilingual_confusion_matrices.json", "r") as file:
+    confusion_matrix_map = dict(json.load(file))
+
+# Create a mapping of model to average WER
+model_to_english_wer = dict(zip(sorted_quality_df["model"], sorted_quality_df["average_wer"]))
+model_to_multilingual_wer = dict(
+    zip(multilingual_df["Model"], multilingual_df["Average WER"])
+)
+
+# Add English WER and Multilingual WER to performance_df
+benchmark_df["english_wer"] = benchmark_df["model"].map(model_to_english_wer)
+benchmark_df["multilingual_wer"] = benchmark_df["model"].map(model_to_multilingual_wer)
+benchmark_df.fillna({"multilingual_wer": "English-only model"}, inplace=True)
+benchmark_df["multilingual_wer"] = benchmark_df["multilingual_wer"].astype(str)
+
 sorted_performance_df = (
     benchmark_df.assign(model_len=benchmark_df["model"].str.len())
     .sort_values(
@@ -124,7 +153,8 @@ performance_df = sorted_performance_df[
         "model",
         "device",
         "os",
-        "
+        "english_wer",
+        "multilingual_wer",
         "qoi",
         "speed",
         "tokens_per_second",
@@ -166,16 +196,6 @@ for col in dataset_toks_columns:
     }
 )

-# Calculate parity with M2 Ultra
-m2_ultra_wer = (
-    performance_df[performance_df["Device"] == "Apple M2 Ultra"]
-    .groupby("Model")["Average WER"]
-    .first()
-)
-performance_df["Parity %"] = performance_df.apply(
-    lambda row: calculate_parity(m2_ultra_wer, row), axis=1
-)
-
 # Process model names for display
 model_df["model_raw"] = model_df["Model"].copy()
 performance_df["model_raw"] = performance_df["Model"].copy()
@@ -273,7 +293,6 @@ def performance_filter(
     else:
         filtered_df = pd.DataFrame(columns=filtered_df.columns)

-
     # Filter by operating systems
     filtered_df = (
         filtered_df[
@@ -287,7 +306,6 @@ def performance_filter(
         else pd.DataFrame(columns=filtered_df.columns)
     )

-
     # Apply short-form and long-form speed and tokens per second filters
     min_short_speed, max_short_speed = short_speed_slider
     min_long_speed, max_long_speed = long_speed_slider
@@ -377,23 +395,6 @@ text_diff_elems = []

 tabs = gr.Tabs(elem_id="tab-elems")

-multilingual_df = pd.read_csv("dashboard_data/multilingual_results.csv")
-multilingual_models_df = multilingual_df[["Model"]].drop_duplicates()
-multilingual_models_buttons = []
-for model in multilingual_models_df["Model"]:
-    elem_id = (
-        f"{model}".replace(" ", "_").replace('"', "").replace("'", "").replace(",", "")
-    )
-    multilingual_models_buttons.append(
-        gr.Button(value=model, elem_id=elem_id, visible=False)
-    )
-multilingual_models_df["Model"] = multilingual_models_df["Model"].apply(
-    lambda x: make_multilingual_model_clickable_link(x)
-)
-
-with open("dashboard_data/multilingual_confusion_matrices.json", "r") as file:
-    confusion_matrix_map = dict(json.load(file))
-

 def update_multilingual_results(selected_model):
     """
@@ -484,7 +485,6 @@ def update_multilingual_results(selected_model):
         gr.update(visible=unforced_plot is not None, value=unforced_plot),
     ]

-
 font = [
     "Zwizz Regular", # Local font
     "IBM Plex Mono", # Monospace font
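The block added near the top of main.py implements the other half of the commit message: per-model English WER is taken from sorted_quality_df, multilingual WER from multilingual_results.csv, and both are joined onto benchmark_df by model name, with models that have no multilingual entry labelled "English-only model". A self-contained sketch of that lookup-and-map pattern with stand-in frames (column names follow the diff; the data is invented for illustration):

import pandas as pd

# Stand-ins for sorted_quality_df, multilingual_df and benchmark_df.
quality = pd.DataFrame({"model": ["whisper-tiny", "whisper-tiny.en"], "average_wer": [12.3, 10.1]})
multilingual = pd.DataFrame({"Model": ["whisper-tiny"], "Average WER": [105.22]})
benchmark = pd.DataFrame({"model": ["whisper-tiny", "whisper-tiny.en"]})

# Build model -> WER lookups and join them onto the benchmark frame by model name.
model_to_english_wer = dict(zip(quality["model"], quality["average_wer"]))
model_to_multilingual_wer = dict(zip(multilingual["Model"], multilingual["Average WER"]))

benchmark["english_wer"] = benchmark["model"].map(model_to_english_wer)
benchmark["multilingual_wer"] = benchmark["model"].map(model_to_multilingual_wer)
# Models without a multilingual result get a readable placeholder instead of NaN.
benchmark.fillna({"multilingual_wer": "English-only model"}, inplace=True)
benchmark["multilingual_wer"] = benchmark["multilingual_wer"].astype(str)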
multilingual_generate.py
CHANGED
@@ -92,6 +92,7 @@ def calculate_and_save_results(results, confusion_matrices):
     wer_data = []
     for key, data in results.items():
         model, forced = key.rsplit("/", 1)
+        model = model.replace("_", "/")
         row = {
             "Model": model,
             "Forced Tokens": forced == "forced",
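The one-line addition normalizes the model portion of each results key from underscore-separated to slash-separated form, which matches the slash-separated top-level keys now used in multilingual_confusion_matrices.json. A small sketch of the key handling (the example key is hypothetical but follows the "<model>/<forced flag>" shape the loop expects):

key = "openai_whisper-large-v3-v20240930_626MB/forced"

model, forced = key.rsplit("/", 1)   # -> "openai_whisper-large-v3-v20240930_626MB", "forced"
model = model.replace("_", "/")      # -> "openai/whisper-large-v3-v20240930/626MB"
row = {"Model": model, "Forced Tokens": forced == "forced"}
# row == {"Model": "openai/whisper-large-v3-v20240930/626MB", "Forced Tokens": True}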
performance_generate.py
CHANGED
@@ -13,7 +13,7 @@ import requests

 from constants import BASE_WHISPERKIT_BENCHMARK_URL
 from text_normalizer import text_normalizer
-from utils import compute_average_wer,
+from utils import compute_average_wer, download_dataset


 def fetch_evaluation_data(url):
@@ -80,7 +80,7 @@ def get_device_name(device):
     return device_map.get(device, device).replace(" ", "_")


-def process_benchmark_file(file_path, dataset_dfs, results):
+def process_benchmark_file(file_path, dataset_dfs, results, releases):
     """
     Processes a single benchmark file and updates the results dictionary.

@@ -97,6 +97,12 @@ def process_benchmark_file(file_path, dataset_dfs, results):

     if len(test_results) == 0:
         return
+
+    commit_hash_timestamp = file_path.split("/")[-2]
+    commit_timestamp, commit_hash = commit_hash_timestamp.split("_")
+
+    if commit_hash not in releases:
+        return

     first_test_result = test_results[0]
     model = first_test_result["testInfo"]["model"]
@@ -110,8 +116,6 @@ def process_benchmark_file(file_path, dataset_dfs, results):
     else:
         os_info = f"macOS_{first_test_result['staticAttributes']['osVersion']}"
     timestamp = first_test_result["testInfo"]["date"]
-    commit_hash_timestamp = file_path.split("/")[-2]
-    commit_timestamp, commit_hash = commit_hash_timestamp.split("_")

     key = (model, device, os_info, commit_timestamp)
     dataset_name = dataset_dir
@@ -127,7 +131,6 @@ def process_benchmark_file(file_path, dataset_dfs, results):
         }
     results[key]["timestamp"] = timestamp
     results[key]["average_wer"].append(wer_entry)
-    results[key]["dataset_wer"][dataset_name].append(wer_entry)

     input_audio_seconds = test_info["timings"]["inputAudioSeconds"]
     full_pipeline = test_info["timings"]["fullPipeline"]
@@ -163,10 +166,8 @@ def process_benchmark_file(file_path, dataset_dfs, results):

     results[key]["qoi"].append(1 if prediction_wer <= reference_wer else 0)

-    return key, dataset_name
-

-def process_summary_file(file_path, results):
+def process_summary_file(file_path, results, releases):
     """
     Processes a summary file and updates the results dictionary with device support information.

@@ -179,6 +180,9 @@ def process_summary_file(file_path, results):
     """
     with open(file_path, "r") as file:
         summary_data = json.load(file)
+
+    if summary_data["commitHash"] not in releases:
+        return

     device = summary_data["deviceIdentifier"]
     os = f"{'iPadOS' if 'iPad' in device else summary_data['osType']} {summary_data['osVersion']}"
@@ -270,10 +274,6 @@ def calculate_and_save_performance_results(
             for dataset, tps_info in data["dataset_tokens_per_second"].items()
         },
         "average_wer": compute_average_wer(data["average_wer"]),
-        "dataset_wer": {
-            dataset: compute_average_wer(wer)
-            for dataset, wer in data["dataset_wer"].items()
-        },
         "qoi": round(mean(data["qoi"]), 2),
         "commit_hash": data["commit_hash"],
         "commit_timestamp": data["commit_timestamp"],
@@ -423,7 +423,6 @@ def main():
     performance_results = defaultdict(
         lambda: {
             "average_wer": [],
-            "dataset_wer": defaultdict(list),
             "qoi": [],
             "speed": {"inputAudioSeconds": 0, "fullPipeline": 0},
             "tokens_per_second": {"totalDecodingLoops": 0, "fullPipeline": 0},
@@ -443,15 +442,19 @@ def main():

     generate_device_map(source_xcresult_directory)

+    with open("dashboard_data/version.json", "r") as f:
+        version = json.load(f)
+    releases = set(version["releases"])
+
     for subdir, _, files in os.walk(source_xcresult_directory):
         for filename in files:
             file_path = os.path.join(subdir, filename)
             if not filename.endswith(".json"):
                 continue
             elif "summary" in filename:
-                process_summary_file(file_path, support_results)
+                process_summary_file(file_path, support_results, releases)
             else:
-                process_benchmark_file(file_path, dataset_dfs, performance_results)
+                process_benchmark_file(file_path, dataset_dfs, performance_results, releases)

     not_supported = calculate_and_save_performance_results(
         performance_results, "dashboard_data/performance_data.json"
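These changes apply the "filter out old data" part at ingestion time: process_benchmark_file now derives the commit hash from the parent directory name and returns early when it is not in the releases set, process_summary_file checks summary_data["commitHash"] the same way, and main() builds the set from version.json and threads it into both calls. A sketch of the benchmark-side gate in isolation (the "<commit_timestamp>_<commit_hash>" directory layout follows the split in the diff; the accept helper and the paths below are invented for illustration):

def accept(file_path: str, releases: set) -> bool:
    # The parent directory encodes "<commit_timestamp>_<commit_hash>"; the real code
    # also keeps the timestamp for grouping results by run.
    commit_hash_timestamp = file_path.split("/")[-2]
    commit_timestamp, commit_hash = commit_hash_timestamp.split("_")
    # Benchmarks from non-release commits are dropped before any aggregation.
    return commit_hash in releases

releases = {"9f493bc", "a9b92c4"}  # loaded from dashboard_data/version.json in main()
accept("xcresults/20241201T120000_9f493bc/benchmark.json", releases)  # True
accept("xcresults/20240601T080000_0000000/benchmark.json", releases)  # False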
utils.py
CHANGED
@@ -173,7 +173,6 @@ def dir_to_json(root_dir, output_file):
        if file_path.endswith(".DS_Store") or "summary" in file_path:
            continue
        parts = file_path.split(os.sep)
-        print(parts)
        model_version = parts[2]
        device_name = parts[3].replace("_", " ")
        os_type_version = parts[4]
@@ -397,8 +396,17 @@ def plot_metric(
     :param exclude_input: Optional string to exclude model-device-OS combinations
     :return: A Plotly figure object
     """
+    with open("dashboard_data/version.json", "r") as f:
+        version = json.load(f)
+    releases = set(version["releases"])
+
+    df = df[df["commit_hash"].isin(releases)]
+
     grouped = df.groupby(["model", "device", "os"])
-    sorted_groups = [
+    sorted_groups = [
+        group.sort_values("commit_timestamp")
+        for _, group in grouped
+    ]

     if filter_input:
         filters = [f.strip().lower() for f in filter_input.split(";")]
@@ -551,8 +559,8 @@ def create_initial_performance_column_dict():
            ColumnContent("Device", "html", True, never_hidden=True),
        ],
        ["os", ColumnContent, ColumnContent("OS", "html", True, never_hidden=True)],
-        ["
-        ["
+        ["english_wer", ColumnContent, ColumnContent("English WER", "html", True)],
+        ["multilingual_wer", ColumnContent, ColumnContent("Multilingual WER", "str", True)],
        ["qoi", ColumnContent, ColumnContent("QoI", "html", False)],
        ["speed", ColumnContent, ColumnContent("Speed", "html", False)],
        ["toks", ColumnContent, ColumnContent("Tok / s", "html", False)],
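In utils.py the same release filter is applied at plot time: plot_metric drops rows whose commit_hash is not in the release set before grouping, then sorts each (model, device, os) group by commit_timestamp so the plotted series follow release order, and the performance column dictionary gains the two new WER columns. A runnable sketch of the filtering and grouping step with a toy frame (in the dashboard the frame is presumably built from performance_data.json):

import pandas as pd

df = pd.DataFrame(
    {
        "model": ["openai_whisper-tiny"] * 3,
        "device": ["Apple M3 Max"] * 3,
        "os": ["macOS 15.2"] * 3,
        "commit_hash": ["9f493bc", "a9b92c4", "0000000"],  # "0000000" stands in for an old commit
        "commit_timestamp": ["2024-11-01", "2024-12-01", "2024-06-01"],
        "speed": [20.1, 21.4, 15.0],
    }
)
releases = {"9f493bc", "a9b92c4"}  # normally read from dashboard_data/version.json

# Keep only rows produced by release commits, then order each series chronologically.
df = df[df["commit_hash"].isin(releases)]
grouped = df.groupby(["model", "device", "os"])
sorted_groups = [group.sort_values("commit_timestamp") for _, group in grouped]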