Spaces:
Running
Running
Upload folder using huggingface_hub
Browse files- results/bleu/st.csv +4 -7
- results/llama3_70b_judge/ac.csv +3 -6
- results/llama3_70b_judge/aqa.csv +3 -6
- results/llama3_70b_judge/ar.csv +3 -6
- results/llama3_70b_judge/si.csv +3 -6
- results/llama3_70b_judge/sqa.csv +3 -6
- results/llama3_70b_judge_binary/er.csv +6 -9
- results/llama3_70b_judge_binary/gr.csv +5 -8
- results/llama3_70b_judge_binary/sqa.csv +3 -6
- results/meteor/ac.csv +3 -6
- results/wer/asr.csv +6 -9
- results/wer/cnasr.csv +5 -8
results/bleu/st.csv
CHANGED
@@ -1,10 +1,7 @@
|
|
1 |
Model,covost2_en_id_test,covost2_en_zh_test,covost2_en_ta_test,covost2_id_en_test,covost2_zh_en_test,covost2_ta_en_test
|
2 |
-
|
|
|
3 |
wavllm_fairseq,13.841886973016162,31.96381187282953,0.0033159224040994286,5.933522277713613,2.368659001743569,0.1695522548322915
|
|
|
4 |
Qwen2-Audio-7B-Instruct,16.325186897428104,25.765420247070075,0.03245972071872916,6.326113431899141,16.466557744958333,0.04425838146050298
|
5 |
-
|
6 |
-
whisper_large_v3_with_llama_3_8b_instruct,,,,,,
|
7 |
-
mowe_audio,,,,,,
|
8 |
-
qwen_audio_chat,4.121393814607146,15.307536203629063,0.030392249793261363,0.5089433695608178,10.019503192969147,0.018955183488794443
|
9 |
-
meralion_audiollm_v1_mse,3.4633065594678443,4.974426298395461,0.19029087365857586,2.2174837053079424,1.1308873699909163,0.039170054539659147
|
10 |
-
stage2_whisper3_fft_mlp100_gemma2_9b_lora,31.486376219193552,37.72055065660984,7.85766371365086,31.30757966907217,14.725914408637212,3.025625434709311
|
|
|
1 |
Model,covost2_en_id_test,covost2_en_zh_test,covost2_en_ta_test,covost2_id_en_test,covost2_zh_en_test,covost2_ta_en_test
|
2 |
+
MERaLiON_AudioLLM_v1,32.62336432354048,37.983678588088225,8.496204861105712,37.072090507731545,15.012007824033613,3.9734959499000135
|
3 |
+
Qwen-Audio-Chat,,,,,,
|
4 |
wavllm_fairseq,13.841886973016162,31.96381187282953,0.0033159224040994286,5.933522277713613,2.368659001743569,0.1695522548322915
|
5 |
+
SALMONN_7B,,,,26.89649039333571,5.296039450108202,0.3649023706010388
|
6 |
Qwen2-Audio-7B-Instruct,16.325186897428104,25.765420247070075,0.03245972071872916,6.326113431899141,16.466557744958333,0.04425838146050298
|
7 |
+
whisper_large_v3_with_llama_3_8b_instruct,10.930203684508578,5.987143868370054,,46.79924664837527,14.154700735606419,2.4245628096245917
|
|
|
|
|
|
|
|
|
|
results/llama3_70b_judge/ac.csv
CHANGED
@@ -1,10 +1,7 @@
|
|
1 |
Model,audiocaps_test,wavcaps_test
|
2 |
-
|
|
|
3 |
wavllm_fairseq,5.5,6.901734104046243
|
|
|
4 |
Qwen2-Audio-7B-Instruct,40.77727272727273,33.78034682080925
|
5 |
-
meralion_audiollm_v1_lora,38.972727272727276,28.080924855491332
|
6 |
whisper_large_v3_with_llama_3_8b_instruct,2.4727272727272727,3.445086705202312
|
7 |
-
mowe_audio,41.20909090909091,23.294797687861273
|
8 |
-
qwen_audio_chat,43.82272727272727,29.248554913294797
|
9 |
-
meralion_audiollm_v1_mse,4.2,5.144508670520231
|
10 |
-
stage2_whisper3_fft_mlp100_gemma2_9b_lora,32.236363636363635,25.14450867052023
|
|
|
1 |
Model,audiocaps_test,wavcaps_test
|
2 |
+
MERaLiON_AudioLLM_v1,35.04090909090909,29.028901734104046
|
3 |
+
Qwen-Audio-Chat,,
|
4 |
wavllm_fairseq,5.5,6.901734104046243
|
5 |
+
SALMONN_7B,,
|
6 |
Qwen2-Audio-7B-Instruct,40.77727272727273,33.78034682080925
|
|
|
7 |
whisper_large_v3_with_llama_3_8b_instruct,2.4727272727272727,3.445086705202312
|
|
|
|
|
|
|
|
results/llama3_70b_judge/aqa.csv
CHANGED
@@ -1,10 +1,7 @@
|
|
1 |
Model,clotho_aqa_test,audiocaps_qa_test,wavcaps_qa_test
|
2 |
-
|
|
|
3 |
wavllm_fairseq,43.01199466903598,29.840255591054312,26.25
|
|
|
4 |
Qwen2-Audio-7B-Instruct,50.919591292758774,45.75079872204473,44.473684210526315
|
5 |
-
meralion_audiollm_v1_lora,61.57510938259601,47.6038338658147,41.25
|
6 |
whisper_large_v3_with_llama_3_8b_instruct,29.47134606841404,17.380191693290733,16.710526315789473
|
7 |
-
mowe_audio,62.221235006663704,32.97124600638978,28.88157894736842
|
8 |
-
qwen_audio_chat,58.196357174589075,47.98722044728435,38.68421052631579
|
9 |
-
meralion_audiollm_v1_mse,,19.233226837060702,15.723684210526317
|
10 |
-
stage2_whisper3_fft_mlp100_gemma2_9b_lora,55.303840544482256,38.65814696485623,34.53947368421053
|
|
|
1 |
Model,clotho_aqa_test,audiocaps_qa_test,wavcaps_qa_test
|
2 |
+
MERaLiON_AudioLLM_v1,59.902771025765674,45.11182108626198,38.09210526315789
|
3 |
+
Qwen-Audio-Chat,,,
|
4 |
wavllm_fairseq,43.01199466903598,29.840255591054312,26.25
|
5 |
+
SALMONN_7B,,50.287539936102235,47.30263157894737
|
6 |
Qwen2-Audio-7B-Instruct,50.919591292758774,45.75079872204473,44.473684210526315
|
|
|
7 |
whisper_large_v3_with_llama_3_8b_instruct,29.47134606841404,17.380191693290733,16.710526315789473
|
|
|
|
|
|
|
|
results/llama3_70b_judge/ar.csv
CHANGED
@@ -1,10 +1,7 @@
|
|
1 |
Model,voxceleb_accent_test
|
2 |
-
|
|
|
3 |
wavllm_fairseq,
|
|
|
4 |
Qwen2-Audio-7B-Instruct,29.187525646286417
|
5 |
-
meralion_audiollm_v1_lora,54.89946655724252
|
6 |
whisper_large_v3_with_llama_3_8b_instruct,39.32704144439885
|
7 |
-
mowe_audio,23.68485843249897
|
8 |
-
qwen_audio_chat,45.699630693475584
|
9 |
-
meralion_audiollm_v1_mse,62.83545342634387
|
10 |
-
stage2_whisper3_fft_mlp100_gemma2_9b_lora,
|
|
|
1 |
Model,voxceleb_accent_test
|
2 |
+
MERaLiON_AudioLLM_v1,46.335658596635206
|
3 |
+
Qwen-Audio-Chat,
|
4 |
wavllm_fairseq,
|
5 |
+
SALMONN_7B,
|
6 |
Qwen2-Audio-7B-Instruct,29.187525646286417
|
|
|
7 |
whisper_large_v3_with_llama_3_8b_instruct,39.32704144439885
|
|
|
|
|
|
|
|
results/llama3_70b_judge/si.csv
CHANGED
@@ -1,10 +1,7 @@
|
|
1 |
Model,openhermes_audio_test,alpaca_audio_test
|
2 |
-
|
|
|
3 |
wavllm_fairseq,22.400000000000002,21.6
|
|
|
4 |
Qwen2-Audio-7B-Instruct,44.800000000000004,52.599999999999994
|
5 |
-
meralion_audiollm_v1_lora,65.0,71.39999999999999
|
6 |
whisper_large_v3_with_llama_3_8b_instruct,63.0,70.8
|
7 |
-
mowe_audio,16.0,19.8
|
8 |
-
qwen_audio_chat,11.0,9.6
|
9 |
-
meralion_audiollm_v1_mse,40.0,38.8
|
10 |
-
stage2_whisper3_fft_mlp100_gemma2_9b_lora,63.6,69.2
|
|
|
1 |
Model,openhermes_audio_test,alpaca_audio_test
|
2 |
+
MERaLiON_AudioLLM_v1,71.39999999999999,73.4
|
3 |
+
Qwen-Audio-Chat,,
|
4 |
wavllm_fairseq,22.400000000000002,21.6
|
5 |
+
SALMONN_7B,15.8,17.2
|
6 |
Qwen2-Audio-7B-Instruct,44.800000000000004,52.599999999999994
|
|
|
7 |
whisper_large_v3_with_llama_3_8b_instruct,63.0,70.8
|
|
|
|
|
|
|
|
results/llama3_70b_judge/sqa.csv
CHANGED
@@ -1,10 +1,7 @@
|
|
1 |
Model,slue_p2_sqa5_test,public_sg_speech_qa_test,spoken_squad_test
|
2 |
-
|
|
|
3 |
wavllm_fairseq,83.92156862745098,58.54651162790698,77.64903756307233
|
|
|
4 |
Qwen2-Audio-7B-Instruct,80.04901960784315,58.31395348837209,64.86264249672958
|
5 |
-
meralion_audiollm_v1_lora,81.17647058823529,53.51744186046511,66.30162586432442
|
6 |
whisper_large_v3_with_llama_3_8b_instruct,82.99019607843137,64.94186046511628,83.81984675761541
|
7 |
-
mowe_audio,76.5686274509804,,63.0349467389273
|
8 |
-
qwen_audio_chat,76.12745098039215,57.47093023255814,60.65408334890675
|
9 |
-
meralion_audiollm_v1_mse,82.05882352941175,,68.56288544197346
|
10 |
-
stage2_whisper3_fft_mlp100_gemma2_9b_lora,77.84313725490196,53.11046511627907,
|
|
|
1 |
Model,slue_p2_sqa5_test,public_sg_speech_qa_test,spoken_squad_test
|
2 |
+
MERaLiON_AudioLLM_v1,82.94117647058825,60.31976744186046,70.32704167445337
|
3 |
+
Qwen-Audio-Chat,,,
|
4 |
wavllm_fairseq,83.92156862745098,58.54651162790698,77.64903756307233
|
5 |
+
SALMONN_7B,83.48039215686273,59.24418604651163,
|
6 |
Qwen2-Audio-7B-Instruct,80.04901960784315,58.31395348837209,64.86264249672958
|
|
|
7 |
whisper_large_v3_with_llama_3_8b_instruct,82.99019607843137,64.94186046511628,83.81984675761541
|
|
|
|
|
|
|
|
results/llama3_70b_judge_binary/er.csv
CHANGED
@@ -1,10 +1,7 @@
|
|
1 |
Model,iemocap_emotion_test,meld_sentiment_test,meld_emotion_test
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
qwen_audio_chat,27.345309381237527,43.86973180076628,50.57471264367817
|
9 |
-
meralion_audiollm_v1_mse,75.3493013972056,,79.88505747126436
|
10 |
-
stage2_whisper3_fft_mlp100_gemma2_9b_lora,33.63273453093812,47.43295019157088,37.47126436781609
|
|
|
1 |
Model,iemocap_emotion_test,meld_sentiment_test,meld_emotion_test
|
2 |
+
MERaLiON_AudioLLM_v1,43.72509960159363,42.26053639846744,30.15325670498084
|
3 |
+
Qwen-Audio-Chat,,,
|
4 |
+
wavllm_fairseq,,50.076628352490424,41.0727969348659
|
5 |
+
SALMONN_7B,23.904382470119522,,
|
6 |
+
Qwen2-Audio-7B-Instruct,,53.486590038314176,40.536398467432946
|
7 |
+
whisper_large_v3_with_llama_3_8b_instruct,46.31474103585657,43.86973180076628,33.25670498084291
|
|
|
|
|
|
results/llama3_70b_judge_binary/gr.csv
CHANGED
@@ -1,10 +1,7 @@
|
|
1 |
Model,voxceleb_gender_test,iemocap_gender_test
|
2 |
-
|
|
|
3 |
wavllm_fairseq,69.67583093967994,
|
4 |
-
|
5 |
-
|
6 |
-
whisper_large_v3_with_llama_3_8b_instruct,53.40582683627411,
|
7 |
-
mowe_audio,81.20640131308986,37.82435129740519
|
8 |
-
qwen_audio_chat,70.55806319244974,51.796407185628745
|
9 |
-
meralion_audiollm_v1_mse,47.250718096019696,
|
10 |
-
stage2_whisper3_fft_mlp100_gemma2_9b_lora,85.06360279031597,48.60279441117765
|
|
|
1 |
Model,voxceleb_gender_test,iemocap_gender_test
|
2 |
+
MERaLiON_AudioLLM_v1,99.52810832991383,96.81274900398407
|
3 |
+
Qwen-Audio-Chat,,
|
4 |
wavllm_fairseq,69.67583093967994,
|
5 |
+
SALMONN_7B,,
|
6 |
+
Qwen2-Audio-7B-Instruct,99.1177677472302,98.40637450199203
|
7 |
+
whisper_large_v3_with_llama_3_8b_instruct,53.40582683627411,46.41434262948207
|
|
|
|
|
|
|
|
results/llama3_70b_judge_binary/sqa.csv
CHANGED
@@ -1,10 +1,7 @@
|
|
1 |
Model,cn_college_listen_mcq_test,dream_tts_mcq_test
|
2 |
-
|
|
|
3 |
wavllm_fairseq,65.43372963452224,64.55828541557763
|
|
|
4 |
Qwen2-Audio-7B-Instruct,74.50462351387054,66.70151594354418
|
5 |
-
meralion_audiollm_v1_lora,87.27432848965213,76.94720334553058
|
6 |
whisper_large_v3_with_llama_3_8b_instruct,85.24878907970057,86.0951385258756
|
7 |
-
mowe_audio,75.3412593571114,
|
8 |
-
qwen_audio_chat,60.85424922941436,57.76267642446419
|
9 |
-
meralion_audiollm_v1_mse,84.36811977102599,78.46314688970205
|
10 |
-
stage2_whisper3_fft_mlp100_gemma2_9b_lora,83.92778511668868,80.39728175640354
|
|
|
1 |
Model,cn_college_listen_mcq_test,dream_tts_mcq_test
|
2 |
+
MERaLiON_AudioLLM_v1,85.02862175253192,79.09043387349712
|
3 |
+
Qwen-Audio-Chat,,
|
4 |
wavllm_fairseq,65.43372963452224,64.55828541557763
|
5 |
+
SALMONN_7B,,56.821745948771564
|
6 |
Qwen2-Audio-7B-Instruct,74.50462351387054,66.70151594354418
|
|
|
7 |
whisper_large_v3_with_llama_3_8b_instruct,85.24878907970057,86.0951385258756
|
|
|
|
|
|
|
|
results/meteor/ac.csv
CHANGED
@@ -1,10 +1,7 @@
|
|
1 |
Model,audiocaps_test,wavcaps_test
|
2 |
-
|
|
|
3 |
wavllm_fairseq,,
|
|
|
4 |
Qwen2-Audio-7B-Instruct,,
|
5 |
-
meralion_audiollm_v1_lora,,
|
6 |
whisper_large_v3_with_llama_3_8b_instruct,,
|
7 |
-
mowe_audio,,
|
8 |
-
qwen_audio_chat,,
|
9 |
-
meralion_audiollm_v1_mse,,
|
10 |
-
stage2_whisper3_fft_mlp100_gemma2_9b_lora,,
|
|
|
1 |
Model,audiocaps_test,wavcaps_test
|
2 |
+
MERaLiON_AudioLLM_v1,,
|
3 |
+
Qwen-Audio-Chat,,
|
4 |
wavllm_fairseq,,
|
5 |
+
SALMONN_7B,,
|
6 |
Qwen2-Audio-7B-Instruct,,
|
|
|
7 |
whisper_large_v3_with_llama_3_8b_instruct,,
|
|
|
|
|
|
|
|
results/wer/asr.csv
CHANGED
@@ -1,10 +1,7 @@
|
|
1 |
Model,librispeech_test_clean,librispeech_test_other,common_voice_15_en_test,peoples_speech_test,gigaspeech_test,earnings21_test,earnings22_test,tedlium3_test,tedlium3_long_form_test,imda_part1_asr_test,imda_part2_asr_test
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
|
6 |
-
|
7 |
-
|
8 |
-
qwen_audio_chat,0.02246576376051609,0.04163277659031154,0.11645169375518152,0.30723956433545313,0.13316235845059654,0.38460383986837277,0.511809498898483,0.03995599872254356,0.45289842226555577,0.08434005911836524,0.5057046105756707
|
9 |
-
meralion_audiollm_v1_mse,0.45899196438676576,0.5464089129324531,1.0056747707276945,0.811818861521855,0.4370689590337161,0.3729613900901602,0.4129325088937485,0.3558425889783897,0.24396383619925546,0.4923371032001028,1.869158635061214
|
10 |
-
stage2_whisper3_fft_mlp100_gemma2_9b_lora,0.022031916097634587,0.04416743904515104,0.09438242684815651,0.25788482518786754,0.10128130883529002,0.15378928527754,0.20857550992683252,0.040701181647209116,0.058677539443361106,0.015052692455982522,0.044282365199270646
|
|
|
1 |
Model,librispeech_test_clean,librispeech_test_other,common_voice_15_en_test,peoples_speech_test,gigaspeech_test,earnings21_test,earnings22_test,tedlium3_test,tedlium3_long_form_test,imda_part1_asr_test,imda_part2_asr_test
|
2 |
+
MERaLiON_AudioLLM_v1,0.0316520164484853,0.05489246599958386,0.0987204452413545,0.2529415939486267,0.16893672174464575,0.17487020711276138,0.20473616818080898,0.0931123806820198,0.12051054777521716,0.051617779728378804,0.05437870302735168
|
3 |
+
Qwen-Audio-Chat,,,,,,,,,,,
|
4 |
+
wavllm_fairseq,0.02103218017882069,0.04798834811886432,0.14533325621300636,0.3792176325635977,0.15491778414546403,0.6447482518259942,0.6671766188447099,0.06621482559171073,0.4536784258110264,,
|
5 |
+
SALMONN_7B,0.10270871845172973,0.09671439650443565,,,,,,0.0459884319222171,0.14231519234178336,,
|
6 |
+
Qwen2-Audio-7B-Instruct,0.035141660693401744,0.060415760304159495,0.11438872500819404,0.2165498391593041,0.11723812890302816,0.18872219319407232,,0.06114048472375004,0.08739585179932637,0.07197717796796138,0.1905689473257041
|
7 |
+
whisper_large_v3_with_llama_3_8b_instruct,0.018334779492209605,0.03714982881570734,0.09876543209876543,0.14540692118393275,0.09515429104337297,0.11773910240019567,0.15611126487402763,0.038146268762641496,0.04754476156709803,0.06922195401458074,0.31912994075156237
|
|
|
|
|
|
results/wer/cnasr.csv
CHANGED
@@ -1,10 +1,7 @@
|
|
1 |
Model,aishell_asr_zh_test
|
2 |
-
|
|
|
3 |
wavllm_fairseq,0.7054601967888183
|
4 |
-
|
5 |
-
|
6 |
-
whisper_large_v3_with_llama_3_8b_instruct,
|
7 |
-
mowe_audio,1.0174318465284788
|
8 |
-
qwen_audio_chat,0.06602522222882144
|
9 |
-
meralion_audiollm_v1_mse,2.1331096196868007
|
10 |
-
stage2_whisper3_fft_mlp100_gemma2_9b_lora,0.08288293639009325
|
|
|
1 |
Model,aishell_asr_zh_test
|
2 |
+
MERaLiON_AudioLLM_v1,0.18824612460652135
|
3 |
+
Qwen-Audio-Chat,
|
4 |
wavllm_fairseq,0.7054601967888183
|
5 |
+
SALMONN_7B,0.8259290055631446
|
6 |
+
Qwen2-Audio-7B-Instruct,0.09260359129694522
|
7 |
+
whisper_large_v3_with_llama_3_8b_instruct,0.12450753301261111
|
|
|
|
|
|
|
|