Spaces:

MERaLiON
/

AudioBench-Leaderboard

Running

App Files Community

zhuohan-7 committed on Oct 28, 2024

Commit

7d4aeea

verified ·

1 Parent(s): 181fb78

Upload folder using huggingface_hub

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

.gitattributes +18 -0
examples/2AC/AudioCaps-Test/data-00000-of-00001.arrow +3 -0
examples/2AC/AudioCaps-Test/dataset_info.json +160 -0
examples/2AC/AudioCaps-Test/sample_0.wav +0 -0
examples/2AC/AudioCaps-Test/sample_1.wav +0 -0
examples/2AC/AudioCaps-Test/sample_2.wav +0 -0
examples/2AC/AudioCaps-Test/state.json +24 -0
examples/2AC/WavCaps-Test/data-00000-of-00001.arrow +3 -0
examples/2AC/WavCaps-Test/dataset_info.json +156 -0
examples/2AC/WavCaps-Test/sample_0.wav +0 -0
examples/2AC/WavCaps-Test/sample_1.wav +0 -0
examples/2AC/WavCaps-Test/sample_2.wav +0 -0
examples/2AC/WavCaps-Test/state.json +24 -0
examples/2AQA/AudioCaps-QA-Test/data-00000-of-00001.arrow +3 -0
examples/2AQA/AudioCaps-QA-Test/dataset_info.json +164 -0
examples/2AQA/AudioCaps-QA-Test/sample_0.wav +0 -0
examples/2AQA/AudioCaps-QA-Test/sample_1.wav +0 -0
examples/2AQA/AudioCaps-QA-Test/sample_2.wav +0 -0
examples/2AQA/AudioCaps-QA-Test/state.json +24 -0
examples/2AQA/Clotho-AQA-Test/data-00000-of-00001.arrow +3 -0
examples/2AQA/Clotho-AQA-Test/dataset_info.json +147 -0
examples/2AQA/Clotho-AQA-Test/sample_0.wav +0 -0
examples/2AQA/Clotho-AQA-Test/sample_1.wav +0 -0
examples/2AQA/Clotho-AQA-Test/sample_2.wav +0 -0
examples/2AQA/Clotho-AQA-Test/state.json +24 -0
examples/2AQA/WavCaps-QA-Test/data-00000-of-00001.arrow +3 -0
examples/2AQA/WavCaps-QA-Test/dataset_info.json +160 -0
examples/2AQA/WavCaps-QA-Test/sample_0.wav +0 -0
examples/2AQA/WavCaps-QA-Test/sample_1.wav +0 -0
examples/2AQA/WavCaps-QA-Test/sample_2.wav +0 -0
examples/2AQA/WavCaps-QA-Test/state.json +24 -0
examples/2AR/VoxCeleb-Accent-Test/data-00000-of-00001.arrow +3 -0
examples/2AR/VoxCeleb-Accent-Test/dataset_info.json +168 -0
examples/2AR/VoxCeleb-Accent-Test/sample_0.wav +0 -0
examples/2AR/VoxCeleb-Accent-Test/sample_1.wav +0 -0
examples/2AR/VoxCeleb-Accent-Test/sample_2.wav +0 -0
examples/2AR/VoxCeleb-Accent-Test/state.json +24 -0
examples/2ASR/Common-Voice-15-En-Test/data-00000-of-00001.arrow +3 -0
examples/2ASR/Common-Voice-15-En-Test/dataset_info.json +188 -0
examples/2ASR/Common-Voice-15-En-Test/sample_0.wav +0 -0
examples/2ASR/Common-Voice-15-En-Test/sample_1.wav +0 -0
examples/2ASR/Common-Voice-15-En-Test/sample_2.wav +0 -0
examples/2ASR/Common-Voice-15-En-Test/state.json +24 -0
examples/2ASR/Earnings21-Test/data-00000-of-00001.arrow +3 -0
examples/2ASR/Earnings21-Test/dataset_info.json +152 -0
examples/2ASR/Earnings21-Test/sample_0.wav +3 -0
examples/2ASR/Earnings21-Test/sample_1.wav +3 -0
examples/2ASR/Earnings21-Test/sample_2.wav +3 -0
examples/2ASR/Earnings21-Test/state.json +24 -0
examples/2ASR/Earnings22-Test/data-00000-of-00001.arrow +3 -0

.gitattributes CHANGED Viewed

@@ -60,3 +60,21 @@ examples/SQA/CN-College-Listen-MCQ-Test/sample_0.wav filter=lfs diff=lfs merge=l
 examples/SQA/Spoken-Squad-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text
 examples/SQA/Spoken-Squad-Test/sample_1.wav filter=lfs diff=lfs merge=lfs -text
 examples/SQA/Spoken-Squad-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text

 examples/SQA/Spoken-Squad-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text
 examples/SQA/Spoken-Squad-Test/sample_1.wav filter=lfs diff=lfs merge=lfs -text
 examples/SQA/Spoken-Squad-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text
+examples/2ASR/Earnings21-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text
+examples/2ASR/Earnings21-Test/sample_1.wav filter=lfs diff=lfs merge=lfs -text
+examples/2ASR/Earnings21-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text
+examples/2ASR/Earnings22-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text
+examples/2ASR/Earnings22-Test/sample_1.wav filter=lfs diff=lfs merge=lfs -text
+examples/2ASR/Earnings22-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text
+examples/2ASR/Tedlium3-Long-form-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text
+examples/2ASR/Tedlium3-Long-form-Test/sample_1.wav filter=lfs diff=lfs merge=lfs -text
+examples/2ASR/Tedlium3-Long-form-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text
+examples/2SQA/CN-College-Listen-MCQ-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text
+examples/2SQA/Public-SG-Speech-QA-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text
+examples/2SQA/SLUE-P2-SQA5-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text
+examples/2SQA/SLUE-P2-SQA5-Test/sample_1.wav filter=lfs diff=lfs merge=lfs -text
+examples/2SQA/SLUE-P2-SQA5-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text
+examples/2SQA/Spoken-Squad-Test/sample_0.wav filter=lfs diff=lfs merge=lfs -text
+examples/2SQA/Spoken-Squad-Test/sample_1.wav filter=lfs diff=lfs merge=lfs -text
+examples/2SQA/Spoken-Squad-Test/sample_2.wav filter=lfs diff=lfs merge=lfs -text
+examples/SQA/DREAM-TTS-MCQ-Test/sample_1.wav filter=lfs diff=lfs merge=lfs -text

examples/2AC/AudioCaps-Test/data-00000-of-00001.arrow ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:70213ecc31962d6d8bbc0e4d7ae2dd302c851a4af00f12b07735311f5f128288
+size 966216

examples/2AC/AudioCaps-Test/dataset_info.json ADDED Viewed

	@@ -0,0 +1,160 @@

+{
+  "citation": "",
+  "description": "",
+  "features": {
+    "context": {
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "audio": {
+        "sampling_rate": 16000,
+        "_type": "Audio"
+      }
+    },
+    "instruction": {
+      "audio": {
+        "dtype": "null",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "answer": {
+      "audio": {
+        "dtype": "null",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "other_attributes": {
+      "audiocap_id": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "start_time": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "youtube_id": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "salmonn_7b": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "wavllm_fairseq": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "Qwen2-Audio-7B-Instruct": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "whisper_large_v3_with_llama_3_8b_instruct": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "mowe_audio": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "qwen_audio_chat": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    }
+  },
+  "homepage": "",
+  "license": ""
+}

examples/2AC/AudioCaps-Test/sample_0.wav ADDED Viewed

Binary file (320 kB). View file

examples/2AC/AudioCaps-Test/sample_1.wav ADDED Viewed

Binary file (320 kB). View file

examples/2AC/AudioCaps-Test/sample_2.wav ADDED Viewed

Binary file (315 kB). View file

examples/2AC/AudioCaps-Test/state.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "e736bf1821a473f3",
+  "_format_columns": [
+    "context",
+    "instruction",
+    "answer",
+    "other_attributes",
+    "salmonn_7b",
+    "wavllm_fairseq",
+    "Qwen2-Audio-7B-Instruct",
+    "whisper_large_v3_with_llama_3_8b_instruct",
+    "mowe_audio",
+    "qwen_audio_chat"
+  ],
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": null
+}

examples/2AC/WavCaps-Test/data-00000-of-00001.arrow ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9636af636286b1aedad840ccef31ca7d879e824ddc6814fcb7411b7fbdf411aa
+size 812352

examples/2AC/WavCaps-Test/dataset_info.json ADDED Viewed

	@@ -0,0 +1,156 @@

+{
+  "citation": "",
+  "description": "",
+  "features": {
+    "context": {
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "audio": {
+        "sampling_rate": 16000,
+        "_type": "Audio"
+      }
+    },
+    "instruction": {
+      "audio": {
+        "dtype": "null",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "answer": {
+      "audio": {
+        "dtype": "null",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "other_attributes": {
+      "audio_path": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "duration": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "salmonn_7b": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "wavllm_fairseq": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "Qwen2-Audio-7B-Instruct": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "whisper_large_v3_with_llama_3_8b_instruct": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "mowe_audio": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "qwen_audio_chat": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    }
+  },
+  "homepage": "",
+  "license": ""
+}

examples/2AC/WavCaps-Test/sample_0.wav ADDED Viewed

Binary file (32 kB). View file

examples/2AC/WavCaps-Test/sample_1.wav ADDED Viewed

Binary file (268 kB). View file

examples/2AC/WavCaps-Test/sample_2.wav ADDED Viewed

Binary file (500 kB). View file

examples/2AC/WavCaps-Test/state.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "742ab313af054565",
+  "_format_columns": [
+    "context",
+    "instruction",
+    "answer",
+    "other_attributes",
+    "salmonn_7b",
+    "wavllm_fairseq",
+    "Qwen2-Audio-7B-Instruct",
+    "whisper_large_v3_with_llama_3_8b_instruct",
+    "mowe_audio",
+    "qwen_audio_chat"
+  ],
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": null
+}

examples/2AQA/AudioCaps-QA-Test/data-00000-of-00001.arrow ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:92b0b2fe81ee0e3a2690a444bb9b68994d89ca53db6ce174f5802293549256d1
+size 953616

examples/2AQA/AudioCaps-QA-Test/dataset_info.json ADDED Viewed

	@@ -0,0 +1,164 @@

+{
+  "citation": "",
+  "description": "",
+  "features": {
+    "context": {
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "audio": {
+        "sampling_rate": 16000,
+        "_type": "Audio"
+      }
+    },
+    "instruction": {
+      "audio": {
+        "dtype": "null",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "answer": {
+      "audio": {
+        "dtype": "null",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "other_attributes": {
+      "audiocap_id": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "caption": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "start_time": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "youtube_id": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "salmonn_7b": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "wavllm_fairseq": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "Qwen2-Audio-7B-Instruct": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "whisper_large_v3_with_llama_3_8b_instruct": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "mowe_audio": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "qwen_audio_chat": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    }
+  },
+  "homepage": "",
+  "license": ""
+}

examples/2AQA/AudioCaps-QA-Test/sample_0.wav ADDED Viewed

Binary file (320 kB). View file

examples/2AQA/AudioCaps-QA-Test/sample_1.wav ADDED Viewed

Binary file (320 kB). View file

examples/2AQA/AudioCaps-QA-Test/sample_2.wav ADDED Viewed

Binary file (302 kB). View file

examples/2AQA/AudioCaps-QA-Test/state.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "52bc1dfcaf2a0f4b",
+  "_format_columns": [
+    "context",
+    "instruction",
+    "answer",
+    "other_attributes",
+    "salmonn_7b",
+    "wavllm_fairseq",
+    "Qwen2-Audio-7B-Instruct",
+    "whisper_large_v3_with_llama_3_8b_instruct",
+    "mowe_audio",
+    "qwen_audio_chat"
+  ],
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": null
+}

examples/2AQA/Clotho-AQA-Test/data-00000-of-00001.arrow ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:08b4de88bf163bbfd2097769e6104ba8514380a7a097741d38e1ccc41d5b0f86
+size 2035832

examples/2AQA/Clotho-AQA-Test/dataset_info.json ADDED Viewed

	@@ -0,0 +1,147 @@

+{
+  "citation": "",
+  "description": "",
+  "features": {
+    "context": {
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "audio": {
+        "sampling_rate": 16000,
+        "_type": "Audio"
+      }
+    },
+    "instruction": {
+      "audio": {
+        "dtype": "null",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "answer": {
+      "audio": {
+        "dtype": "null",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "other_attributes": {},
+    "salmonn_7b": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "wavllm_fairseq": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "Qwen2-Audio-7B-Instruct": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "whisper_large_v3_with_llama_3_8b_instruct": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "mowe_audio": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "qwen_audio_chat": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    }
+  },
+  "homepage": "",
+  "license": ""
+}

examples/2AQA/Clotho-AQA-Test/sample_0.wav ADDED Viewed

Binary file (615 kB). View file

examples/2AQA/Clotho-AQA-Test/sample_1.wav ADDED Viewed

Binary file (576 kB). View file

examples/2AQA/Clotho-AQA-Test/sample_2.wav ADDED Viewed

Binary file (835 kB). View file

examples/2AQA/Clotho-AQA-Test/state.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "e2e76326f448d7c4",
+  "_format_columns": [
+    "context",
+    "instruction",
+    "answer",
+    "other_attributes",
+    "salmonn_7b",
+    "wavllm_fairseq",
+    "Qwen2-Audio-7B-Instruct",
+    "whisper_large_v3_with_llama_3_8b_instruct",
+    "mowe_audio",
+    "qwen_audio_chat"
+  ],
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": null
+}

examples/2AQA/WavCaps-QA-Test/data-00000-of-00001.arrow ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1736d5bcc9ca0d8d4847d7d000e6c6e63c73f6262177ea0391d180c40649da39
+size 837920

examples/2AQA/WavCaps-QA-Test/dataset_info.json ADDED Viewed

	@@ -0,0 +1,160 @@

+{
+  "citation": "",
+  "description": "",
+  "features": {
+    "context": {
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "audio": {
+        "sampling_rate": 16000,
+        "_type": "Audio"
+      }
+    },
+    "instruction": {
+      "audio": {
+        "dtype": "null",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "answer": {
+      "audio": {
+        "dtype": "null",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "other_attributes": {
+      "audio_path": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "caption": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "duration": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "salmonn_7b": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "wavllm_fairseq": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "Qwen2-Audio-7B-Instruct": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "whisper_large_v3_with_llama_3_8b_instruct": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "mowe_audio": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "qwen_audio_chat": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    }
+  },
+  "homepage": "",
+  "license": ""
+}

examples/2AQA/WavCaps-QA-Test/sample_0.wav ADDED Viewed

Binary file (320 kB). View file

examples/2AQA/WavCaps-QA-Test/sample_1.wav ADDED Viewed

Binary file (251 kB). View file

examples/2AQA/WavCaps-QA-Test/sample_2.wav ADDED Viewed

Binary file (256 kB). View file

examples/2AQA/WavCaps-QA-Test/state.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "40995a6cc1fe3dc7",
+  "_format_columns": [
+    "context",
+    "instruction",
+    "answer",
+    "other_attributes",
+    "salmonn_7b",
+    "wavllm_fairseq",
+    "Qwen2-Audio-7B-Instruct",
+    "whisper_large_v3_with_llama_3_8b_instruct",
+    "mowe_audio",
+    "qwen_audio_chat"
+  ],
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": null
+}

examples/2AR/VoxCeleb-Accent-Test/data-00000-of-00001.arrow ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5307150e5b08f9bfd7b81d56ca7ffbee1d731d002417d20fc3ec0713bc764533
+size 730864

examples/2AR/VoxCeleb-Accent-Test/dataset_info.json ADDED Viewed

	@@ -0,0 +1,168 @@

+{
+  "citation": "",
+  "description": "",
+  "features": {
+    "context": {
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "audio": {
+        "sampling_rate": 16000,
+        "_type": "Audio"
+      }
+    },
+    "instruction": {
+      "audio": {
+        "dtype": "null",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "answer": {
+      "audio": {
+        "dtype": "null",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "other_attributes": {
+      "Gender": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "Nationality": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "VGGFace1 ID": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "VoxCeleb1 ID": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "index": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "salmonn_7b": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "wavllm_fairseq": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "Qwen2-Audio-7B-Instruct": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "whisper_large_v3_with_llama_3_8b_instruct": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "mowe_audio": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "qwen_audio_chat": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    }
+  },
+  "homepage": "",
+  "license": ""
+}

examples/2AR/VoxCeleb-Accent-Test/sample_0.wav ADDED Viewed

Binary file (268 kB). View file

examples/2AR/VoxCeleb-Accent-Test/sample_1.wav ADDED Viewed

Binary file (152 kB). View file

examples/2AR/VoxCeleb-Accent-Test/sample_2.wav ADDED Viewed

Binary file (301 kB). View file

examples/2AR/VoxCeleb-Accent-Test/state.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "fa91a59f90c22c3c",
+  "_format_columns": [
+    "context",
+    "instruction",
+    "answer",
+    "other_attributes",
+    "salmonn_7b",
+    "wavllm_fairseq",
+    "Qwen2-Audio-7B-Instruct",
+    "whisper_large_v3_with_llama_3_8b_instruct",
+    "mowe_audio",
+    "qwen_audio_chat"
+  ],
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": null
+}

examples/2ASR/Common-Voice-15-En-Test/data-00000-of-00001.arrow ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f9a561ce9aed8ba4c02f86c90883029e1bb566e2b66986b17874f3bb5884d67d
+size 489552

examples/2ASR/Common-Voice-15-En-Test/dataset_info.json ADDED Viewed

	@@ -0,0 +1,188 @@

+{
+  "citation": "",
+  "description": "",
+  "features": {
+    "context": {
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "audio": {
+        "sampling_rate": 16000,
+        "_type": "Audio"
+      }
+    },
+    "instruction": {
+      "audio": {
+        "dtype": "null",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "answer": {
+      "audio": {
+        "dtype": "null",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "other_attributes": {
+      "accents": {
+        "dtype": "null",
+        "_type": "Value"
+      },
+      "age": {
+        "dtype": "null",
+        "_type": "Value"
+      },
+      "client_id": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "down_votes": {
+        "dtype": "int64",
+        "_type": "Value"
+      },
+      "gender": {
+        "dtype": "null",
+        "_type": "Value"
+      },
+      "language": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "locale": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "segment": {
+        "dtype": "null",
+        "_type": "Value"
+      },
+      "up_votes": {
+        "dtype": "int64",
+        "_type": "Value"
+      },
+      "variant": {
+        "dtype": "null",
+        "_type": "Value"
+      }
+    },
+    "salmonn_7b": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "wavllm_fairseq": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "Qwen2-Audio-7B-Instruct": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "whisper_large_v3_with_llama_3_8b_instruct": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "mowe_audio": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "qwen_audio_chat": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    }
+  },
+  "homepage": "",
+  "license": ""
+}

examples/2ASR/Common-Voice-15-En-Test/sample_0.wav ADDED Viewed

Binary file (158 kB). View file

examples/2ASR/Common-Voice-15-En-Test/sample_1.wav ADDED Viewed

Binary file (172 kB). View file

examples/2ASR/Common-Voice-15-En-Test/sample_2.wav ADDED Viewed

Binary file (148 kB). View file

examples/2ASR/Common-Voice-15-En-Test/state.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "468db91ad949e4d4",
+  "_format_columns": [
+    "context",
+    "instruction",
+    "answer",
+    "other_attributes",
+    "salmonn_7b",
+    "wavllm_fairseq",
+    "Qwen2-Audio-7B-Instruct",
+    "whisper_large_v3_with_llama_3_8b_instruct",
+    "mowe_audio",
+    "qwen_audio_chat"
+  ],
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": null
+}

examples/2ASR/Earnings21-Test/data-00000-of-00001.arrow ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:5fbca261ae8ac8ccc96993ea11f17836bdcfef1070835784f159b79990a5a298
+size 429108160

examples/2ASR/Earnings21-Test/dataset_info.json ADDED Viewed

	@@ -0,0 +1,152 @@

+{
+  "citation": "",
+  "description": "",
+  "features": {
+    "context": {
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "audio": {
+        "sampling_rate": 16000,
+        "_type": "Audio"
+      }
+    },
+    "instruction": {
+      "audio": {
+        "dtype": "null",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "answer": {
+      "audio": {
+        "dtype": "null",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "other_attributes": {
+      "id": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "salmonn_7b": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "wavllm_fairseq": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "Qwen2-Audio-7B-Instruct": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "whisper_large_v3_with_llama_3_8b_instruct": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "mowe_audio": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    },
+    "qwen_audio_chat": {
+      "answer": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "model_prediction": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "task_type": {
+        "dtype": "string",
+        "_type": "Value"
+      },
+      "text": {
+        "dtype": "string",
+        "_type": "Value"
+      }
+    }
+  },
+  "homepage": "",
+  "license": ""
+}

examples/2ASR/Earnings21-Test/sample_0.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d8fb994964e1b0df9f4675ceaa73d55da55a096f5b94d002d9f7b07c997fc83e
+size 97593644

examples/2ASR/Earnings21-Test/sample_1.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:bd6ba77731011a6dc02e5854a600a2036713be4c2d71abf63fd6a89b86083c4f
+size 178791280

examples/2ASR/Earnings21-Test/sample_2.wav ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:1a1d15425069b003730e79f0df467103f4ac7670f87a0539a97c82973a02943e
+size 150700076

examples/2ASR/Earnings21-Test/state.json ADDED Viewed

	@@ -0,0 +1,24 @@

+{
+  "_data_files": [
+    {
+      "filename": "data-00000-of-00001.arrow"
+    }
+  ],
+  "_fingerprint": "8cc0ad99446f1aba",
+  "_format_columns": [
+    "context",
+    "instruction",
+    "answer",
+    "other_attributes",
+    "salmonn_7b",
+    "wavllm_fairseq",
+    "Qwen2-Audio-7B-Instruct",
+    "whisper_large_v3_with_llama_3_8b_instruct",
+    "mowe_audio",
+    "qwen_audio_chat"
+  ],
+  "_format_kwargs": {},
+  "_format_type": null,
+  "_output_all_columns": false,
+  "_split": null
+}

examples/2ASR/Earnings22-Test/data-00000-of-00001.arrow ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:497dd6d287df9a8be5194b7875ae88f275127986d3fc538601382e80244bbb7b
+size 332277848