S2S_Evaluation / data /emotion.json
KurtDu's picture
Upload 21 files
50ad069 verified
raw
history blame
22.8 kB
[
{
"id": "emotion_audio_0",
"input_path": "/input/emotion/audio_0.wav",
"text": "[emotion: happy]Kids are talking by the door",
"task": "Emotion recognition and expression",
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?",
"output_path_4o": "/output/ChatGPT-4o/emotion/audio_0/audio_0.wav",
"output_path_miniomni": "/output/Mini-Omni/emotion/00.wav",
"output_path_speechgpt": "/output/SpeechGPT/emotion/answer_0.wav",
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_0.wav",
"text_cn": "孩子们在门旁说话",
"language": "English",
"category": "Social Companionship",
"output_path_4o_cascade": "/output/cascade/emotion/audio_0.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/audio_0.wav",
"level": "L3"
},
{
"id": "emotion_audio_1",
"input_path": "/input/emotion/audio_1.wav",
"text": "[emotion: sad]Kids are talking by the door",
"task": "Emotion recognition and expression",
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?",
"output_path_4o": "/output/ChatGPT-4o/emotion/audio_1/audio_1.wav",
"output_path_miniomni": "/output/Mini-Omni/emotion/01.wav",
"output_path_speechgpt": "/output/SpeechGPT/emotion/answer_1.wav",
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_1.wav",
"text_cn": "孩子们在门旁说话",
"language": "English",
"category": "Social Companionship",
"output_path_4o_cascade": "/output/cascade/emotion/audio_1.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/audio_1.wav",
"level": "L3"
},
{
"id": "emotion_audio_2",
"input_path": "/input/emotion/audio_2.wav",
"text": "[emotion: angry]Kids are talking by the door",
"task": "Emotion recognition and expression",
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?",
"output_path_4o": "/output/ChatGPT-4o/emotion/audio_2/audio_2.wav",
"output_path_miniomni": "/output/Mini-Omni/emotion/02.wav",
"output_path_speechgpt": "/output/SpeechGPT/emotion/answer_2.wav",
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_2.wav",
"text_cn": "孩子们在门旁说话",
"language": "English",
"category": "Social Companionship",
"output_path_4o_cascade": "/output/cascade/emotion/audio_2.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/audio_2.wav",
"level": "L3"
},
{
"id": "emotion_audio_3",
"input_path": "/input/emotion/audio_3.wav",
"text": "[emotion: fearful]Kids are talking by the door",
"task": "Emotion recognition and expression",
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?",
"output_path_4o": "/output/ChatGPT-4o/emotion/audio_3/audio_3.wav",
"output_path_miniomni": "/output/Mini-Omni/emotion/03.wav",
"output_path_speechgpt": "/output/SpeechGPT/emotion/answer_3.wav",
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_3.wav",
"text_cn": "孩子们在门旁说话",
"language": "English",
"category": "Social Companionship",
"output_path_4o_cascade": "/output/cascade/emotion/audio_3.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/audio_3.wav",
"level": "L3"
},
{
"id": "emotion_audio_4",
"input_path": "/input/emotion/audio_4.wav",
"text": "[emotion: disgust]Kids are talking by the door",
"task": "Emotion recognition and expression",
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?",
"output_path_4o": "/output/ChatGPT-4o/emotion/audio_4/audio_4.wav",
"output_path_miniomni": "/output/Mini-Omni/emotion/04.wav",
"output_path_speechgpt": "/output/SpeechGPT/emotion/answer_4.wav",
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_4.wav",
"text_cn": "孩子们在门旁说话",
"language": "English",
"category": "Social Companionship",
"output_path_4o_cascade": "/output/cascade/emotion/audio_4.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/audio_4.wav",
"level": "L3"
},
{
"id": "emotion_audio_5",
"input_path": "/input/emotion/audio_5.wav",
"text": "[emotion: surprised]Kids are talking by the door",
"task": "Emotion recognition and expression",
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?",
"output_path_4o": "/output/ChatGPT-4o/emotion/audio_5/audio_5.wav",
"output_path_miniomni": "/output/Mini-Omni/emotion/05.wav",
"output_path_speechgpt": "/output/SpeechGPT/emotion/answer_5.wav",
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_5.wav",
"text_cn": "孩子们在门旁说话",
"language": "English",
"category": "Social Companionship",
"output_path_4o_cascade": "/output/cascade/emotion/audio_5.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/audio_5.wav",
"level": "L3"
},
{
"id": "emotion_audio_6",
"input_path": "/input/emotion/audio_6.wav",
"text": "[emotion: happy]Dogs are sitting by the door",
"task": "Emotion recognition and expression",
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?",
"output_path_4o": "/output/ChatGPT-4o/emotion/audio_6/audio_6.wav",
"output_path_miniomni": "/output/Mini-Omni/emotion/06.wav",
"output_path_speechgpt": "/output/SpeechGPT/emotion/answer_6.wav",
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_6.wav",
"text_cn": "狗坐在门旁",
"language": "English",
"category": "Social Companionship",
"output_path_4o_cascade": "/output/cascade/emotion/audio_6.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/audio_6.wav",
"level": "L3"
},
{
"id": "emotion_audio_7",
"input_path": "/input/emotion/audio_7.wav",
"text": "[emotion: sad]Dogs are sitting by the door",
"task": "Emotion recognition and expression",
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?",
"output_path_4o": "/output/ChatGPT-4o/emotion/audio_7/audio_7.wav",
"output_path_miniomni": "/output/Mini-Omni/emotion/07.wav",
"output_path_speechgpt": "/output/SpeechGPT/emotion/answer_7.wav",
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_7.wav",
"text_cn": "狗坐在门旁",
"language": "English",
"category": "Social Companionship",
"output_path_4o_cascade": "/output/cascade/emotion/audio_7.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/audio_7.wav",
"level": "L3"
},
{
"id": "emotion_audio_8",
"input_path": "/input/emotion/audio_8.wav",
"text": "[emotion: angry]Dogs are sitting by the door",
"task": "Emotion recognition and expression",
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?",
"output_path_4o": "/output/ChatGPT-4o/emotion/audio_8/audio_8.wav",
"output_path_miniomni": "/output/Mini-Omni/emotion/08.wav",
"output_path_speechgpt": "/output/SpeechGPT/emotion/answer_8.wav",
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_8.wav",
"text_cn": "狗坐在门旁",
"language": "English",
"category": "Social Companionship",
"output_path_4o_cascade": "/output/cascade/emotion/audio_8.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/audio_8.wav",
"level": "L3"
},
{
"id": "emotion_audio_9",
"input_path": "/input/emotion/audio_9.wav",
"text": "[emotion: fearful]Dogs are sitting by the door",
"task": "Emotion recognition and expression",
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?",
"output_path_4o": "/output/ChatGPT-4o/emotion/audio_9/audio_9.wav",
"output_path_miniomni": "/output/Mini-Omni/emotion/09.wav",
"output_path_speechgpt": "/output/SpeechGPT/emotion/answer_9.wav",
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_9.wav",
"text_cn": "狗坐在门旁",
"language": "English",
"category": "Social Companionship",
"output_path_4o_cascade": "/output/cascade/emotion/audio_9.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/audio_9.wav",
"level": "L3"
},
{
"id": "emotion_audio_10",
"input_path": "/input/emotion/audio_10.wav",
"text": "[emotion: disgust]Dogs are sitting by the door",
"task": "Emotion recognition and expression",
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?",
"output_path_4o": "/output/ChatGPT-4o/emotion/audio_10/audio_10.wav",
"output_path_miniomni": "/output/Mini-Omni/emotion/10.wav",
"output_path_speechgpt": "/output/SpeechGPT/emotion/answer_10.wav",
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_10.wav",
"text_cn": "狗坐在门旁",
"language": "English",
"category": "Social Companionship",
"output_path_4o_cascade": "/output/cascade/emotion/audio_10.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/audio_10.wav",
"level": "L3"
},
{
"id": "emotion_audio_11",
"input_path": "/input/emotion/audio_11.wav",
"text": "[emotion: surprised]Dogs are sitting by the door",
"task": "Emotion recognition and expression",
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?",
"output_path_4o": "/output/ChatGPT-4o/emotion/audio_11/audio_11.wav",
"output_path_miniomni": "/output/Mini-Omni/emotion/11.wav",
"output_path_speechgpt": "/output/SpeechGPT/emotion/answer_11.wav",
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_11.wav",
"text_cn": "狗坐在门旁",
"language": "English",
"category": "Social Companionship",
"output_path_4o_cascade": "/output/cascade/emotion/audio_11.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/audio_11.wav",
"level": "L3"
},
{
"id": "emotion_emotion2-1",
"input_path": "/input/emotion/emotion2-1.wav",
"text": "[emotion: happy]What should I do now?",
"task": "Emotion recognition and expression",
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?",
"output_path_4o": "/output/ChatGPT-4o/emotion/emotion2-1/emotion2-1.wav",
"output_path_miniomni": "/output/Mini-Omni/emotion/13.wav",
"output_path_speechgpt": "/output/SpeechGPT/emotion/emotion2-1.wav",
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_13.wav",
"text_cn": "我现在该怎么办?",
"language": "English",
"category": "Social Companionship",
"output_path_4o_cascade": "/output/cascade/emotion/emotion2-1.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/emotion2-1.wav",
"level": "L3"
},
{
"id": "emotion_emotion2-2",
"input_path": "/input/emotion/emotion2-2.wav",
"text": "[emotion: sad]What should I do now?",
"task": "Emotion recognition and expression",
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?",
"output_path_4o": "/output/ChatGPT-4o/emotion/emotion2-2/emotion2-2.wav",
"output_path_miniomni": "/output/Mini-Omni/emotion/14.wav",
"output_path_speechgpt": "/output/SpeechGPT/emotion/emotion2-2.wav",
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_14.wav",
"text_cn": "我现在该怎么办?",
"language": "English",
"category": "Social Companionship",
"output_path_4o_cascade": "/output/cascade/emotion/emotion2-2.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/emotion2-2.wav",
"level": "L3"
},
{
"id": "emotion_emotion2-3",
"input_path": "/input/emotion/emotion2-3.wav",
"text": "[emotion: angry]What should I do now?",
"task": "Emotion recognition and expression",
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?",
"output_path_4o": "/output/ChatGPT-4o/emotion/emotion2-3/emotion2-3.wav",
"output_path_miniomni": "/output/Mini-Omni/emotion/15.wav",
"output_path_speechgpt": "/output/SpeechGPT/emotion/emotion2-3.wav",
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_15.wav",
"text_cn": "我现在该怎么办?",
"language": "English",
"category": "Social Companionship",
"output_path_4o_cascade": "/output/cascade/emotion/emotion2-3.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/emotion2-3.wav",
"level": "L3"
},
{
"id": "emotion_emotion2-4",
"input_path": "/input/emotion/emotion2-4.wav",
"text": "[emotion: fearful]What should I do now?",
"task": "Emotion recognition and expression",
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?",
"output_path_4o": "/output/ChatGPT-4o/emotion/emotion2-4/emotion2-4.wav",
"output_path_miniomni": "/output/Mini-Omni/emotion/16.wav",
"output_path_speechgpt": "/output/SpeechGPT/emotion/emotion2-4.wav",
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_16.wav",
"text_cn": "我现在该怎么办?",
"language": "English",
"category": "Social Companionship",
"output_path_4o_cascade": "/output/cascade/emotion/emotion2-4.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/emotion2-4.wav",
"level": "L3"
},
{
"id": "emotion_emotion3-1",
"input_path": "/input/emotion/emotion3-1.wav",
"text": "[emotion: happy]I really wish things could be different.",
"task": "Emotion recognition and expression",
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?",
"output_path_4o": "/output/ChatGPT-4o/emotion/emotion3-1/emotion3-1.wav",
"output_path_miniomni": "/output/Mini-Omni/emotion/17.wav",
"output_path_speechgpt": "/output/SpeechGPT/emotion/emotion3-1.wav",
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_17.wav",
"text_cn": "我真希望事情能够有所不同。",
"language": "English",
"category": "Social Companionship",
"output_path_4o_cascade": "/output/cascade/emotion/emotion3-1.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/emotion3-1.wav",
"level": "L3"
},
{
"id": "emotion_emotion3-2",
"input_path": "/input/emotion/emotion3-2.wav",
"text": "[emotion: sad]I really wish things could be different.",
"task": "Emotion recognition and expression",
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?",
"output_path_4o": "/output/ChatGPT-4o/emotion/emotion3-2/emotion3-2.wav",
"output_path_miniomni": "/output/Mini-Omni/emotion/18.wav",
"output_path_speechgpt": "/output/SpeechGPT/emotion/emotion3-2.wav",
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_18.wav",
"text_cn": "我真希望事情能够有所不同。",
"language": "English",
"category": "Social Companionship",
"output_path_4o_cascade": "/output/cascade/emotion/emotion3-2.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/emotion3-2.wav",
"level": "L3"
},
{
"id": "emotion_emotion3-3",
"input_path": "/input/emotion/emotion3-3.wav",
"text": "[emotion: angry]I really wish things could be different.",
"task": "Emotion recognition and expression",
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?",
"output_path_4o": "/output/ChatGPT-4o/emotion/emotion3-3/emotion3-3.wav",
"output_path_miniomni": "/output/Mini-Omni/emotion/19.wav",
"output_path_speechgpt": "/output/SpeechGPT/emotion/emotion3-3.wav",
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_19.wav",
"text_cn": "我真希望事情能够有所不同。",
"language": "English",
"category": "Social Companionship",
"output_path_4o_cascade": "/output/cascade/emotion/emotion3-3.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/emotion3-3.wav",
"level": "L3"
},
{
"id": "emotion_emotion3-4",
"input_path": "/input/emotion/emotion3-4.wav",
"text": "[emotion: fearful]I really wish things could be different.",
"task": "Emotion recognition and expression",
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?",
"output_path_4o": "/output/ChatGPT-4o/emotion/emotion3-4/emotion3-4.wav",
"output_path_miniomni": "/output/Mini-Omni/emotion/20.wav",
"output_path_speechgpt": "/output/SpeechGPT/emotion/emotion3-4.wav",
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_20.wav",
"text_cn": "我真希望事情能够有所不同。",
"language": "English",
"category": "Social Companionship",
"output_path_4o_cascade": "/output/cascade/emotion/emotion3-4.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/emotion3-4.wav",
"level": "L3"
},
{
"id": "emotion_emotion4-1",
"input_path": "/input/emotion/emotion4-1.wav",
"text": "[emotion: happy]This reminds me of a lot of things.",
"task": "Emotion recognition and expression",
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?",
"output_path_4o": "/output/ChatGPT-4o/emotion/emotion4-1/emotion4-1.wav",
"output_path_miniomni": "/output/Mini-Omni/emotion/21.wav",
"output_path_speechgpt": "/output/SpeechGPT/emotion/emotion4-1.wav",
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_21.wav",
"text_cn": "这让我想起了很多事。",
"language": "English",
"category": "Social Companionship",
"output_path_4o_cascade": "/output/cascade/emotion/emotion4-1.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/emotion4-1.wav",
"level": "L3"
},
{
"id": "emotion_emotion4-2",
"input_path": "/input/emotion/emotion4-2.wav",
"text": "[emotion: sad]This reminds me of a lot of things.",
"task": "Emotion recognition and expression",
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?",
"output_path_4o": "/output/ChatGPT-4o/emotion/emotion4-2/emotion4-2.wav",
"output_path_miniomni": "/output/Mini-Omni/emotion/22.wav",
"output_path_speechgpt": "/output/SpeechGPT/emotion/emotion4-2.wav",
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_22.wav",
"text_cn": "这让我想起了很多事。",
"language": "English",
"category": "Social Companionship",
"output_path_4o_cascade": "/output/cascade/emotion/emotion4-2.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/emotion4-2.wav",
"level": "L3"
},
{
"id": "emotion_emotion4-3",
"input_path": "/input/emotion/emotion4-3.wav",
"text": "[emotion: angry]This reminds me of a lot of things.",
"task": "Emotion recognition and expression",
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?",
"output_path_4o": "/output/ChatGPT-4o/emotion/emotion4-3/emotion4-3.wav",
"output_path_miniomni": "/output/Mini-Omni/emotion/23.wav",
"output_path_speechgpt": "/output/SpeechGPT/emotion/emotion4-3.wav",
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_23.wav",
"text_cn": "这让我想起了很多事。",
"language": "English",
"category": "Social Companionship",
"output_path_4o_cascade": "/output/cascade/emotion/emotion4-3.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/emotion4-3.wav",
"level": "L3"
},
{
"id": "emotion_emotion4-4",
"input_path": "/input/emotion/emotion4-4.wav",
"text": "[emotion: fearful]This reminds me of a lot of things.",
"task": "Emotion recognition and expression",
"task_description": "Can the model recognize emotions and provide appropriate responses based on different emotions?",
"output_path_4o": "/output/ChatGPT-4o/emotion/emotion4-4/emotion4-4.wav",
"output_path_miniomni": "/output/Mini-Omni/emotion/24.wav",
"output_path_speechgpt": "/output/SpeechGPT/emotion/emotion4-4.wav",
"output_path_funaudio": "/output/FunAudioLLM/emotion/audio_24.wav",
"text_cn": "这让我想起了很多事。",
"language": "English",
"category": "Social Companionship",
"output_path_4o_cascade": "/output/cascade/emotion/emotion4-4.wav",
"output_path_4o_llama_omni": "/output/LLaMA_omni/emotion/emotion4-4.wav",
"level": "L3"
}
]