File size: 3,093 Bytes
33ac3a6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
{
    "asr": "<Speech><SpeechHere></Speech> Recognize the speech and give me the transcription.",
    "gender_recognition": "<Speech><SpeechHere></Speech> What is the gender of the speaker?",
    "dialect_identification": "<Speech><SpeechHere></Speech> What is the dialect of the speaker?",
    "asr_zh": "<Speech><SpeechHere></Speech> 请将语音中的内容写下来。",
    "summarization": "<Speech><SpeechHere></Speech> Could you capture the main points of this audio in a short summary?",
    "translation_ae": "<Speech><SpeechHere></Speech> Listen to the speech and translate it into English.",
    "asr_de": "<Speech><SpeechHere></Speech> Hören Sie sich die Rede an und schreiben Sie ihren Inhalt auf.",
    "translation_ec": "<Speech><SpeechHere></Speech> Listen to the speech and translate it into Chinese.",
    "audiocaption": "<Speech><SpeechHere></Speech> Please describe the audio.",
    "audiocaption_v2": "<Speech><SpeechHere></Speech> Please write down what you hear in the audio.",
    "QA": "<Speech><SpeechHere></Speech> {}",
    "gender_QA": "<Speech><SpeechHere></Speech> {}",
    "phone_recognition": "<Speech><SpeechHere></Speech> Provide the phonetic transcription for the speech.",
    "speech_query": "<Speech><SpeechHere></Speech> Please answer the question in detail.",
    "emotion_recognition": "<Speech><SpeechHere></Speech> Describe the emotion of the speaker in one word.", 
    "lyrics_recognition": "<Speech><SpeechHere></Speech> Listen to the song and write down its content.",
    "audio_speech_description": "<Speech><SpeechHere></Speech> Describe the speech and the background audio.",
    "speaker_verification": "<Speech><SpeechHere></Speech> Do you only hear the same person talking? Answer yes or no.",
    "fluent_speech_audio": "<Speech><SpeechHere></Speech> Describe the background audio and the speech in a fluent sentence.",
    "speech_separation": "<Speech><SpeechHere></Speech> Please write down what you hear each person say.",
    "audio_story_telling": "<Speech><SpeechHere></Speech> Based on the audio, write a story in detail. Your story should be highly related to the audio.",
    "speech_audio_query": "<Speech><SpeechHere></Speech> Please answer the speaker's question in detail based on the background sound.",
    "slot_filling": "<Speech><SpeechHere></Speech> According to the speech, what is the {}?",
    "music_description": "<Speech><SpeechHere></Speech> Listen to this music clip and describe the music.",
    "translation_en2ja": "<Speech><SpeechHere></Speech> Listen to the speech and translate it into Japanese.",
    "translation_en2de": "<Speech><SpeechHere></Speech> Listen to the speech and translate it into German.",
    "speech_audio_coreasoning": "<Speech><SpeechHere></Speech> Use your strong reasoning skills to answer the speaker's question in detail based on the background sound.",
    "keywords": "<Speech><SpeechHere></Speech> Give me only three keywords of the text.",
    "speaker_diarization_asr": "<Speech><SpeechHere></Speech> Please recognize each speaker and transcribe their speech content."
}