nvidia
/

canary-1b

@@ -331,8 +331,8 @@ Another recommended option is to use a json manifest as input, where each line i
 # Example of a line in input_manifest.json
 {
     "audio_filepath": "/path/to/audio.wav",  # path to the audio file
-    "duration": 1000,  # duration of the audio
-    "taskname": "asr",  # use "ast" for speech-to-text translation
     "source_lang": "en",  # language of the audio input, set `source_lang`==`target_lang` for ASR, choices=['en','de','es','fr']
     "target_lang": "en",  # language of the text output, choices=['en','de','es','fr']
     "pnc": "yes",  # whether to have PnC output, choices=['yes', 'no']
@@ -364,7 +364,7 @@ An example manifest for transcribing English audios can be:
 # Example of a line in input_manifest.json
 {
     "audio_filepath": "/path/to/audio.wav",  # path to the audio file
-    "duration": 1000,  # duration of the audio
     "taskname": "asr",
     "source_lang": "en", # language of the audio input, set `source_lang`==`target_lang` for ASR, choices=['en','de','es','fr']
     "target_lang": "en", # language of the text output, choices=['en','de','es','fr']
@@ -382,8 +382,8 @@ An example manifest for transcribing English audios into German text can be:
 # Example of a line in input_manifest.json
 {
     "audio_filepath": "/path/to/audio.wav",  # path to the audio file
-    "duration": 1000,  # duration of the audio
-    "taskname": "ast",
     "source_lang": "en", # language of the audio input, choices=['en','de','es','fr']
     "target_lang": "de", # language of the text output, choices=['en','de','es','fr']
     "pnc": "yes",  # whether to have PnC output, choices=['yes', 'no']

 # Example of a line in input_manifest.json
 {
     "audio_filepath": "/path/to/audio.wav",  # path to the audio file
+    "duration": 1000,  # duration of the audio in seconds, can be set to `None` if using NeMo main branch
+    "taskname": "asr",  # use "s2t_translation" for speech-to-text translation with r1.23, or "ast" if using the NeMo main branch
     "source_lang": "en",  # language of the audio input, set `source_lang`==`target_lang` for ASR, choices=['en','de','es','fr']
     "target_lang": "en",  # language of the text output, choices=['en','de','es','fr']
     "pnc": "yes",  # whether to have PnC output, choices=['yes', 'no']
 # Example of a line in input_manifest.json
 {
     "audio_filepath": "/path/to/audio.wav",  # path to the audio file
+    "duration": 1000,  # duration of the audio in seconds, can be set to `None` if using NeMo main branch
     "taskname": "asr",
     "source_lang": "en", # language of the audio input, set `source_lang`==`target_lang` for ASR, choices=['en','de','es','fr']
     "target_lang": "en", # language of the text output, choices=['en','de','es','fr']
 # Example of a line in input_manifest.json
 {
     "audio_filepath": "/path/to/audio.wav",  # path to the audio file
+    "duration": 1000,  # duration of the audio in seconds, can be set to `None` if using NeMo main branch
+    "taskname": "s2t_translation", # r1.23 only recognizes "s2t_translation", but "ast" is supported if using the NeMo main branch
     "source_lang": "en", # language of the audio input, choices=['en','de','es','fr']
     "target_lang": "de", # language of the text output, choices=['en','de','es','fr']
     "pnc": "yes",  # whether to have PnC output, choices=['yes', 'no']