Automatic Speech Recognition
NeMo
PyTorch
4 languages
automatic-speech-translation
speech
audio
Transformer
FastConformer
Conformer
NeMo
hf-asr-leaderboard
Eval Results
Files changed (1) hide show
  1. README.md +5 -5
README.md CHANGED
@@ -331,8 +331,8 @@ Another recommended option is to use a json manifest as input, where each line i
331
  # Example of a line in input_manifest.json
332
  {
333
  "audio_filepath": "/path/to/audio.wav", # path to the audio file
334
- "duration": 1000, # duration of the audio
335
- "taskname": "asr", # use "ast" for speech-to-text translation
336
  "source_lang": "en", # language of the audio input, set `source_lang`==`target_lang` for ASR, choices=['en','de','es','fr']
337
  "target_lang": "en", # language of the text output, choices=['en','de','es','fr']
338
  "pnc": "yes", # whether to have PnC output, choices=['yes', 'no']
@@ -364,7 +364,7 @@ An example manifest for transcribing English audios can be:
364
  # Example of a line in input_manifest.json
365
  {
366
  "audio_filepath": "/path/to/audio.wav", # path to the audio file
367
- "duration": 1000, # duration of the audio
368
  "taskname": "asr",
369
  "source_lang": "en", # language of the audio input, set `source_lang`==`target_lang` for ASR, choices=['en','de','es','fr']
370
  "target_lang": "en", # language of the text output, choices=['en','de','es','fr']
@@ -382,8 +382,8 @@ An example manifest for transcribing English audios into German text can be:
382
  # Example of a line in input_manifest.json
383
  {
384
  "audio_filepath": "/path/to/audio.wav", # path to the audio file
385
- "duration": 1000, # duration of the audio
386
- "taskname": "ast",
387
  "source_lang": "en", # language of the audio input, choices=['en','de','es','fr']
388
  "target_lang": "de", # language of the text output, choices=['en','de','es','fr']
389
  "pnc": "yes", # whether to have PnC output, choices=['yes', 'no']
 
331
  # Example of a line in input_manifest.json
332
  {
333
  "audio_filepath": "/path/to/audio.wav", # path to the audio file
334
+ "duration": 1000, # duration of the audio in seconds, can be set to `None` if using the NeMo main branch
335
+ "taskname": "asr", # use "s2t_translation" for speech-to-text translation with r1.23, or "ast" if using the NeMo main branch
336
  "source_lang": "en", # language of the audio input, set `source_lang`==`target_lang` for ASR, choices=['en','de','es','fr']
337
  "target_lang": "en", # language of the text output, choices=['en','de','es','fr']
338
  "pnc": "yes", # whether to have PnC output, choices=['yes', 'no']
 
364
  # Example of a line in input_manifest.json
365
  {
366
  "audio_filepath": "/path/to/audio.wav", # path to the audio file
367
+ "duration": 1000, # duration of the audio in seconds, can be set to `None` if using the NeMo main branch
368
  "taskname": "asr",
369
  "source_lang": "en", # language of the audio input, set `source_lang`==`target_lang` for ASR, choices=['en','de','es','fr']
370
  "target_lang": "en", # language of the text output, choices=['en','de','es','fr']
 
382
  # Example of a line in input_manifest.json
383
  {
384
  "audio_filepath": "/path/to/audio.wav", # path to the audio file
385
+ "duration": 1000, # duration of the audio in seconds, can be set to `None` if using the NeMo main branch
386
+ "taskname": "s2t_translation", # r1.23 only recognizes "s2t_translation", but "ast" is supported if using the NeMo main branch
387
  "source_lang": "en", # language of the audio input, choices=['en','de','es','fr']
388
  "target_lang": "de", # language of the text output, choices=['en','de','es','fr']
389
  "pnc": "yes", # whether to have PnC output, choices=['yes', 'no']