README.md · nvidia/canary-1b at 0b65545bda1c069de870071e88967ac52203e2b4

metadata
# License identifier for the model card.
license: cc-by-nc-4.0
# Languages declared for the model, as two-letter codes.
language: [en, de, es, fr]
# Library the checkpoint is tied to.
library_name: nemo
# Datasets declared for this model. Entries containing a slash are
# namespaced ids (org/name); the remaining entries are bare names.
datasets:
  - librispeech_asr
  - fisher_corpus
  - Switchboard-1
  - WSJ-0
  - WSJ-1
  - National-Singapore-Corpus-Part-1
  - National-Singapore-Corpus-Part-6
  - vctk
  - voxpopuli
  - europarl
  - multilingual_librispeech
  - mozilla-foundation/common_voice_8_0
  - MLCommons/peoples_speech
# No thumbnail image is set (explicit null).
thumbnail: null
# Free-form tags attached to the model card.
# NOTE(review): verify that the "Transducer" tag applies to canary-1b —
# TODO confirm against the model's actual architecture.
tags:
  - automatic-speech-recognition
  - speech
  - audio
  - Transducer
  - FastConformer
  - Conformer
  - pytorch
  - NeMo
  - hf-asr-leaderboard
# Sample inputs: each entry pairs a display title with the URL of a
# FLAC audio clip.
widget:
  - example_title: Librispeech sample 1
    src: https://cdn-media.huggingface.co/speech_samples/sample1.flac
  - example_title: Librispeech sample 2
    src: https://cdn-media.huggingface.co/speech_samples/sample2.flac
# Evaluation results in model-index form. Every entry below reports word
# error rate (type: wer) on the English test split of one benchmark.
# NOTE(review): this section was previously labelled `parakeet_rnnt_1.1b`,
# so the WER values may have been copied from another model card — confirm
# they were measured with canary-1b.
model-index:
  - name: canary-1b
    results:
      - task:
          name: Automatic Speech Recognition
          type: automatic-speech-recognition
        dataset:
          name: AMI (Meetings test)
          type: edinburghcstr/ami
          config: ihm
          split: test
          args:
            language: en
        metrics:
          - name: Test WER
            type: wer
            value: 17.1
      - task:
          name: Automatic Speech Recognition
          type: automatic-speech-recognition
        dataset:
          name: Earnings-22
          type: revdotcom/earnings22
          split: test
          args:
            language: en
        metrics:
          - name: Test WER
            type: wer
            value: 14.11
      - task:
          name: Automatic Speech Recognition
          type: automatic-speech-recognition
        dataset:
          name: GigaSpeech
          type: speechcolab/gigaspeech
          split: test
          args:
            language: en
        metrics:
          - name: Test WER
            type: wer
            value: 9.96
      - task:
          name: Automatic Speech Recognition
          type: automatic-speech-recognition
        dataset:
          name: LibriSpeech (clean)
          type: librispeech_asr
          # The clean subset; the "other" subset is the next entry.
          config: clean
          split: test
          args:
            language: en
        metrics:
          - name: Test WER
            type: wer
            value: 1.46
      - task:
          name: Automatic Speech Recognition
          type: automatic-speech-recognition
        dataset:
          name: LibriSpeech (other)
          type: librispeech_asr
          config: other
          split: test
          args:
            language: en
        metrics:
          - name: Test WER
            type: wer
            value: 2.47
      - task:
          # `name` is the display label, `type` the task id, matching the
          # other entries in this list.
          name: Automatic Speech Recognition
          type: automatic-speech-recognition
        dataset:
          name: SPGI Speech
          type: kensho/spgispeech
          config: test
          split: test
          args:
            language: en
        metrics:
          - name: Test WER
            type: wer
            value: 3.11
      - task:
          name: Automatic Speech Recognition
          type: automatic-speech-recognition
        dataset:
          name: tedlium-v3
          type: LIUM/tedlium
          # NOTE(review): the display name says v3 but the config is
          # release1 — confirm which release was evaluated.
          config: release1
          split: test
          args:
            language: en
        metrics:
          - name: Test WER
            type: wer
            value: 3.92
      - task:
          name: Automatic Speech Recognition
          type: automatic-speech-recognition
        dataset:
          name: Vox Populi
          type: facebook/voxpopuli
          config: en
          split: test
          args:
            language: en
        metrics:
          - name: Test WER
            type: wer
            value: 5.39
      - task:
          name: Automatic Speech Recognition
          type: automatic-speech-recognition
        dataset:
          name: Mozilla Common Voice 9.0
          type: mozilla-foundation/common_voice_9_0
          config: en
          split: test
          args:
            language: en
        metrics:
          - name: Test WER
            type: wer
            value: 5.79
# Metric declared for the model: wer (word error rate).
metrics:
  - wer
# Task category for the model card.
pipeline_tag: automatic-speech-recognition