Spaces:

clr
/

w2v2asr

Sleeping

App Files Community

clr committed on May 22, 2024

Commit

544f10c

verified ·

1 Parent(s): 20ae5de

Update app.py

Browse files

Files changed (1) hide show

app.py +8 -8

app.py CHANGED Viewed

@@ -5,7 +5,7 @@ import numpy as np
 import torch, torchaudio
 from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline
-MODEL_IS="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-icelandic-ep10-1000h"
 MODEL_FO="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h"
 torch.random.manual_seed(0)
@@ -57,17 +57,17 @@ with bl:
     gr.Markdown(
         """
-    # W2V2 speech recognition
-    ## 10.01.2023 : No longer maintained, the normal hosted inference API is working now.
     ## * * * * * * * *
     Upload a file for recognition with
-    https://huggingface.co/carlosdanielhernandezmena/wav2vec2-large-xlsr-53-icelandic-ep10-1000h
     or https://huggingface.co/carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h
-    - For some reason, the huggingface 'Hosted inference API' on the model page does not currently work, but this does.
-    - There is no language model (yet), so it can generate non-words.
     - Send errors/bugs to caitlinr@ru.is
     """
     )
@@ -75,7 +75,7 @@ with bl:
     with gr.Tabs():
         with gr.TabItem("Icelandic"):
             with gr.Row():
-                audio_file = gr.Audio(type="filepath")
                 with gr.Column():
                     whole_output = gr.Textbox(label="whole-file recognition")
                     chunk_output = gr.Textbox(label="recognition with chunking")
@@ -83,7 +83,7 @@ with bl:
             text_button.click(recis, inputs=audio_file, outputs=[whole_output,chunk_output])
         with gr.TabItem("Faroese"):
             with gr.Row():
-                audio_file = gr.Audio(type="filepath")
                 with gr.Column():
                     whole_output = gr.Textbox(label="whole-file recognition")
                     chunk_output = gr.Textbox(label="recognition with chunking")

 import torch, torchaudio
 from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline
+MODEL_IS="language-and-voice-lab/wav2vec2-large-xlsr-53-icelandic-ep30-967h"
 MODEL_FO="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h"
 torch.random.manual_seed(0)
     gr.Markdown(
         """
+    # Speech recognition
+    ### Users logged in to a Huggingface account can use each model's normal hosted inference API instead.
     ## * * * * * * * *
     Upload a file for recognition with
+    https://huggingface.co/language-and-voice-lab/wav2vec2-large-xlsr-53-icelandic-ep30-967h
     or https://huggingface.co/carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h
+    - Wav2Vec2 models have no language model (yet), so it can generate non-words.
+    - Whisper can hallucinate.
     - Send errors/bugs to caitlinr@ru.is
     """
     )
     with gr.Tabs():
         with gr.TabItem("Icelandic"):
             with gr.Row():
+                audio_file = gr.Audio(source=["upload", "microphone"],type="filepath")
                 with gr.Column():
                     whole_output = gr.Textbox(label="whole-file recognition")
                     chunk_output = gr.Textbox(label="recognition with chunking")
             text_button.click(recis, inputs=audio_file, outputs=[whole_output,chunk_output])
         with gr.TabItem("Faroese"):
             with gr.Row():
+                audio_file = gr.Audio(source=["upload", "microphone"],type="filepath")
                 with gr.Column():
                     whole_output = gr.Textbox(label="whole-file recognition")
                     chunk_output = gr.Textbox(label="recognition with chunking")