clr committed on
Commit
544f10c
·
verified ·
1 Parent(s): 20ae5de

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -8
app.py CHANGED
@@ -5,7 +5,7 @@ import numpy as np
5
  import torch, torchaudio
6
  from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline
7
 
8
- MODEL_IS="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-icelandic-ep10-1000h"
9
  MODEL_FO="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h"
10
 
11
  torch.random.manual_seed(0)
@@ -57,17 +57,17 @@ with bl:
57
 
58
  gr.Markdown(
59
  """
60
- # W2V2 speech recognition
61
 
62
- ## 10.01.2023 : No longer maintained, the normal hosted inference API is working now.
63
  ## * * * * * * * *
64
 
65
  Upload a file for recognition with
66
- https://huggingface.co/carlosdanielhernandezmena/wav2vec2-large-xlsr-53-icelandic-ep10-1000h
67
  or https://huggingface.co/carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h
68
 
69
- - For some reason, the huggingface 'Hosted inference API' on the model page does not currently work, but this does.
70
- - There is no language model (yet), so it can generate non-words.
71
  - Send errors/bugs to caitlinr@ru.is
72
  """
73
  )
@@ -75,7 +75,7 @@ with bl:
75
  with gr.Tabs():
76
  with gr.TabItem("Icelandic"):
77
  with gr.Row():
78
- audio_file = gr.Audio(type="filepath")
79
  with gr.Column():
80
  whole_output = gr.Textbox(label="whole-file recognition")
81
  chunk_output = gr.Textbox(label="recognition with chunking")
@@ -83,7 +83,7 @@ with bl:
83
  text_button.click(recis, inputs=audio_file, outputs=[whole_output,chunk_output])
84
  with gr.TabItem("Faroese"):
85
  with gr.Row():
86
- audio_file = gr.Audio(type="filepath")
87
  with gr.Column():
88
  whole_output = gr.Textbox(label="whole-file recognition")
89
  chunk_output = gr.Textbox(label="recognition with chunking")
 
5
  import torch, torchaudio
6
  from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline
7
 
8
+ MODEL_IS="language-and-voice-lab/wav2vec2-large-xlsr-53-icelandic-ep30-967h"
9
  MODEL_FO="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h"
10
 
11
  torch.random.manual_seed(0)
 
57
 
58
  gr.Markdown(
59
  """
60
+ # Speech recognition
61
 
62
+ ### Users logged in to a Huggingface account can use each model's normal hosted inference API instead.
63
  ## * * * * * * * *
64
 
65
  Upload a file for recognition with
66
+ https://huggingface.co/language-and-voice-lab/wav2vec2-large-xlsr-53-icelandic-ep30-967h
67
  or https://huggingface.co/carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h
68
 
69
+ - Wav2Vec2 models have no language model (yet), so they can generate non-words.
70
+ - Whisper can hallucinate.
71
  - Send errors/bugs to caitlinr@ru.is
72
  """
73
  )
 
75
  with gr.Tabs():
76
  with gr.TabItem("Icelandic"):
77
  with gr.Row():
78
+ audio_file = gr.Audio(source=["upload", "microphone"],type="filepath")
79
  with gr.Column():
80
  whole_output = gr.Textbox(label="whole-file recognition")
81
  chunk_output = gr.Textbox(label="recognition with chunking")
 
83
  text_button.click(recis, inputs=audio_file, outputs=[whole_output,chunk_output])
84
  with gr.TabItem("Faroese"):
85
  with gr.Row():
86
+ audio_file = gr.Audio(source=["upload", "microphone"],type="filepath")
87
  with gr.Column():
88
  whole_output = gr.Textbox(label="whole-file recognition")
89
  chunk_output = gr.Textbox(label="recognition with chunking")