Update app.py
Browse files
app.py
CHANGED
@@ -5,7 +5,7 @@ import numpy as np
|
|
5 |
import torch, torchaudio
|
6 |
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline
|
7 |
|
8 |
-
MODEL_IS="
|
9 |
MODEL_FO="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h"
|
10 |
|
11 |
torch.random.manual_seed(0)
|
@@ -57,17 +57,17 @@ with bl:
|
|
57 |
|
58 |
gr.Markdown(
|
59 |
"""
|
60 |
-
#
|
61 |
|
62 |
-
|
63 |
## * * * * * * * *
|
64 |
|
65 |
Upload a file for recognition with
|
66 |
-
https://huggingface.co/
|
67 |
or https://huggingface.co/carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h
|
68 |
|
69 |
-
-
|
70 |
-
-
|
71 |
- Send errors/bugs to caitlinr@ru.is
|
72 |
"""
|
73 |
)
|
@@ -75,7 +75,7 @@ with bl:
|
|
75 |
with gr.Tabs():
|
76 |
with gr.TabItem("Icelandic"):
|
77 |
with gr.Row():
|
78 |
-
audio_file = gr.Audio(type="filepath")
|
79 |
with gr.Column():
|
80 |
whole_output = gr.Textbox(label="whole-file recognition")
|
81 |
chunk_output = gr.Textbox(label="recognition with chunking")
|
@@ -83,7 +83,7 @@ with bl:
|
|
83 |
text_button.click(recis, inputs=audio_file, outputs=[whole_output,chunk_output])
|
84 |
with gr.TabItem("Faroese"):
|
85 |
with gr.Row():
|
86 |
-
audio_file = gr.Audio(type="filepath")
|
87 |
with gr.Column():
|
88 |
whole_output = gr.Textbox(label="whole-file recognition")
|
89 |
chunk_output = gr.Textbox(label="recognition with chunking")
|
|
|
5 |
import torch, torchaudio
|
6 |
from transformers import Wav2Vec2ForCTC, Wav2Vec2Processor, pipeline
|
7 |
|
8 |
+
MODEL_IS="language-and-voice-lab/wav2vec2-large-xlsr-53-icelandic-ep30-967h"
|
9 |
MODEL_FO="carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h"
|
10 |
|
11 |
torch.random.manual_seed(0)
|
|
|
57 |
|
58 |
gr.Markdown(
|
59 |
"""
|
60 |
+
# Speech recognition
|
61 |
|
62 |
+
### Users logged in to a Huggingface account can use each model's normal hosted inference API instead.
|
63 |
## * * * * * * * *
|
64 |
|
65 |
Upload a file for recognition with
|
66 |
+
https://huggingface.co/language-and-voice-lab/wav2vec2-large-xlsr-53-icelandic-ep30-967h
|
67 |
or https://huggingface.co/carlosdanielhernandezmena/wav2vec2-large-xlsr-53-faroese-100h
|
68 |
|
69 |
+
- Wav2Vec2 models have no language model (yet), so it can generate non-words.
|
70 |
+
- Whisper can hallucinate.
|
71 |
- Send errors/bugs to caitlinr@ru.is
|
72 |
"""
|
73 |
)
|
|
|
75 |
with gr.Tabs():
|
76 |
with gr.TabItem("Icelandic"):
|
77 |
with gr.Row():
|
78 |
+
audio_file = gr.Audio(source=["upload", "microphone"],type="filepath")
|
79 |
with gr.Column():
|
80 |
whole_output = gr.Textbox(label="whole-file recognition")
|
81 |
chunk_output = gr.Textbox(label="recognition with chunking")
|
|
|
83 |
text_button.click(recis, inputs=audio_file, outputs=[whole_output,chunk_output])
|
84 |
with gr.TabItem("Faroese"):
|
85 |
with gr.Row():
|
86 |
+
audio_file = gr.Audio(source=["upload", "microphone"],type="filepath")
|
87 |
with gr.Column():
|
88 |
whole_output = gr.Textbox(label="whole-file recognition")
|
89 |
chunk_output = gr.Textbox(label="recognition with chunking")
|