add language ID
Browse files
- app.py +19 -4
- requirements.txt +3 -1
app.py
CHANGED
@@ -8,8 +8,14 @@ import requests
|
|
8 |
from os.path import exists
|
9 |
from stt import Model
|
10 |
|
|
|
|
|
11 |
|
12 |
-
#
|
|
|
|
|
|
|
|
|
13 |
storage_url = "https://coqui.gateway.scarf.sh/mixtec/jemeyer/v1.0.0"
|
14 |
model_name = "model.tflite"
|
15 |
model_link = f"{storage_url}/{model_name}"
|
@@ -18,6 +24,8 @@ model_link = f"{storage_url}/{model_name}"
|
|
18 |
def client(audio_data: np.array, sample_rate: int, use_scorer=False):
|
19 |
output_audio = _convert_audio(audio_data, sample_rate)
|
20 |
|
|
|
|
|
21 |
fin = wave.open(output_audio, 'rb')
|
22 |
audio = np.frombuffer(fin.readframes(fin.getnframes()), np.int16)
|
23 |
|
@@ -29,7 +37,7 @@ def client(audio_data: np.array, sample_rate: int, use_scorer=False):
|
|
29 |
|
30 |
result = ds.stt(audio)
|
31 |
|
32 |
-
return result
|
33 |
|
34 |
|
35 |
def download(url, file_name):
|
@@ -77,8 +85,15 @@ iface = gr.Interface(
|
|
77 |
outputs=gr.outputs.Textbox(label="Output"),
|
78 |
title="Coqui STT Yoloxochitl Mixtec",
|
79 |
theme="huggingface",
|
80 |
-
description="Prueba de dictado a texto para el mixteco de Yoloxochitl,
|
81 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
82 |
)
|
83 |
|
84 |
download(model_link, model_name)
|
|
|
8 |
from os.path import exists
|
9 |
from stt import Model
|
10 |
|
11 |
+
import torchaudio
|
12 |
+
from speechbrain.pretrained import EncoderClassifier
|
13 |
|
14 |
+
# initialize language ID model
|
15 |
+
lang_classifier = EncoderClassifier.from_hparams(source="speechbrain/lang-id-commonlanguage_ecapa", savedir="pretrained_models/lang-id-commonlanguage_ecapa")
|
16 |
+
|
17 |
+
|
18 |
+
# download STT model
|
19 |
storage_url = "https://coqui.gateway.scarf.sh/mixtec/jemeyer/v1.0.0"
|
20 |
model_name = "model.tflite"
|
21 |
model_link = f"{storage_url}/{model_name}"
|
|
|
24 |
def client(audio_data: np.array, sample_rate: int, use_scorer=False):
|
25 |
output_audio = _convert_audio(audio_data, sample_rate)
|
26 |
|
27 |
+
out_prob, score, index, text_lab = lang_classifier.classify_file(output_audio)
|
28 |
+
|
29 |
fin = wave.open(output_audio, 'rb')
|
30 |
audio = np.frombuffer(fin.readframes(fin.getnframes()), np.int16)
|
31 |
|
|
|
37 |
|
38 |
result = ds.stt(audio)
|
39 |
|
40 |
+
return f"{text_lab}: {result}"
|
41 |
|
42 |
|
43 |
def download(url, file_name):
|
|
|
85 |
outputs=gr.outputs.Textbox(label="Output"),
|
86 |
title="Coqui STT Yoloxochitl Mixtec",
|
87 |
theme="huggingface",
|
88 |
+
description="Prueba de dictado a texto para el mixteco de Yoloxochitl,"
|
89 |
+
" usando [el modelo entrenado por Josh Meyer](https://coqui.ai/mixtec/jemeyer/v1.0.0/)"
|
90 |
+
" con [los datos recopilados por Rey Castillo y sus colaboradores](https://www.openslr.org/89)."
|
91 |
+
" Esta prueba es basada en la de [Ukraniano](https://huggingface.co/spaces/robinhad/ukrainian-stt)."
|
92 |
+
" \n\n"
|
93 |
+
"Speech-to-text demo for Yoloxochitl Mixtec,"
|
94 |
+
" using [the model trained by Josh Meyer](https://coqui.ai/mixtec/jemeyer/v1.0.0/)"
|
95 |
+
" on [the corpus compiled by Rey Castillo and collaborators](https://www.openslr.org/89)."
|
96 |
+
" This demo is based on the [Ukrainian STT demo](https://huggingface.co/spaces/robinhad/ukrainian-stt).",
|
97 |
)
|
98 |
|
99 |
download(model_link, model_name)
|
requirements.txt
CHANGED
@@ -1,3 +1,5 @@
|
|
1 |
gradio==2.4.5
|
2 |
STT==1.0.0
|
3 |
-
pydub==0.25.1
|
|
|
|
|
|
1 |
gradio==2.4.5
|
2 |
STT==1.0.0
|
3 |
+
pydub==0.25.1
|
4 |
+
speechbrain==0.5.10
|
5 |
+
torchaudio
|