lucio committed on
Commit
f09c038
1 Parent(s): e292f86

add language ID

Browse files
Files changed (2) hide show
  1. app.py +19 -4
  2. requirements.txt +3 -1
app.py CHANGED
@@ -8,8 +8,14 @@ import requests
8
  from os.path import exists
9
  from stt import Model
10
 
 
 
11
 
12
- # download model
 
 
 
 
13
  storage_url = "https://coqui.gateway.scarf.sh/mixtec/jemeyer/v1.0.0"
14
  model_name = "model.tflite"
15
  model_link = f"{storage_url}/{model_name}"
@@ -18,6 +24,8 @@ model_link = f"{storage_url}/{model_name}"
18
  def client(audio_data: np.array, sample_rate: int, use_scorer=False):
19
  output_audio = _convert_audio(audio_data, sample_rate)
20
 
 
 
21
  fin = wave.open(output_audio, 'rb')
22
  audio = np.frombuffer(fin.readframes(fin.getnframes()), np.int16)
23
 
@@ -29,7 +37,7 @@ def client(audio_data: np.array, sample_rate: int, use_scorer=False):
29
 
30
  result = ds.stt(audio)
31
 
32
- return result
33
 
34
 
35
  def download(url, file_name):
@@ -77,8 +85,15 @@ iface = gr.Interface(
77
  outputs=gr.outputs.Textbox(label="Output"),
78
  title="Coqui STT Yoloxochitl Mixtec",
79
  theme="huggingface",
80
- description="Prueba de dictado a texto para el mixteco de Yoloxochitl, usando [el modelo entrenado por Josh Meyer](https://coqui.ai/mixtec/jemeyer/v1.0.0/) con [los datos recopilados por Rey Castillo y sus colaboradoes](https://www.openslr.org/89). Esta prueba es basada en la de [Ukraniano](https://huggingface.co/spaces/robinhad/ukrainian-stt). \n\n"
81
- "Speech-to-text demo for Yoloxochitl Mixtec, using [the model trained by Josh Meyer](https://coqui.ai/mixtec/jemeyer/v1.0.0/) on [the corpus compiled by Rey Castillo and collaborators](https://www.openslr.org/89). This demo is based on the [Ukrainian STT demo](https://huggingface.co/spaces/robinhad/ukrainian-stt).",
 
 
 
 
 
 
 
82
  )
83
 
84
  download(model_link, model_name)
 
8
  from os.path import exists
9
  from stt import Model
10
 
11
+ import torchaudio
12
+ from speechbrain.pretrained import EncoderClassifier
13
 
14
+ # initialize language ID model
15
+ lang_classifier = EncoderClassifier.from_hparams(source="speechbrain/lang-id-commonlanguage_ecapa", savedir="pretrained_models/lang-id-commonlanguage_ecapa")
16
+
17
+
18
+ # download STT model
19
  storage_url = "https://coqui.gateway.scarf.sh/mixtec/jemeyer/v1.0.0"
20
  model_name = "model.tflite"
21
  model_link = f"{storage_url}/{model_name}"
 
24
  def client(audio_data: np.array, sample_rate: int, use_scorer=False):
25
  output_audio = _convert_audio(audio_data, sample_rate)
26
 
27
+ out_prob, score, index, text_lab = lang_classifier.classify_file(output_audio)
28
+
29
  fin = wave.open(output_audio, 'rb')
30
  audio = np.frombuffer(fin.readframes(fin.getnframes()), np.int16)
31
 
 
37
 
38
  result = ds.stt(audio)
39
 
40
+ return f"{text_lab}: {result}"
41
 
42
 
43
  def download(url, file_name):
 
85
  outputs=gr.outputs.Textbox(label="Output"),
86
  title="Coqui STT Yoloxochitl Mixtec",
87
  theme="huggingface",
88
+ description="Prueba de dictado a texto para el mixteco de Yoloxochitl,"
89
+ " usando [el modelo entrenado por Josh Meyer](https://coqui.ai/mixtec/jemeyer/v1.0.0/)"
90
+ " con [los datos recopilados por Rey Castillo y sus colaboradores](https://www.openslr.org/89)."
91
+ " Esta prueba es basada en la de [Ukraniano](https://huggingface.co/spaces/robinhad/ukrainian-stt)."
92
+ " \n\n"
93
+ "Speech-to-text demo for Yoloxochitl Mixtec,"
94
+ " using [the model trained by Josh Meyer](https://coqui.ai/mixtec/jemeyer/v1.0.0/)"
95
+ " on [the corpus compiled by Rey Castillo and collaborators](https://www.openslr.org/89)."
96
+ " This demo is based on the [Ukrainian STT demo](https://huggingface.co/spaces/robinhad/ukrainian-stt).",
97
  )
98
 
99
  download(model_link, model_name)
requirements.txt CHANGED
@@ -1,3 +1,5 @@
1
  gradio==2.4.5
2
  STT==1.0.0
3
- pydub==0.25.1
 
 
 
1
  gradio==2.4.5
2
  STT==1.0.0
3
+ pydub==0.25.1
4
+ speechbrain==0.5.10
5
+ torchaudio