lucio committed on
Commit
ff08b05
1 Parent(s): d27ee9b
Files changed (1) hide show
  1. app.py +15 -7
app.py CHANGED
@@ -41,6 +41,7 @@ def client(audio_data: np.array, sample_rate: int, default_lang: str):
41
  output_audio = _convert_audio(audio_data, sample_rate)
42
  waveform, _ = torchaudio.load(output_audio)
43
  out_prob, score, index, text_lab = lang_classifier.classify_batch(waveform)
 
44
 
45
  output_audio.seek(0)
46
  fin = wave.open(output_audio, 'rb')
@@ -50,12 +51,14 @@ def client(audio_data: np.array, sample_rate: int, default_lang: str):
50
  print(default_lang, text_lab)
51
 
52
  if text_lab == 'Spanish':
 
53
  processor, model = STT_MODELS['español']
54
  inputs = processor(waveform)
55
  logits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
56
  result = processor.decode(torch.argmax(logits, dim=-1).cpu().tolist())
57
 
58
  else:
 
59
  ds = STT_MODELS[default_lang]
60
  result = ds.stt(audio)
61
 
@@ -112,17 +115,22 @@ iface = gr.Interface(
112
  gr.inputs.Audio(type="numpy", label="Audio", optional=False),
113
  ],
114
  outputs=gr.outputs.Textbox(label="Output"),
115
- title="Coqui STT Yoloxochitl Mixtec",
116
  theme="huggingface",
117
- description="Prueba de dictado a texto para el mixteco de Yoloxochitl,"
 
 
 
 
118
  " usando [el modelo entrenado por Josh Meyer](https://coqui.ai/mixtec/jemeyer/v1.0.0/)"
119
- " con [los datos recopilados por Rey Castillo y sus colaboradores](https://www.openslr.org/89)."
120
  " Esta prueba es basada en la de [Ukraniano](https://huggingface.co/spaces/robinhad/ukrainian-stt)."
121
  " \n\n"
122
- "Speech-to-text demo for Yoloxochitl Mixtec,"
123
- " using [the model trained by Josh Meyer](https://coqui.ai/mixtec/jemeyer/v1.0.0/)"
124
- " on [the corpus compiled by Rey Castillo and collaborators](https://www.openslr.org/89)."
125
- " This demo is based on the [Ukrainian STT demo](https://huggingface.co/spaces/robinhad/ukrainian-stt).",
 
126
  )
127
 
128
 
 
41
  output_audio = _convert_audio(audio_data, sample_rate)
42
  waveform, _ = torchaudio.load(output_audio)
43
  out_prob, score, index, text_lab = lang_classifier.classify_batch(waveform)
44
+ text_lab = text_lab[0]
45
 
46
  output_audio.seek(0)
47
  fin = wave.open(output_audio, 'rb')
 
51
  print(default_lang, text_lab)
52
 
53
  if text_lab == 'Spanish':
54
+ text_lab = 'español'
55
  processor, model = STT_MODELS['español']
56
  inputs = processor(waveform)
57
  logits = model(inputs.input_values, attention_mask=inputs.attention_mask).logits
58
  result = processor.decode(torch.argmax(logits, dim=-1).cpu().tolist())
59
 
60
  else:
61
+ text_lab = default_lang
62
  ds = STT_MODELS[default_lang]
63
  result = ds.stt(audio)
64
 
 
115
  gr.inputs.Audio(type="numpy", label="Audio", optional=False),
116
  ],
117
  outputs=gr.outputs.Textbox(label="Output"),
118
+ title="Coqui STT de Chatino, Mixteco, y Totonaco",
119
  theme="huggingface",
120
+ article="Chatino: Prueba de dictado a texto para el chatino de la sierra (Quiahije) "
121
+ " usando [el modelo entrenado por Bülent Özden](https://coqui.ai/chatino/bozden/v1.0.0)"
122
+ " con [los datos recopilados por Hilaria Cruz y sys colaboradores](https://gorilla.linguistlist.org/code/ctp/)"
123
+ "\n\n"
124
+ "Mixteco: Prueba de dictado a texto para el mixteco de Yoloxochitl,"
125
  " usando [el modelo entrenado por Josh Meyer](https://coqui.ai/mixtec/jemeyer/v1.0.0/)"
126
+ " con [los datos recopilados por Rey Castillo, Jonathan Amith y sus colaboradores](https://www.openslr.org/89)."
127
  " Esta prueba es basada en la de [Ukraniano](https://huggingface.co/spaces/robinhad/ukrainian-stt)."
128
  " \n\n"
129
+ "Totonaco: Prueba de dictado a texto para el totonaco de la sierra,"
130
+ " usando [el modelo entrenado por Bülent Özden](https://coqui.ai/totonac/bozden/v1.0.0)"
131
+ " con [los datos recopilados por Osbel López Francisco y Jonathan Amith](https://www.openslr.org/107)."
132
+ " \n\n"
133
+ " Esta prueba es basada en la de [Ukraniano](https://huggingface.co/spaces/robinhad/ukrainian-stt)."
134
  )
135
 
136