lucio committed on
Commit
dbf2fc2
1 Parent(s): dffab9f

add basic HF space demo

Browse files
Files changed (4) hide show
  1. NOTES.md +2 -2
  2. app.py +87 -0
  3. packages.txt +1 -0
  4. requirements.txt +3 -0
NOTES.md CHANGED
@@ -5,13 +5,13 @@
5
  ## Trained models
6
 
7
  ESPnet model for Yoloxochitl Mixtec
8
- - Huggingface Hub page
9
  - Model source code https://github.com/espnet/espnet/tree/master/egs/yoloxochitl_mixtec/asr1
10
  - Colab notebook to setup and apply the model https://colab.research.google.com/drive/1ieoW2b3ERydjaaWuhVPBP_v2QqqWsC1Q?usp=sharing
11
 
12
  Coqui model for Yoloxochitl Mixtec
13
  - Huggingface Hub page
14
- - Coqui page
15
  - Colab notebook to setup and apply the model https://colab.research.google.com/drive/1b1SujEGC_F3XhvUCuUyZK_tyUkEaFZ7D?usp=sharing#scrollTo=6IvRFke4Ckpz
16
 
17
  Spanish ASR models
5
  ## Trained models
6
 
7
  ESPnet model for Yoloxochitl Mixtec
8
+ - Huggingface Hub page https://huggingface.co/espnet/ftshijt_espnet2_asr_yolo_mixtec_transformer
9
  - Model source code https://github.com/espnet/espnet/tree/master/egs/yoloxochitl_mixtec/asr1
10
  - Colab notebook to setup and apply the model https://colab.research.google.com/drive/1ieoW2b3ERydjaaWuhVPBP_v2QqqWsC1Q?usp=sharing
11
 
12
  Coqui model for Yoloxochitl Mixtec
13
  - Huggingface Hub page
14
+ - Coqui page https://coqui.ai/mixtec/jemeyer/v1.0.0
15
  - Colab notebook to setup and apply the model https://colab.research.google.com/drive/1b1SujEGC_F3XhvUCuUyZK_tyUkEaFZ7D?usp=sharing#scrollTo=6IvRFke4Ckpz
16
 
17
  Spanish ASR models
app.py ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from io import BytesIO
2
+ from typing import Tuple
3
+ import wave
4
+ import gradio as gr
5
+ import numpy as np
6
+ from pydub.audio_segment import AudioSegment
7
+ import requests
8
+ from os.path import exists
9
+ from stt import Model
10
+
11
+
12
# Where the released model is hosted, and the local file name it is stored under.
storage_url = "https://coqui.gateway.scarf.sh/mixtec/jemeyer/v1.0.0"
model_name = "model.tflite"
# Full download address: the storage URL followed by the model file name.
model_link = "/".join((storage_url, model_name))
16
+
17
+
18
def client(audio_data: np.array, sample_rate: int, use_scorer=False):
    """Transcribe raw audio samples with the STT model.

    The samples are converted to a WAV stream by ``_convert_audio``, the
    frames of that stream are decoded as 16-bit integers, and the result is
    passed to the model.

    Args:
        audio_data: Raw audio samples, forwarded unchanged to
            ``_convert_audio``.
        sample_rate: Sample rate of ``audio_data`` in Hz.
        use_scorer: When true, enable the external scorer stored in
            ``kenlm.scorer`` before decoding.

    Returns:
        The value returned by ``Model.stt`` for the converted audio.
    """
    output_audio = _convert_audio(audio_data, sample_rate)

    # The context manager closes the reader even when reading or decoding
    # the frames raises.
    with wave.open(output_audio, 'rb') as fin:
        audio = np.frombuffer(fin.readframes(fin.getnframes()), np.int16)

    # NOTE: the model is loaded from ``model_name`` on every call.
    ds = Model(model_name)
    if use_scorer:
        ds.enableExternalScorer("kenlm.scorer")

    return ds.stt(audio)
33
+
34
+
35
def download(url, file_name):
    """Download ``url`` to ``file_name`` unless that file already exists.

    The response is streamed to a temporary ``<file_name>.part`` file and
    renamed only after the transfer finished, so an interrupted download is
    never mistaken for a complete file on the next run.

    Args:
        url: HTTP(S) address of the file to fetch.
        file_name: Local path the content is saved to.

    Raises:
        requests.HTTPError: If the server answers with an error status; the
            error page is not saved in place of the requested file.
    """
    import os

    if os.path.exists(file_name):
        print(f"Found {file_name}. Skipping download...")
        return

    print(f"Downloading {file_name}")
    partial_name = f"{file_name}.part"
    # stream=True avoids holding the whole file in memory; the timeout keeps
    # a stalled connection from blocking start-up forever.
    with requests.get(url, allow_redirects=True, stream=True,
                      timeout=60) as response:
        response.raise_for_status()
        with open(partial_name, 'wb') as file:
            for chunk in response.iter_content(chunk_size=1 << 20):
                file.write(chunk)
    os.replace(partial_name, file_name)
43
+
44
+
45
def stt(audio: Tuple[int, np.ndarray], model_name: str = "model.tflite"):
    """Transcribe one recording; used as the callback of the Gradio interface.

    Args:
        audio: ``(sample_rate, samples)`` pair.
        model_name: Not used by this function; kept so existing two-argument
            callers keep working. It has a default because the interface
            declares a single audio input and therefore calls this function
            with one argument only.

    Returns:
        The recognition result produced by ``client``.

    Raises:
        ValueError: If the sample rate of the recording is not 16000 Hz.
    """
    sample_rate, samples = audio

    if sample_rate != 16000:
        raise ValueError("Incorrect sample rate.")

    # The external scorer is never enabled from this entry point.
    use_scorer = False
    return client(samples, sample_rate, use_scorer)
55
+
56
+
57
def _convert_audio(audio_data: np.array, sample_rate: int):
    """Re-encode raw samples as a 16 kHz mono WAV stream.

    The bytes of ``audio_data`` are interpreted as single-channel audio with
    2-byte samples recorded at ``sample_rate``.

    Args:
        audio_data: Buffer holding the raw samples.
        sample_rate: Sample rate of ``audio_data`` in Hz.

    Returns:
        A ``BytesIO`` positioned at its start that contains the exported WAV
        data (``pcm_s16le`` codec).
    """
    # Stage the raw samples in an in-memory file that pydub can read from.
    raw_pcm = BytesIO()
    raw_pcm.write(audio_data)
    raw_pcm.seek(0)

    segment = AudioSegment.from_raw(
        raw_pcm,
        channels=1,
        sample_width=2,
        frame_rate=sample_rate,
    )
    resampled = segment.set_frame_rate(16000)
    mono = resampled.set_channels(1)

    wav_buffer = BytesIO()
    mono.export(wav_buffer, "wav", codec="pcm_s16le")
    # Rewind so the caller reads the WAV data from the beginning.
    wav_buffer.seek(0)
    return wav_buffer
72
+
73
+
74
# The demo takes one audio input (handed to ``stt`` as a numpy-based value)
# and shows the recognised text in a single text box.
_audio_input = gr.inputs.Audio(type="numpy", label=None, optional=False)
_text_output = gr.outputs.Textbox(label="Output")
_description = "Speech-to-text demo for Yoloxochitl Mixtec, using the model trained by Josh Meyer on the corpus compiled by Rey Castillo and collaborators. This demo is based on the [Ukrainian STT demo](https://huggingface.co/spaces/robinhad/ukrainian-stt)."

iface = gr.Interface(
    fn=stt,
    inputs=[_audio_input],
    outputs=_text_output,
    title="Coqui STT Yoloxochitl Mixtec",
    theme="huggingface",
    description=_description,
)

# Make sure the model file is available locally before the app starts serving.
download(model_link, model_name)
iface.launch()
packages.txt ADDED
@@ -0,0 +1 @@
 
1
+ ffmpeg
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ gradio==2.4.5
2
+ STT==1.0.0
3
+ pydub==0.25.1