rexsimiloluwah committed on
Commit
bcb1eec
1 Parent(s): cefcfbf

added app for automatic speech recognition

Browse files
Files changed (4) hide show
  1. app.py +13 -4
  2. requirements.txt +8 -0
  3. tasks/__init__.py +0 -0
  4. tasks/asr.py +47 -0
app.py CHANGED
@@ -1,7 +1,16 @@
1
  import gradio as gr
2
 
3
- def greet(name):
4
- return "Hello " + name + "!!"
 
 
5
 
6
- iface = gr.Interface(fn=greet, inputs="text", outputs="text")
7
- iface.launch()
 
 
 
 
 
 
 
 
1
  import gradio as gr
2
 
3
+ from tasks.asr import (
4
+ mic_transcribe_interface,
5
+ file_transcribe_interface
6
+ )
7
 
8
+ app = gr.Blocks()
9
+
10
+ with app:
11
+ gr.TabbedInterface(
12
+ [mic_transcribe_interface, file_transcribe_interface],
13
+ ["Transcribe from Microphone", "Transcribe from Audio File"]
14
+ )
15
+
16
+ app.launch(share=True)
requirements.txt ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ torch
2
+ librosa
3
+ soundfile
4
+ transformers
5
+ pillow
6
+ numpy
7
+ requests
8
+ matplotlib
tasks/__init__.py ADDED
File without changes
tasks/asr.py ADDED
@@ -0,0 +1,47 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import librosa
2
+ import numpy as np
3
+ import gradio as gr
4
+ import soundfile as sf
5
+ from transformers import pipeline
6
+
7
+ # Load the pipeline
8
+ model = pipeline(
9
+ task="automatic-speech-recognition",
10
+ model="distil-whisper/distil-small.en"
11
+ )
12
+
13
+ def transcribe_audio(filepath):
14
+ """Transcribe audio to text"""
15
+ audio, sample_rate = sf.read(filepath)
16
+ audio_mono = librosa.to_mono(np.transpose(audio))
17
+
18
+ # resample the audio
19
+ audio_16KHz = librosa.resample(
20
+ audio_mono,
21
+ orig_sr=sample_rate,
22
+ target_sr=16000
23
+ )
24
+
25
+ output = model(
26
+ audio_16KHz,
27
+ chunk_length_s=30,
28
+ batch_size=4,
29
+ )
30
+
31
+ return output["text"]
32
+
33
+ mic_transcribe_interface = gr.Interface(
34
+ fn=transcribe_audio,
35
+ inputs=gr.Audio(sources="microphone", type="filepath"),
36
+ outputs=gr.Textbox(label="Transcription", lines=3),
37
+ allow_flagging="never",
38
+ title="Transcribe Audio from your Microphone"
39
+ )
40
+
41
+ file_transcribe_interface = gr.Interface(
42
+ fn=transcribe_audio,
43
+ inputs=gr.Audio(sources="upload", type="filepath"),
44
+ outputs=gr.Textbox(label="Transcription", lines=3),
45
+ allow_flagging="never",
46
+ title="Transcribe Audio from a File"
47
+ )