Gorgefound committed on
Commit
07f4993
1 Parent(s): 40c01a1

Create app.py

Browse files
Files changed (1) hide show
  1. app.py +31 -0
app.py ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import gradio as gr
2
+ import torch
3
+ from espnet2.bin.asr_inference import Speech2Text
4
+ from espnet_model_zoo.downloader import ModelDownloader
5
+
6
# Fetch the pretrained ESPnet checkpoint and build the recognizer once at
# startup so every request reuses the same model instance.
d = ModelDownloader()
_MODEL_TAG = "espnet/simple_asr_train_asr_transformer_e18_raw_bpe_sp_valid.acc.best"
asr_model = Speech2Text(
    **d.download_and_unpack(_MODEL_TAG),
    device="cpu",  # Change to "cuda" if using a GPU
)
12
+
13
def transcribe(audio):
    """Transcribe a Gradio audio recording to text using the ESPnet model.

    Args:
        audio: The value Gradio passes for the audio input: a
            ``(sample_rate, samples)`` pair, or ``None`` when the user
            submitted without recording anything.

    Returns:
        The text of the first hypothesis returned by ``asr_model``, or an
        empty string when there is no audio or no hypothesis.
    """
    # Gradio calls the function with None if nothing was recorded.
    if audio is None:
        return ""

    # NOTE(review): the sample rate is not used, same as before. The model
    # presumably expects one fixed rate -- confirm it, and resample here if
    # microphone recordings arrive at a different rate.
    _sample_rate, samples = audio
    speech = torch.as_tensor(samples)
    if speech.numel() == 0:
        return ""

    if speech.is_floating_point():
        speech = speech.to(torch.float32)
    else:
        # Integer PCM (e.g. int16) is rescaled to floats in [-1, 1) so the
        # model does not receive raw integer amplitudes.
        full_scale = torch.iinfo(speech.dtype).max + 1
        speech = speech.to(torch.float32) / full_scale

    # Assumes multi-channel input is laid out as (samples, channels); average
    # the channels so the recognizer gets a 1-D waveform.
    if speech.dim() > 1:
        speech = speech.mean(dim=-1)

    result = asr_model(speech)
    if not result:
        return ""
    text, *_ = result[0]  # First element of the first hypothesis is the text
    return text
20
+
21
# Build the microphone input. Older Gradio releases take ``source=``; newer
# ones renamed it to ``sources=[...]`` and raise TypeError on the old keyword.
# The old spelling is tried first so existing installs behave exactly as before.
try:
    _audio_input = gr.Audio(source="microphone", type="numpy")
except TypeError:
    _audio_input = gr.Audio(sources=["microphone"], type="numpy")

# Create a simple Gradio interface around the transcription function.
interface = gr.Interface(
    fn=transcribe,  # Function to call
    inputs=_audio_input,  # Audio input from microphone
    outputs="text",  # Output type (text transcription)
    title="ESPnet ASR Demo",  # Title of the UI
    description="Simple ESPnet-based speech recognition",  # Description of the app
)

# Launch the app
interface.launch()