naveenk-ai committed on
Commit
7240e40
1 Parent(s): 3ae9f80

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +20 -12
app.py CHANGED
@@ -1,35 +1,43 @@
1
  from transformers import pipeline
2
 
3
- # Load the ASR pipeline
4
- asr = pipeline(task="automatic-speech-recognition", model="facebook/wav2vec2-base-960h")
5
 
6
  import gradio as gr
 
7
 
8
  def transcribe_speech(filepath):
9
  if filepath is None:
10
  gr.Warning("No Audio found, please retry")
11
  return ""
12
 
13
- # Perform speech recognition using the Hugging Face ASR pipeline
14
  output = asr(filepath)
15
- return output[0]['transcription']
16
 
17
  mic_transcribe = gr.Interface(
18
  fn = transcribe_speech,
19
- inputs = gr.Audio(sources="microphone", type="file"),
20
- outputs=gr.Textbox(label="Transcription", lines=3),
 
 
21
  allow_flagging="never"
22
  )
23
 
24
  file_transcribe = gr.Interface(
25
  fn = transcribe_speech,
26
- inputs = gr.Audio(sources="upload", type="file"),
27
- outputs=gr.Textbox(label="Transcription", lines=3),
 
 
28
  allow_flagging="never"
29
  )
30
 
31
- with gr.Block():
32
  gr.TabbedInterface(
33
- [mic_transcribe, file_transcribe],
34
- ["Transcribe Microphone", "Transcribe Audio File"],
35
- ).launch(debug=True)
 
 
 
 
 
1
  from transformers import pipeline
2
 
3
+ asr = pipeline(task="automatic-speech-recognition",
4
+ model="distil-whisper/distil-small.en")
5
 
6
  import gradio as gr
7
+ demo = gr.Blocks()
8
 
9
  def transcribe_speech(filepath):
10
  if filepath is None:
11
  gr.Warning("No Audio found, please retry")
12
  return ""
13
 
 
14
  output = asr(filepath)
15
+ return output["text"]
16
 
17
  mic_transcribe = gr.Interface(
18
  fn = transcribe_speech,
19
+ inputs = gr.Audio(sources="microphone",
20
+ type="filepath"),
21
+ outputs=gr.Textbox(label="Transcription",
22
+ lines=3),
23
  allow_flagging="never"
24
  )
25
 
26
  file_transcribe = gr.Interface(
27
  fn = transcribe_speech,
28
+ inputs = gr.Audio(sources="upload",
29
+ type="filepath"),
30
+ outputs=gr.Textbox(label="Transcription",
31
+ lines=3),
32
  allow_flagging="never"
33
  )
34
 
35
+ with demo:
36
  gr.TabbedInterface(
37
+ [mic_transcribe,
38
+ file_transcribe],
39
+ ["Transcribe Microphone",
40
+ "Transcribe Audio File"],
41
+ )
42
+
43
+ demo.launch(debug=True)