rngzhi commited on
Commit
64fc2b0
1 Parent(s): 6145590

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +35 -16
app.py CHANGED
@@ -1,30 +1,49 @@
 
 
1
  import gradio as gr
 
 
2
 
3
- def transcribe_speech(audio_file):
4
- # Load the model
5
- asr_model = gr.load("models/rngzhi/cs3264-project")
6
- result = asr_model(audio_file)
7
- return result['text']
 
 
 
 
 
 
 
 
 
 
8
 
9
- demo = gr.Blocks()
10
 
 
 
 
 
 
 
 
 
 
11
  mic_transcribe = gr.Interface(
12
- fn=transcribe_speech,
13
  inputs=gr.Audio(sources="microphone", type="filepath"),
14
- outputs=gr.Textbox(),
15
  )
16
 
17
  file_transcribe = gr.Interface(
18
- fn=transcribe_speech,
19
  inputs=gr.Audio(sources="upload", type="filepath"),
20
- outputs=gr.Textbox(),
21
  )
22
 
23
- with demo:
24
- gr.TabbedInterface(
25
- [mic_transcribe, file_transcribe],
26
- ["Transcribe Microphone", "Transcribe Audio File"],
27
- )
28
 
 
 
29
 
30
- demo.launch(share=True,auth=None,debug=False)
 
"""Gradio demo: automatic speech recognition with a fine-tuned Whisper-style model.

Exposes two tabs — transcribe an uploaded audio file or a microphone
recording — both backed by a single Hugging Face `transformers` ASR pipeline.
"""

import torch

import gradio as gr
from transformers import pipeline
from transformers.pipelines.audio_utils import ffmpeg_read

import tempfile
import os

# NOTE(review): `ffmpeg_read`, `tempfile`, `os`, and FILE_LIMIT_MB are not
# referenced anywhere in this file — presumably leftovers from the upstream
# Whisper demo this was copied from; confirm before removing.
MODEL_NAME = "rngzhi/cs3264-project"
BATCH_SIZE = 8
FILE_LIMIT_MB = 1000

# Use the first CUDA device when available, otherwise fall back to CPU.
device = 0 if torch.cuda.is_available() else "cpu"

# Build the ASR pipeline once at startup; 30 s chunking lets it handle
# audio longer than the model's native context window.
pipe = pipeline(
    task="automatic-speech-recognition",
    model=MODEL_NAME,
    chunk_length_s=30,
    device=device,
)


def transcribe(inputs, task="transcribe"):
    """Transcribe an audio file and return the recognized text.

    Parameters
    ----------
    inputs : str | None
        Filesystem path to the audio clip (Gradio `type="filepath"`).
    task : str
        Generation task passed to the model ("transcribe" or "translate").
        Defaults to "transcribe" — the Gradio interfaces below supply only
        the audio input, so without a default every request would fail with
        a missing-argument TypeError.

    Raises
    ------
    gr.Error
        If no audio was submitted.
    """
    if inputs is None:
        raise gr.Error("No audio file submitted! Please upload or record an audio file before submitting your request.")

    text = pipe(inputs, batch_size=BATCH_SIZE, generate_kwargs={"task": task}, return_timestamps=True)["text"]
    return text


demo = gr.Blocks()
mic_transcribe = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources="microphone", type="filepath"),
    outputs="text",
)

file_transcribe = gr.Interface(
    fn=transcribe,
    inputs=gr.Audio(sources="upload", type="filepath"),
    outputs="text",
)

with demo:
    gr.TabbedInterface([file_transcribe, mic_transcribe], ["Audio file", "Microphone"])

demo.launch(debug=True)