kurianbenoy committed on
Commit
f802c6e
1 Parent(s): 390fc37

add application file

Browse files
Files changed (2) hide show
  1. app.py +64 -0
  2. requirements.txt +3 -0
app.py ADDED
@@ -0,0 +1,64 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AUTOGENERATED! DO NOT EDIT! File to edit: app.ipynb.
2
+
3
+ # %% auto 0
4
+ __all__ = ['mf_transcribe', 'transcribe_malayalam_speech', 'gr_transcribe_malayalam_speech']
5
+
6
+ # %% app.ipynb 4
7
+ import gradio as gr
8
+ from faster_whisper import WhisperModel
9
+
10
+ # %% app.ipynb 8
11
# Loaded models keyed by (folder, device, compute_type), so repeated calls
# with the same configuration reuse one model instead of rebuilding it.
_MODEL_CACHE = {}


def _load_whisper_model(folder, device, compute_type):
    """Return the WhisperModel for this configuration, constructing it on first use."""
    key = (folder, device, compute_type)
    if key not in _MODEL_CACHE:
        _MODEL_CACHE[key] = WhisperModel(folder, device=device, compute_type=compute_type)
    return _MODEL_CACHE[key]


def transcribe_malayalam_speech(audio_file, compute_type="int8", device="cpu", folder="vegam-whisper-medium-ml-fp16"):
    """Transcribe an audio file and return the recognised text as one string.

    Args:
        audio_file: Audio input handed straight to ``WhisperModel.transcribe``
            (the Gradio interface in this file supplies a file path).
        compute_type: ``compute_type`` argument forwarded to ``WhisperModel``.
        device: ``device`` argument forwarded to ``WhisperModel``.
        folder: First positional argument forwarded to ``WhisperModel``
            (presumably the directory holding the converted model weights).

    Returns:
        The ``text`` of every returned segment, joined with single spaces.
    """
    model = _load_whisper_model(folder, device, compute_type)
    # The second return value (transcription info) is not needed here.
    segments, _info = model.transcribe(audio_file, beam_size=5)
    return " ".join(segment.text for segment in segments)
22
+
23
+ # %% app.ipynb 9
24
def gr_transcribe_malayalam_speech(microphone, file_upload, compute_type="int8", device="cpu", folder="vegam-whisper-medium-ml-fp16"):
    """Gradio callback: transcribe the microphone recording or the uploaded file.

    Args:
        microphone: Recording from the microphone input, or ``None`` if unused.
        file_upload: Uploaded audio file, or ``None`` if unused.
        compute_type: Forwarded to ``transcribe_malayalam_speech``.
        device: Forwarded to ``transcribe_malayalam_speech``.
        folder: Forwarded to ``transcribe_malayalam_speech``.

    Returns:
        An error message when neither input is provided; otherwise the
        transcript, prefixed with a warning line when both inputs were
        provided (the microphone recording takes precedence).
    """
    if microphone is None and file_upload is None:
        return "ERROR: You have to either use the microphone or upload an audio file"

    warn_output = ""
    if microphone is not None and file_upload is not None:
        warn_output = (
            "WARNING: You've uploaded an audio file and used the microphone. "
            "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
        )

    # The microphone recording wins whenever it is present.
    audio_file = microphone if microphone is not None else file_upload

    # Reuse the module's transcription routine rather than duplicating it.
    text = transcribe_malayalam_speech(
        audio_file,
        compute_type=compute_type,
        device=device,
        folder=folder,
    )

    # Surface the warning to the user; previously it was built but never returned.
    return warn_output + text
46
+
47
+ # %% app.ipynb 16
48
# Gradio UI: two audio inputs (microphone recording and file upload), both
# delivered to the callback as file paths, and a single text output.
# `gr.Audio` replaces the deprecated `gr.inputs.Audio(..., optional=True)`
# form; an unused input is passed to the callback as None.
mf_transcribe = gr.Interface(
    fn=gr_transcribe_malayalam_speech,
    inputs=[
        gr.Audio(source="microphone", type="filepath"),
        gr.Audio(source="upload", type="filepath"),
    ],
    outputs="text",
    title="PALLAKKU (പല്ലക്ക്)",
    description=(
        "Pallakku is a Malayalam speech to text demo leveraging the model-weights of [vegam-whisper-medium-ml](https://huggingface.co/kurianbenoy/vegam-whisper-medium-ml-fp16)."
    ),
    article="Please note that this demo now uses CPU only and in my testing for a 5 seconds audio file it can take upto 15 seconds for results to come. If you are interested to use a GPU based API instead, feel free to contact the author @ kurian.bkk@gmail.com",
    allow_flagging="never",
)

# %% app.ipynb 17
# Start the web server for the demo (share=True as in the original export).
mf_transcribe.launch(share=True)
requirements.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
1
+ gradio==3.31.0
2
+ faster-whisper==0.5.1
3
+ torch