Spaces:

kurianbenoy
/

Pallakku

Running

App Files Files Community

kurianbenoy committed on May 21, 2023

Commit

f802c6e

•

1 Parent(s): 390fc37

add application file

Browse files

Files changed (2) hide show

app.py +64 -0
requirements.txt +3 -0

app.py ADDED Viewed

	@@ -0,0 +1,64 @@

+# AUTOGENERATED! DO NOT EDIT! File to edit: app.ipynb.
+# %% auto 0
+__all__ = ['mf_transcribe', 'transcribe_malayalam_speech', 'gr_transcribe_malayalam_speech']
+# %% app.ipynb 4
+import gradio as gr
+from faster_whisper import WhisperModel
+# %% app.ipynb 8
+def transcribe_malayalam_speech(audio_file, compute_type="int8", device="cpu", folder="vegam-whisper-medium-ml-fp16"):
+    model = WhisperModel(folder, device=device, compute_type=compute_type)
+    segments, info = model.transcribe(audio_file, beam_size=5)
+    lst = []
+    for segment in segments:
+        # print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
+        lst.append(segment.text)
+    return(" ".join(lst))
+# %% app.ipynb 9
+def gr_transcribe_malayalam_speech(microphone, file_upload, compute_type="int8", device="cpu", folder="vegam-whisper-medium-ml-fp16"):
+    warn_output = ""
+    if (microphone is not None) and (file_upload is not None):
+        warn_output = (
+            "WARNING: You've uploaded an audio file and used the microphone. "
+            "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
+        )
+    elif (microphone is None) and (file_upload is None):
+        return "ERROR: You have to either use the microphone or upload an audio file"
+    audio_file = microphone if microphone is not None else file_upload
+    model = WhisperModel(folder, device=device, compute_type=compute_type)
+    segments, info = model.transcribe(audio_file, beam_size=5)
+    lst = []
+    for segment in segments:
+        # print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
+        lst.append(segment.text)
+    return(" ".join(lst))
+# %% app.ipynb 16
+mf_transcribe = gr.Interface(
+    fn=gr_transcribe_malayalam_speech,
+    inputs=[
+        gr.inputs.Audio(source="microphone", type="filepath", optional=True),
+        gr.inputs.Audio(source="upload", type="filepath", optional=True),
+    ],
+    outputs="text",
+    title="PALLAKKU (പല്ലക്ക്)",
+    description=(
+        "Pallakku is a Malayalam speech to text demo leveraging the model-weights of [vegam-whisper-medium-ml](https://huggingface.co/kurianbenoy/vegam-whisper-medium-ml-fp16)."
+    ),
+    article="Please note that this demo now uses CPU only and in my testing for a 5 seconds audio file it can take upto 15 seconds for results to come. If you are interested to use a GPU based API instead, feel free to contact the author @ kurian.bkk@gmail.com",
+    allow_flagging="never",
+)
+# %% app.ipynb 17
+mf_transcribe.launch(share=True)

requirements.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+gradio==3.31.0
+faster-whisper==0.5.1
+torch