Pallakku / app.py
# AUTOGENERATED! DO NOT EDIT! File to edit: app.ipynb.
# %% auto 0
__all__ = ['mf_transcribe', 'transcribe_malayalam_speech', 'gr_transcribe_malayalam_speech']
# %% app.ipynb 4
import gradio as gr
from faster_whisper import WhisperModel
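# The local "vegam-whisper-medium-ml-fp16" folder is expected to hold a CTranslate2
# conversion of the vegam-whisper-medium-ml weights. As a sketch only (an assumption,
# not part of this app), such a folder could be produced with the converter that
# ships with CTranslate2:
#
#   ct2-transformers-converter --model kurianbenoy/vegam-whisper-medium-ml \
#       --output_dir vegam-whisper-medium-ml-fp16 --quantization float16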
# %% app.ipynb 8
def transcribe_malayalam_speech(audio_file, compute_type="int8", device="cpu", folder="vegam-whisper-medium-ml-fp16"):
    "Transcribe a Malayalam audio file with faster-whisper and return the text of all segments joined together."
    model = WhisperModel(folder, device=device, compute_type=compute_type)
    segments, info = model.transcribe(audio_file, beam_size=5)

    lst = []
    for segment in segments:
        # print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
        lst.append(segment.text)

    return " ".join(lst)
# %% app.ipynb 9
def gr_transcribe_malayalam_speech(microphone, file_upload, compute_type="int8", device="cpu", folder="vegam-whisper-medium-ml-fp16"):
    "Gradio wrapper: transcribe the recorded or uploaded audio, warning if both inputs are provided."
    warn_output = ""
    if (microphone is not None) and (file_upload is not None):
        warn_output = (
            "WARNING: You've uploaded an audio file and used the microphone. "
            "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
        )
    elif (microphone is None) and (file_upload is None):
        return "ERROR: You have to either use the microphone or upload an audio file"

    audio_file = microphone if microphone is not None else file_upload

    model = WhisperModel(folder, device=device, compute_type=compute_type)
    segments, info = model.transcribe(audio_file, beam_size=5)

    lst = []
    for segment in segments:
        # print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
        lst.append(segment.text)

    # Prepend the warning (if any) so it is shown alongside the transcription.
    return warn_output + " ".join(lst)
# %% app.ipynb 16
mf_transcribe = gr.Interface(
    fn=gr_transcribe_malayalam_speech,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath", optional=True),
        gr.inputs.Audio(source="upload", type="filepath", optional=True),
    ],
    outputs="text",
    title="PALLAKKU (പല്ലക്ക്)",
    description=(
        "Pallakku is a Malayalam speech-to-text demo leveraging the model weights of [vegam-whisper-medium-ml](https://huggingface.co/kurianbenoy/vegam-whisper-medium-ml-fp16)."
    ),
    article="Please note that this demo currently runs on CPU only; in my testing, a 5-second audio file can take up to 15 seconds to return results. If you are interested in using a GPU-based API instead, feel free to contact the author @ kurian.bkk@gmail.com",
    allow_flagging="never",
)
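# The two Audio inputs above map positionally onto the microphone and file_upload
# parameters of gr_transcribe_malayalam_speech; whichever the user leaves empty
# arrives as None.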
# %% app.ipynb 17
mf_transcribe.launch(share=False)