# AUTOGENERATED! DO NOT EDIT! File to edit: app.ipynb.

# %% auto 0
__all__ = ['mf_transcribe', 'transcribe_malayalam_speech', 'gr_transcribe_malayalam_speech']

# %% app.ipynb 4
import gradio as gr
from faster_whisper import WhisperModel

# %% app.ipynb 8
def transcribe_malayalam_speech(audio_file, compute_type="int8", device="cpu",
                                folder="vegam-whisper-medium-ml-fp16"):
    """Transcribe a Malayalam audio file to text with faster-whisper.

    Parameters
    ----------
    audio_file : str
        Path to the audio file to transcribe.
    compute_type : str
        Quantization/compute type passed to ``WhisperModel`` (default ``"int8"``).
    device : str
        Inference device (default ``"cpu"``).
    folder : str
        Name/path of the directory holding the converted model weights.

    Returns
    -------
    str
        All segment texts joined by single spaces.
    """
    # NOTE(review): the model is re-loaded on every call; if latency matters,
    # consider caching the WhisperModel instance at module level.
    model = WhisperModel(folder, device=device, compute_type=compute_type)
    segments, _info = model.transcribe(audio_file, beam_size=5)
    # model.transcribe returns a lazy generator of segments; joining here
    # drives the actual decoding.
    return " ".join(segment.text for segment in segments)

# %% app.ipynb 9
def gr_transcribe_malayalam_speech(microphone, file_upload, compute_type="int8",
                                   device="cpu", folder="vegam-whisper-medium-ml-fp16"):
    """Gradio-facing wrapper around :func:`transcribe_malayalam_speech`.

    Accepts a microphone recording and/or an uploaded file (both are filepaths
    supplied by Gradio, or ``None`` when absent). The microphone input takes
    precedence when both are provided.

    Returns
    -------
    str
        The transcription, prefixed with a warning when both inputs were given,
        or an error message when neither input was given.
    """
    warn_output = ""
    if (microphone is not None) and (file_upload is not None):
        warn_output = (
            "WARNING: You've uploaded an audio file and used the microphone. "
            "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
        )
    elif (microphone is None) and (file_upload is None):
        return "ERROR: You have to either use the microphone or upload an audio file"

    audio_file = microphone if microphone is not None else file_upload

    # Bug fix: warn_output was previously constructed but never used, so the
    # user never saw the warning; prepend it to the result. Delegate to the
    # shared helper instead of duplicating the transcription logic.
    return warn_output + transcribe_malayalam_speech(
        audio_file, compute_type=compute_type, device=device, folder=folder
    )

# %% app.ipynb 16
mf_transcribe = gr.Interface(
    fn=gr_transcribe_malayalam_speech,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath", optional=True),
        gr.inputs.Audio(source="upload", type="filepath", optional=True),
    ],
    outputs="text",
    title="PALLAKKU (പല്ലക്ക്)",
    description=(
        "Pallakku is a Malayalam speech to text demo leveraging the model-weights of "
        "[vegam-whisper-medium-ml](https://huggingface.co/kurianbenoy/vegam-whisper-medium-ml-fp16)."
    ),
    article="Please note that this demo now uses CPU only and in my testing for a 5 seconds audio file it can take upto 15 seconds for results to come. If you are interested to use a GPU based API instead, feel free to contact the author @ kurian.bkk@gmail.com",
    allow_flagging="never",
)

# %% app.ipynb 17
mf_transcribe.launch(share=False)