Pallakku / app.py
# AUTOGENERATED! DO NOT EDIT! File to edit: app.ipynb.
# %% auto 0
__all__ = ['mf_transcribe', 'transcribe_malayalam_speech', 'gr_transcribe_malayalam_speech']
# %% app.ipynb 4
import gradio as gr
from faster_whisper import WhisperModel
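# The local "vegam-whisper-medium-ml-fp16" folder is expected to hold a CTranslate2
# conversion of the vegam-whisper-medium-ml weights. As a sketch only (an assumption,
# not part of this app), such a folder could be produced with the converter that
# ships with CTranslate2:
#
#   ct2-transformers-converter --model kurianbenoy/vegam-whisper-medium-ml \
#       --output_dir vegam-whisper-medium-ml-fp16 --quantization float16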
# %% app.ipynb 8
def transcribe_malayalam_speech(audio_file, compute_type="int8", device="cpu", folder="vegam-whisper-medium-ml-fp16"):
    "Transcribe a Malayalam audio file with faster-whisper and return the text of all segments joined together."
    model = WhisperModel(folder, device=device, compute_type=compute_type)
    segments, info = model.transcribe(audio_file, beam_size=5)

    lst = []
    for segment in segments:
        # print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
        lst.append(segment.text)

    return " ".join(lst)
# %% app.ipynb 9
def gr_transcribe_malayalam_speech(microphone, file_upload, compute_type="int8", device="cpu", folder="vegam-whisper-medium-ml-fp16"):
    "Gradio wrapper: transcribe the recorded or uploaded audio, warning if both inputs are provided."
    warn_output = ""
    if (microphone is not None) and (file_upload is not None):
        warn_output = (
            "WARNING: You've uploaded an audio file and used the microphone. "
            "The recorded file from the microphone will be used and the uploaded audio will be discarded.\n"
        )
    elif (microphone is None) and (file_upload is None):
        return "ERROR: You have to either use the microphone or upload an audio file"

    audio_file = microphone if microphone is not None else file_upload

    model = WhisperModel(folder, device=device, compute_type=compute_type)
    segments, info = model.transcribe(audio_file, beam_size=5)

    lst = []
    for segment in segments:
        # print("[%.2fs -> %.2fs] %s" % (segment.start, segment.end, segment.text))
        lst.append(segment.text)

    # Prepend the warning (if any) so it is shown alongside the transcription.
    return warn_output + " ".join(lst)
# %% app.ipynb 16
mf_transcribe = gr.Interface(
    fn=gr_transcribe_malayalam_speech,
    inputs=[
        gr.inputs.Audio(source="microphone", type="filepath", optional=True),
        gr.inputs.Audio(source="upload", type="filepath", optional=True),
    ],
    outputs="text",
    title="PALLAKKU (പല്ലക്ക്)",
    description=(
        "Pallakku is a Malayalam speech-to-text demo leveraging the model weights of [vegam-whisper-medium-ml](https://huggingface.co/kurianbenoy/vegam-whisper-medium-ml-fp16)."
    ),
    article="Please note that this demo currently runs on CPU only; in my testing, a 5-second audio file can take up to 15 seconds to return results. If you are interested in using a GPU-based API instead, feel free to contact the author @ kurian.bkk@gmail.com",
    allow_flagging="never",
)
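# The two Audio inputs above map positionally onto the microphone and file_upload
# parameters of gr_transcribe_malayalam_speech; whichever the user leaves empty
# arrives as None.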
# %% app.ipynb 17
mf_transcribe.launch(share=False)