MMS

Runtime error

MMS / app.py

remove extras

813fffa over 1 year ago

4.09 kB

	import gradio as gr
	import librosa
	from asr import transcribe, ASR_EXAMPLES, ASR_LANGUAGES, ASR_NOTE
	from tts import synthesize, TTS_EXAMPLES, TTS_LANGUAGES
	from lid import identify, LID_EXAMPLES
	from generate import generate, GenExamples

	# Token-budget constants. Nothing in this file reads them; presumably they are
	# meant as generation limits for the chat tab -- confirm before removing.
	MAX_MAX_NEW_TOKENS = 2048
	DEFAULT_MAX_NEW_TOKENS = 1024

	# `demo` is bound by the `with gr.Blocks() as demo:` statement further down,
	# so no placeholder Blocks instance is created here.

	# --- Speech-to-text tab -------------------------------------------------
	# Radio selector that decides which of the two audio widgets is in use.
	mms_select_source_trans = gr.Radio(
	    ["Record from Mic", "Upload audio"],
	    label="Audio input",
	    value="Record from Mic",
	)
	mms_mic_source_trans = gr.Audio(source="microphone", type="filepath", label="Use mic")
	# Created hidden; visibility is switched by the radio's change handler.
	mms_upload_source_trans = gr.Audio(
	    source="upload", type="filepath", label="Upload file", visible=False
	)

	# Dropdown entries are rendered as "<key> (<value>)" from ASR_LANGUAGES.
	_asr_language_choices = [f"{k} ({v})" for k, v in ASR_LANGUAGES.items()]
	# Take the default from the generated choices so it always matches an entry.
	# The previous hard-coded "eng English" has no parentheses and therefore could
	# never equal a choice; it is kept only as a fallback when no "eng" entry exists.
	_asr_default_language = next(
	    (choice for choice in _asr_language_choices if choice.startswith("eng ")),
	    "eng English",
	)

	mms_transcribe = gr.Interface(
	    fn=transcribe,
	    # Order matters: source selector, mic audio, uploaded audio, language.
	    inputs=[
	        mms_select_source_trans,
	        mms_mic_source_trans,
	        mms_upload_source_trans,
	        gr.Dropdown(
	            _asr_language_choices,
	            label="Language",
	            value=_asr_default_language,
	        ),
	    ],
	    outputs="text",
	    examples=ASR_EXAMPLES,
	    title="Speech-to-text",
	    description=(
	        "Transcribe audio from a microphone or input file in your desired language."
	    ),
	    article=ASR_NOTE,
	    allow_flagging="never",
	)

	# --- Text-to-speech tab -------------------------------------------------
	# Dropdown entries are rendered as "<key> (<value>)" from TTS_LANGUAGES.
	_tts_language_choices = [f"{k} ({v})" for k, v in TTS_LANGUAGES.items()]
	# Take the default from the generated choices so it always matches an entry.
	# The previous hard-coded "eng English" has no parentheses and therefore could
	# never equal a choice; it is kept only as a fallback when no "eng" entry exists.
	_tts_default_language = next(
	    (choice for choice in _tts_language_choices if choice.startswith("eng ")),
	    "eng English",
	)

	mms_synthesize = gr.Interface(
	    fn=synthesize,
	    # Order matters: input text, language, speed.
	    inputs=[
	        gr.Text(label="Input text"),
	        gr.Dropdown(
	            _tts_language_choices,
	            label="Language",
	            value=_tts_default_language,
	        ),
	        gr.Slider(minimum=0.1, maximum=4.0, value=1.0, step=0.1, label="Speed"),
	    ],
	    # Two outputs: the audio widget and a text field for the filtered input.
	    outputs=[
	        gr.Audio(label="Generated Audio", type="numpy"),
	        gr.Text(label="Filtered text after removing OOVs"),
	    ],
	    examples=TTS_EXAMPLES,
	    title="Text-to-speech",
	    description=("Generate audio in your desired language from input text."),
	    allow_flagging="never",
	)

	# --- Chat tab -----------------------------------------------------------
	# Three plain-text inputs, in this order: message, chat history, system prompt.
	_chat_input_labels = ("Message", "Chat History", "System prompt")

	chat_interface = gr.Interface(
	    fn=generate,
	    inputs=[gr.Textbox(label=caption, type="text") for caption in _chat_input_labels],
	    outputs=gr.Textbox(),
	    # Live mode, interpretation and flagging are left at their defaults.
	    title="Chat Interface",
	    description="Interactive chat interface using Hugging Face Transformers.",
	)

	# --- Language identification tab ----------------------------------------
	# Radio selector that decides which of the two audio widgets is in use.
	mms_select_source_iden = gr.Radio(
	    ["Record from Mic", "Upload audio"],
	    label="Audio input",
	    value="Record from Mic",
	)
	mms_mic_source_iden = gr.Audio(source="microphone", type="filepath", label="Use mic")
	# Created hidden; visibility is switched by the radio's change handler.
	mms_upload_source_iden = gr.Audio(
	    source="upload", type="filepath", label="Upload file", visible=False
	)

	mms_identify = gr.Interface(
	    fn=identify,
	    # Order matters: source selector, mic audio, uploaded audio.
	    inputs=[
	        mms_select_source_iden,
	        mms_mic_source_iden,
	        mms_upload_source_iden,
	    ],
	    # Label widget configured to show up to ten classes.
	    outputs=gr.Label(num_top_classes=10),
	    examples=LID_EXAMPLES,
	    title="Language Identification",
	    # Fixed user-facing typo: "Identity" -> "Identify".
	    description=("Identify the language of input audio."),
	    allow_flagging="never",
	)

	# Each interface is paired with the title shown on its tab; tab order follows
	# the order of this list.
	_tab_specs = [
	    (mms_transcribe, "Speech-to-text"),
	    (mms_synthesize, "Text-to-speech"),
	    (mms_identify, "Language Identification"),
	    (chat_interface, "Chat with Llama"),
	]

	tabbed_interface = gr.TabbedInterface(
	    [interface for interface, _ in _tab_specs],
	    [tab_title for _, tab_title in _tab_specs],
	)

	def _toggle_audio_source(choice):
	    """Return visibility updates for the (microphone, upload) audio widgets.

	    Args:
	        choice: The selected radio value, "Record from Mic" or "Upload audio".

	    Returns:
	        A two-item list of ``gr.update`` objects: the first shows the mic widget
	        only for "Record from Mic", the second shows the upload widget only for
	        "Upload audio". Any other value hides both.
	    """
	    return [
	        gr.update(visible=choice == "Record from Mic"),
	        gr.update(visible=choice == "Upload audio"),
	    ]


	with gr.Blocks() as demo:
	    tabbed_interface.render()

	    # Swap the visible audio widget on the speech-to-text tab.
	    mms_select_source_trans.change(
	        _toggle_audio_source,
	        inputs=[mms_select_source_trans],
	        outputs=[mms_mic_source_trans, mms_upload_source_trans],
	        queue=False,
	    )
	    # Same toggle for the language identification tab.
	    mms_select_source_iden.change(
	        _toggle_audio_source,
	        inputs=[mms_select_source_iden],
	        outputs=[mms_mic_source_iden, mms_upload_source_iden],
	        queue=False,
	    )

	# NOTE(review): `concurrency_count` (and `gr.Audio(source=...)` used above in
	# this file) look like Gradio 3.x-era arguments -- confirm the pinned Gradio
	# version supports them.
	demo.queue(concurrency_count=3)
	demo.launch()
	# Alternative previously tried: demo.queue(max_size=20).launch()