Spaces:

Akmyradov
/

TurkmenTTSweSTT

Running

App Files Files Community

TurkmenTTSweSTT / app.py

Akmyradov

Duplicate from facebook/MMS

757a712 over 1 year ago

raw

history blame

2.81 kB

	import gradio as gr
	import librosa
	from asr import transcribe
	from tts import synthesize, TTS_EXAMPLES

	# Per-task language tables: ALL_LANGUAGES[task][iso_code] -> language name,
	# filled from data/<task>/all_langs.tsv for each of the three tasks.
	ALL_LANGUAGES = {}

	for task in ["asr", "tts", "lid"]:
	    ALL_LANGUAGES.setdefault(task, {})
	    # Explicit encoding so the result does not depend on the platform default.
	    with open(f"data/{task}/all_langs.tsv", encoding="utf-8") as f:
	        for line in f:
	            # Strip the line terminator first: otherwise it stays glued to the
	            # language name and leaks into every label built from it.
	            line = line.strip()
	            if not line:
	                # Blank lines carry no entry and cannot be unpacked below.
	                continue
	            # The first whitespace run (space or tab) separates code from name;
	            # the name itself may contain further spaces.
	            iso, name = line.split(None, 1)
	            ALL_LANGUAGES[task][iso] = name


	def identify(microphone, file_upload):
	    """Return language-identification scores for one audio clip.

	    Args:
	        microphone: File path of the microphone recording, or None.
	        file_upload: File path of the uploaded audio, or None.

	    Returns:
	        A dict mapping "<iso>: <language name>" labels to scores, or an
	        error string when neither input was supplied.
	    """
	    LID_SAMPLING_RATE = 16_000

	    if microphone is None and file_upload is None:
	        return "ERROR: You have to either use the microphone or upload an audio file"

	    # When both inputs are supplied the microphone recording is used and the
	    # uploaded file is ignored.
	    audio_fp = microphone if microphone is not None else file_upload
	    # The decoded samples are not consumed yet; the call is kept so that
	    # unreadable audio still raises here instead of being silently accepted.
	    librosa.load(audio_fp, sr=LID_SAMPLING_RATE, mono=True)

	    # NOTE(review): these scores are hard-coded and independent of the audio;
	    # presumably a placeholder until a real LID model is wired in.
	    raw_output = {"eng": 0.9, "hin": 0.04, "heb": 0.03, "ara": 0.02, "fra": 0.01}

	    lid_names = ALL_LANGUAGES["lid"]
	    labelled = {}
	    for iso, score in raw_output.items():
	        # Tolerate codes missing from the table (fall back to the bare code)
	        # and names that still carry surrounding whitespace.
	        name = lid_names.get(iso, "").strip()
	        labelled[f"{iso}: {name}" if name else iso] = score
	    return labelled


	# Container that the tabbed interface is rendered into further down.
	demo = gr.Blocks()

	# Speech-to-text tab: two audio inputs (microphone and upload) plus a
	# language selector, all handed to `transcribe`; the result is plain text.
	_asr_language_choices = [
	    f"{iso}: {name}" for iso, name in ALL_LANGUAGES["asr"].items()
	]
	_asr_inputs = [
	    gr.Audio(source="microphone", type="filepath"),
	    gr.Audio(source="upload", type="filepath"),
	    gr.Dropdown(_asr_language_choices, label="Language", value="eng: English"),
	]
	mms_transcribe = gr.Interface(
	    fn=transcribe,
	    inputs=_asr_inputs,
	    outputs="text",
	    title="Speech-to-text",
	    description="Transcribe audio!",
	    allow_flagging="never",
	)

	# Text-to-speech tab: text, language and speed go to `synthesize`, which
	# fills an audio component and a text box showing the filtered input.
	_tts_language_choices = [
	    f"{iso}: {name}" for iso, name in ALL_LANGUAGES["tts"].items()
	]
	_tts_inputs = [
	    gr.Text(label="Input text"),
	    gr.Dropdown(_tts_language_choices, label="Language", value="eng: English"),
	    gr.Slider(minimum=0.1, maximum=4.0, value=1.0, step=0.1, label="Speed"),
	]
	_tts_outputs = [
	    gr.Audio(label="Generated Audio", type="numpy"),
	    gr.Text(label="Filtered text after removing OOVs"),
	]
	mms_synthesize = gr.Interface(
	    fn=synthesize,
	    inputs=_tts_inputs,
	    outputs=_tts_outputs,
	    examples=TTS_EXAMPLES,
	    title="Text-to-speech",
	    description="Generate audio!",
	    allow_flagging="never",
	)

	# Language-identification tab: microphone and upload inputs go to `identify`;
	# the scores are shown as a label widget limited to the ten top classes.
	mms_identify = gr.Interface(
	    fn=identify,
	    inputs=[
	        gr.Audio(source="microphone", type="filepath"),
	        gr.Audio(source="upload", type="filepath"),
	    ],
	    outputs=gr.Label(num_top_classes=10),
	    title="Language Identification",
	    # Fixed user-facing typo: the text previously read "Identity".
	    description=("Identify the language of audio!"),
	    allow_flagging="never",
	)

	# Tab title -> interface, in display order (dicts preserve insertion order).
	_tabs = {
	    "Speech-to-text": mms_transcribe,
	    "Text-to-speech": mms_synthesize,
	    "Language Identification": mms_identify,
	}

	# Render the three interfaces as tabs inside the Blocks container.
	with demo:
	    gr.TabbedInterface(list(_tabs.values()), list(_tabs.keys()))

	# Start the app as soon as this module is executed.
	demo.launch()