Spaces:

Flux9665
/

IMS-Toucan

Running

App Files Files

IMS-Toucan / app.py

Florian Lux

try it one more time with speaker

49696ae over 2 years ago

raw

history blame

No virus

3.63 kB

	import logging
	import os

	import gradio as gr
	import numpy as np
	import torch

	from InferenceInterfaces.Meta_FastSpeech2 import Meta_FastSpeech2

	# Swap whatever gradio is installed for the pinned 2.7.5.2 release; the
	# interface further down is built with the gr.inputs / gr.outputs API.
	# NOTE(review): gradio has already been imported above, so this presumably
	# only takes effect for processes started afterwards — confirm.
	for pip_command in ("pip uninstall -y gradio", "pip install gradio==2.7.5.2"):
	    os.system(pip_command)


	def float2pcm(sig, dtype='int16'):
	    """
	    Convert a floating point signal into integer PCM samples.

	    The samples are multiplied by ``2 ** (bits - 1)`` of the target type,
	    shifted by ``min + 2 ** (bits - 1)`` of that type, clipped to the
	    representable range of the target type and finally cast to it.

	    Adapted from
	    https://gist.github.com/HudsonHuang/fbdf8e9af7993fe2a91620d3fb86a182

	    Args:
	        sig: array-like of floating point samples.
	        dtype: target integer dtype (anything ``np.dtype`` accepts).

	    Returns:
	        The converted samples with the requested integer dtype.

	    Raises:
	        TypeError: if ``sig`` is not a float array or ``dtype`` is not an
	            integer type.
	    """
	    samples = np.asarray(sig)
	    if samples.dtype.kind != 'f':
	        raise TypeError("'sig' must be a float array")

	    target = np.dtype(dtype)
	    if target.kind not in 'iu':
	        raise TypeError("'dtype' must be an integer type")

	    limits = np.iinfo(target)
	    half_range = 2 ** (limits.bits - 1)
	    # Shift applied after scaling, derived from the target type's minimum.
	    zero_point = limits.min + half_range

	    scaled = samples * half_range + zero_point
	    return scaled.clip(limits.min, limits.max).astype(target)


	class TTS_Interface:

	def __init__(self):
	self.device = "cuda" if torch.cuda.is_available() else "cpu"
	self.model = Meta_FastSpeech2(device=self.device)

	def read(self, prompt, language, path_to_audio):
	language_id_lookup = {
	"English" : "en",
	"German" : "de",
	"Greek" : "el",
	"Spanish" : "es",
	"Finnish" : "fi",
	"Russian" : "ru",
	"Hungarian": "hu",
	"Dutch" : "nl",
	"French" : "fr"
	}
	self.model.set_language(language_id_lookup[language])
	if path_to_audio is not None:
	try:
	self.model.set_utterance_embedding(path_to_audio)
	except RuntimeError:
	pass
	wav = self.model(prompt)
	return 48000, float2pcm(wav.cpu().numpy())


	# The synthesis backend is created once at start-up and shared by all requests.
	meta_model = TTS_Interface()

	# HTML passed to the interface as its ``article``.
	article = ("<p style='text-align: left'>This is still a work in progress, models will be exchanged for better ones as soon as they are done. All of those languages are spoken by a single model. Speakers can be transferred across languages. More languages will be added soon.</p>"
	           "<p style='text-align: center'><a href='https://github.com/DigitalPhonetics/IMS-Toucan' target='_blank'>Click here to learn more about the IMS Toucan Speech Synthesis Toolkit</a></p>")

	# Entries offered in the language dropdown.
	language_choices = ['English',
	                    'German',
	                    'Greek',
	                    'Spanish',
	                    'Finnish',
	                    'Russian',
	                    'Hungarian',
	                    'Dutch',
	                    'French']

	# Input widgets, in the order of the arguments of ``meta_model.read``.
	prompt_box = gr.inputs.Textbox(lines=2,
	                               placeholder="write what you want the synthesis to read here...",
	                               label=" ")
	language_dropdown = gr.inputs.Dropdown(language_choices,
	                                       type="value",
	                                       default='English',
	                                       label="Language Selection")
	voice_recorder = gr.inputs.Audio(source="microphone",
	                                 optional=True,
	                                 label="Make the TTS imitate your Voice (optional, press once to start recording and again to stop)",
	                                 type="filepath")

	# Output widget receiving the (sample_rate, samples) tuple returned by ``read``.
	audio_player = gr.outputs.Audio(type="numpy", label=None)

	iface = gr.Interface(fn=meta_model.read,
	                     inputs=[prompt_box, language_dropdown, voice_recorder],
	                     outputs=audio_player,
	                     layout="vertical",
	                     title="IMS Toucan Multilingual Multispeaker Demo",
	                     thumbnail="Utility/toucan.png",
	                     theme="default",
	                     allow_flagging="never",
	                     allow_screenshot=False,
	                     article=article)
	iface.launch(enable_queue=True)