Spaces:

bartelds
/

GROTTS

Sleeping

GROTTS / app.py

Martijn Bartelds

Update app

c3ebb96 over 1 year ago

1.71 kB

	import os
	import time
	import torch
	import urllib.request
	import gradio as gr
	import nltk
	import numpy as np
	import soundfile as sf
	from espnet2.bin.tts_inference import Text2Speech
	from espnet2.utils.types import str_or_none
	from pathlib import Path
	from nltk.tokenize import sent_tokenize

	# Sentence-tokenizer data used by `sent_tokenize`. NLTK >= 3.8.2 loads the
	# "punkt_tab" resource instead of the older pickled "punkt" models, so both
	# are fetched to keep tokenization working whichever NLTK release is installed.
	nltk.download('punkt')
	nltk.download('punkt_tab')

	# Load the pretrained Gronings TTS model once, at import time, on the CPU.
	# The single instance is shared by every call to `inference`.
	gos_text2speech = Text2Speech.from_pretrained(
	    model_tag="bartelds/gos_tts",
	    device="cpu",
	    speed_control_alpha=1.0,
	    noise_scale=1.0,
	    noise_scale_dur=1.0,
	)

	def inference(text, lang):
	    """Synthesize ``text`` in the chosen Gronings variant and write it to ``out.wav``.

	    The text is lower-cased and split into sentences; each sentence is
	    synthesized separately with the speaker id of the selected variant and
	    the resulting waveforms are concatenated into a single audio file.

	    Args:
	        text: Text to synthesize.
	        lang: Variant name; one of "Hoogelaandsters", "Oldambsters" or
	            "Westerkertaaiers".

	    Returns:
	        A ``(path, path)`` tuple holding the written WAV file's path twice,
	        one entry per output component of the interface (audio and file).

	    Raises:
	        ValueError: If ``lang`` is not a known variant, or ``text`` is empty
	            or contains only whitespace.
	    """
	    speaker_ids = {
	        "Hoogelaandsters": 1,
	        "Oldambsters": 2,
	        "Westerkertaaiers": 3,
	    }

	    # Validate up front: an unknown variant previously left the waveform
	    # unassigned, and empty text ended in an opaque concatenation error.
	    if lang not in speaker_ids:
	        known = ", ".join(speaker_ids)
	        raise ValueError(f"Unknown variant {lang!r}; expected one of: {known}")
	    if not text or not text.strip():
	        raise ValueError("Input text is empty; nothing to synthesize.")

	    # The speaker id is the same for every sentence, so build it once.
	    sids = np.array([speaker_ids[lang]])

	    # Inference only: no gradients are needed.
	    with torch.no_grad():
	        sentences = sent_tokenize(text.lower())
	        waveforms = [
	            gos_text2speech(sentence, sids=sids)["wav"] for sentence in sentences
	        ]

	    # Flatten each waveform tensor to 1-D before joining them end to end.
	    concatenated_wav = np.concatenate([w.view(-1).cpu().numpy() for w in waveforms])

	    # NOTE(review): the output path is fixed, so each call overwrites the
	    # previous result.
	    sf.write("out.wav", concatenated_wav, gos_text2speech.fs)

	    return "out.wav", "out.wav"

	# Page heading and the pre-filled example row (input text, variant).
	title = "Gronings text-to-speech"
	examples = [
	    ['Mamme mos even noar winkel om n bosschop.', 'Hoogelaandsters']
	]

	# Input widgets: a three-line text box and a radio selector for the variant,
	# whose selected value is passed to `inference` as the `lang` argument.
	text_input = gr.inputs.Textbox(label="Input text", lines=3)
	variant_input = gr.inputs.Radio(
	    choices=["Hoogelaandsters", "Oldambsters", "Westerkertaaiers"],
	    type="value",
	    default="Hoogelaandsters",
	    label="Variant",
	)

	# Output widgets: one per element of the tuple returned by `inference`.
	audio_output = gr.outputs.Audio(type="file", label="Output")
	file_output = gr.outputs.File()

	# Build the interface, then start the app with request queueing enabled.
	demo = gr.Interface(
	    fn=inference,
	    inputs=[text_input, variant_input],
	    outputs=[audio_output, file_output],
	    title=title,
	    examples=examples,
	)
	demo.launch(enable_queue=True)