# GROTTS / app.py
# Martijn Bartelds
# Update app
# c3ebb96
import os
import time
import torch
import urllib.request
import gradio as gr
import nltk
import numpy as np
import soundfile as sf
from espnet2.bin.tts_inference import Text2Speech
from espnet2.utils.types import str_or_none
from pathlib import Path
from nltk.tokenize import sent_tokenize
# Download the "punkt" NLTK resource at start-up; sentence splitting with
# `sent_tokenize` is used later in this file (presumably backed by this data).
nltk.download("punkt")

# Settings passed to the pretrained model loader.
_GOS_TTS_SETTINGS = {
    "model_tag": "bartelds/gos_tts",
    "device": "cpu",
    "speed_control_alpha": 1.0,
    "noise_scale": 1.0,
    "noise_scale_dur": 1.0,
}

# Load the model once at import time so every request reuses the same instance.
gos_text2speech = Text2Speech.from_pretrained(**_GOS_TTS_SETTINGS)
# Speaker id passed to the model (as `sids`) for each selectable variant.
_SPEAKER_IDS = {
    "Hoogelaandsters": 1,
    "Oldambsters": 2,
    "Westerkertaaiers": 3,
}


def inference(text, lang):
    """Synthesize ``text`` in the requested variant and write it to ``out.wav``.

    The text is lower-cased, split into sentences with ``sent_tokenize`` and
    synthesized one sentence at a time; the per-sentence waveforms are
    flattened, concatenated and written with the model's ``fs`` as the
    sample rate.

    Args:
        text: Input text to synthesize.
        lang: Variant name; must be a key of ``_SPEAKER_IDS``.

    Returns:
        A 2-tuple containing the output path twice (the interface in this
        file declares two outputs: an audio player and a file download).

    Raises:
        ValueError: If ``lang`` is not a known variant, or if ``text``
            contains nothing to synthesize.
    """
    # Validate up front: previously an unknown variant left `wav` unbound
    # and surfaced as an UnboundLocalError inside the loop.
    try:
        speaker_id = _SPEAKER_IDS[lang]
    except KeyError:
        known = ", ".join(_SPEAKER_IDS)
        raise ValueError(
            f"Unknown variant {lang!r}; expected one of: {known}"
        ) from None

    # Empty input used to reach np.concatenate([]) and fail with an opaque
    # "need at least one array to concatenate" error.
    if not text or not text.strip():
        raise ValueError("Input text is empty; nothing to synthesize.")

    sids = np.array([speaker_id])
    with torch.no_grad():
        # Lower-casing once here is enough; every sentence inherits it.
        sentences = sent_tokenize(text.lower())
        # NOTE(review): the model output's "wav" entry is assumed to be a
        # torch tensor (it is flattened with .view(-1)) - confirm.
        waveforms = [
            gos_text2speech(sentence, sids=sids)["wav"].view(-1).cpu().numpy()
            for sentence in sentences
        ]

    # Defensive: the tokenizer may still yield no sentences.
    if not waveforms:
        raise ValueError("Input text is empty; nothing to synthesize.")

    concatenated_wav = np.concatenate(waveforms)
    sf.write("out.wav", concatenated_wav, gos_text2speech.fs)
    return "out.wav", "out.wav"
# Title passed to the interface.
title = "Gronings text-to-speech"

# Example rows offered in the UI: [input text, variant].
examples = [
    ['Mamme mos even noar winkel om n bosschop.', 'Hoogelaandsters']
]

# Input widgets: free text plus a radio selector for the variant.
variant_choices = ["Hoogelaandsters", "Oldambsters", "Westerkertaaiers"]
text_input = gr.inputs.Textbox(label="Input text", lines=3)
variant_input = gr.inputs.Radio(
    choices=variant_choices,
    type="value",
    default="Hoogelaandsters",
    label="Variant",
)

# Output widgets: `inference` returns the same path for both of them.
audio_output = gr.outputs.Audio(type="file", label="Output")
file_output = gr.outputs.File()

# Build the interface and start serving it with the request queue enabled.
demo = gr.Interface(
    fn=inference,
    inputs=[text_input, variant_input],
    outputs=[audio_output, file_output],
    title=title,
    examples=examples,
)
demo.launch(enable_queue=True)