# GROTTS / app.py
# Martijn Bartelds
# Update app
# c3ebb96
# raw
# history blame
# 1.71 kB
import os
import time
import torch
import urllib.request
import gradio as gr
import nltk
import numpy as np
import soundfile as sf
from espnet2.bin.tts_inference import Text2Speech
from espnet2.utils.types import str_or_none
from pathlib import Path
from nltk.tokenize import sent_tokenize
# Download the Punkt data at start-up; the app splits input text with
# nltk's sent_tokenize before synthesis.
nltk.download("punkt")

# Options for loading the pretrained Gronings TTS model. Inference is pinned
# to the CPU; the remaining values are passed through to ESPnet unchanged.
_GOS_TTS_OPTIONS = {
    "model_tag": "bartelds/gos_tts",
    "device": "cpu",
    "speed_control_alpha": 1.0,
    "noise_scale": 1.0,
    "noise_scale_dur": 1.0,
}

# Single model instance, loaded once at import time and shared by all requests.
gos_text2speech = Text2Speech.from_pretrained(**_GOS_TTS_OPTIONS)
# Speaker id passed to the model (as ``sids``) for each selectable variant.
_SPEAKER_IDS = {
    "Hoogelaandsters": 1,
    "Oldambsters": 2,
    "Westerkertaaiers": 3,
}


def inference(text, lang):
    """Synthesize ``text`` in the requested variant and write it to ``out.wav``.

    The text is lowercased, split into sentences, and each sentence is
    synthesized separately with the speaker id that belongs to ``lang``.
    The per-sentence waveforms are concatenated into one audio file.

    Args:
        text: The text to synthesize.
        lang: Variant name; one of the keys of ``_SPEAKER_IDS``.

    Returns:
        A ``(path, path)`` tuple holding the written file's path twice, one
        entry per output component of the interface (audio player and file).

    Raises:
        ValueError: If ``text`` is empty/blank or ``lang`` is not a known
            variant.
    """
    # Validate up front: previously an unknown variant left ``wav`` unbound
    # and blank text failed deep inside ``np.concatenate``.
    if lang not in _SPEAKER_IDS:
        known = ", ".join(sorted(_SPEAKER_IDS))
        raise ValueError(f"Unknown variant {lang!r}; expected one of: {known}")
    if text is None or not text.strip():
        raise ValueError("Input text is empty.")

    # The speaker id does not change between sentences, so build it once.
    sids = np.array([_SPEAKER_IDS[lang]])

    with torch.no_grad():
        # Lowercase once before tokenizing; every sentence is then lowercase.
        sentences = sent_tokenize(text.lower())
        chunks = [
            gos_text2speech(sentence, sids=sids)["wav"].view(-1).cpu().numpy()
            for sentence in sentences
        ]

    waveform = np.concatenate(chunks)
    # NOTE(review): the fixed output path is overwritten on every call;
    # confirm requests are never processed concurrently.
    sf.write("out.wav", waveform, gos_text2speech.fs)
    return "out.wav", "out.wav"
# Static page content for the demo.
title = "Gronings text-to-speech"
examples = [["Mamme mos even noar winkel om n bosschop.", "Hoogelaandsters"]]

# Input components: free text plus a choice of variant.
_text_input = gr.inputs.Textbox(label="Input text", lines=3)
_variant_input = gr.inputs.Radio(
    choices=["Hoogelaandsters", "Oldambsters", "Westerkertaaiers"],
    type="value",
    default="Hoogelaandsters",
    label="Variant",
)

# Output components: one per element of the tuple returned by ``inference``.
_audio_output = gr.outputs.Audio(type="file", label="Output")
_file_output = gr.outputs.File()

# Build the interface and start serving it with queueing enabled.
_demo = gr.Interface(
    fn=inference,
    inputs=[_text_input, _variant_input],
    outputs=[_audio_output, _file_output],
    title=title,
    examples=examples,
)
_demo.launch(enable_queue=True)