clone_vox / coqui_tts.py
Amamrnaf
fixe
9232118
raw
history blame
981 Bytes
import os
import noisereduce as nr
import soundfile as sf
# from moviepy.editor import *
import string
import json
from glob import glob
import torchaudio
import subprocess
import shutil
import pyloudnorm as pyln
import torch
from TTS.api import TTS
def run_audio_generation_v1(text, accent='None'):
    """Synthesize ``text`` to a WAV file using a cleaned-up reference voice clip.

    The reference recording at ``./tmp/audio/input_src/0.wav`` is downmixed to
    mono (if it has several channels), passed through noise reduction and
    saved as 16-bit PCM to ``./tmp/audio/speaker_wav.wav``. That cleaned clip
    is then handed to the ``xtts_v2`` model as ``speaker_wav`` and the
    synthesized audio is written to ``audio/output.wav``.

    Args:
        text: Text to synthesize; the model is called with ``language="en"``.
        accent: Currently unused; kept so existing callers keep working.

    Returns:
        None. The result is the file written to ``audio/output.wav``.
    """
    source_path = "./tmp/audio/input_src/0.wav"
    speaker_path = './tmp/audio/speaker_wav.wav'
    output_path = "audio/output.wav"

    # Pre-process the reference clip before loading the model, so a missing
    # or unreadable input fails fast instead of after an expensive model load.
    speaker_wav_data, speaker_wav_rate = sf.read(source_path)

    # soundfile returns multi-channel audio as (frames, channels), while
    # noisereduce expects 1-D or (channels, frames). Averaging the channels
    # yields the mono signal the pipeline is meant to work on.
    if speaker_wav_data.ndim > 1:
        speaker_wav_data = speaker_wav_data.mean(axis=1)

    # Remove background noise, then store as 16-bit PCM.
    speaker_wav_data_no_noise = nr.reduce_noise(
        y=speaker_wav_data, sr=speaker_wav_rate
    )
    sf.write(
        speaker_path,
        speaker_wav_data_no_noise,
        speaker_wav_rate,
        subtype='PCM_16',
    )

    # The output directory is not guaranteed to exist on a fresh checkout.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)

    # Use the GPU whenever CUDA is available (expected on the server).
    gpu = torch.cuda.is_available()
    tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2", gpu=gpu)

    tts.tts_to_file(
        text,
        speaker_wav=speaker_path,
        language="en",
        file_path=output_path,
    )