# File size: 1,364 Bytes

# 17cf0ef

import sys
import os
import base64
from tts import TTS
from utils.file_utils import load_prompt_speech_from_file, load_voices
from app.config import settings
from datetime import datetime


def load_model(
    speed,
    voice,
    text,
    output_path,
    output_format="wav",
):
    """Synthesize ``text`` into ``output_path`` using a reference voice sample.

    Builds a ``TTS`` object from ``<DIR_ROOT>/VietTTS/models``, loads the
    voice map from ``<DIR_ROOT>/VietTTS/samples``, resolves ``voice`` to a
    sample file, loads that sample as the prompt speech and calls
    ``tts_to_file``.

    Args:
        speed: Speech speed; converted with ``float()`` before use.
        voice: String naming the voice. A string made only of decimal digits
            is treated as a zero-based position in the voice map's values;
            anything else is looked up as a key of the voice map.
        text: Text to synthesize.
        output_path: Destination path handed to ``tts_to_file``.
        output_format: Accepted so existing callers keep working; this
            function does not currently read it.

    Raises:
        ValueError: If ``speed`` cannot be converted to ``float``, or if
            ``voice`` does not resolve to an existing sample file (unknown
            key, position out of range, or file missing on disk).
    """
    print("Loading TTS model...", settings.DIR_ROOT)
    tts_obj = TTS(model_dir=os.path.join(settings.DIR_ROOT, "VietTTS", "models"))
    VOICE_MAP = load_voices(os.path.join(settings.DIR_ROOT, "VietTTS", "samples"))

    speed = float(speed)

    # isdecimal() (rather than isdigit()) only accepts characters that int()
    # can parse, so inputs such as "²" fall through to the key lookup.
    if voice.isdecimal():
        voice_files = list(VOICE_MAP.values())
        position = int(voice)
        # An out-of-range position is reported as "voice not found" below
        # instead of leaking an IndexError.
        voice_file = voice_files[position] if position < len(voice_files) else None
    else:
        voice_file = VOICE_MAP.get(voice)

    if not voice_file or not os.path.exists(voice_file):
        raise ValueError("Voice file not found")

    print(f"Output path: {output_path}")

    prompt_speech_16k = load_prompt_speech_from_file(
        filepath=voice_file,
        min_duration=3,
        max_duration=10,
    )

    tts_obj.tts_to_file(
        text=text,
        prompt_speech_16k=prompt_speech_16k,
        output_path=output_path,
        speed=speed,
    )

    print("END TTS worker")


if __name__ == "__main__":
    # CLI contract: SPEED VOICE TEXT OUTPUT_PATH [OUTPUT_FORMAT]
    # Fail with a readable usage message (written to stderr, non-zero exit
    # status) instead of an IndexError traceback when arguments are missing.
    if len(sys.argv) < 5:
        sys.exit(
            f"Usage: {sys.argv[0]} SPEED VOICE TEXT OUTPUT_PATH [OUTPUT_FORMAT]"
        )

    speed, voice, text, output_path = sys.argv[1:5]
    # The fifth argument is optional and defaults to "wav".
    output_format = sys.argv[5] if len(sys.argv) > 5 else "wav"

    load_model(speed, voice, text, output_path, output_format)