File size: 4,111 Bytes
eb3ba2e
 
 
 
 
 
 
 
 
6f1ebe2
5837809
97fc03f
925d97e
 
 
 
 
 
 
 
 
 
6f1ebe2
f98d769
6f1ebe2
 
 
 
 
 
 
 
 
 
f98d769
eb3ba2e
 
709d04e
eb3ba2e
 
 
 
 
 
 
 
 
f98d769
eb3ba2e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f98d769
 
 
 
 
925d97e
 
 
f98d769
 
1714f95
 
f98d769
6f1ebe2
97fc03f
b56d565
5837809
 
 
 
 
 
 
 
 
 
 
6f1ebe2
 
 
eb3ba2e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
import os
import uuid
import numpy as np
import torch
import soundfile as sf
from gtts import gTTS
import edge_tts
from inference import Inference
import asyncio
from elevenlabs import voices, generate, save
from elevenlabs.api.error import UnauthenticatedRateLimitError
# CoquiTTS is not available on Windows, so `platform` is used below to import it conditionally.
import platform

# Language codes offered by the CoquiTTS backend. Stays an empty list on
# Windows, where the plugin is never imported and `coquiTTS` is not defined.
COQUI_LANGUAGES = []
if platform.system() != 'Windows':
    # Imported lazily so that merely loading this module does not require the
    # plugin on Windows.
    from neon_tts_plugin_coqui import CoquiTTS
    
    # Expose the keys of `CoquiTTS.langs` and create the single shared
    # synthesizer instance that `tts_infer` calls.
    COQUI_LANGUAGES = list(CoquiTTS.langs.keys())
    coquiTTS = CoquiTTS()


# ElevenLabs: voice objects returned by `voices()`, fetched once when this
# module is imported.
# NOTE(review): presumably this performs a network request at import time,
# so a failure here would prevent the module from loading — confirm.
ELEVENLABS_VOICES_RAW = voices()

def get_elevenlabs_voice_names():
    """Return the ``name`` of every voice in ``ELEVENLABS_VOICES_RAW``.

    The names are returned as a new list, in the same order as the voices
    appear in ``ELEVENLABS_VOICES_RAW``.
    """
    return [voice.name for voice in ELEVENLABS_VOICES_RAW]

# Names of the ElevenLabs voices, computed once at import time from
# ELEVENLABS_VOICES_RAW.
ELEVENLABS_VOICES_NAMES = get_elevenlabs_voice_names()

def _synthesize_with_edge_tts(tts_text, tts_model, output_path):
    """Write speech for *tts_text* to *output_path* using Edge-tts, falling back to gTTS.

    If Edge-tts fails, gTTS is tried with the same text. If that fails too, a
    placeholder clip (the text "a") is written so that a file still exists.

    Returns:
        True when the requested text was synthesized (by Edge-tts or gTTS),
        False when only the placeholder clip was written.
    """
    # The first two characters of the model id are used as the gTTS language code.
    language = tts_model[:2]
    # Edge-tts receives the model id without its last "-"-separated field.
    voice = "-".join(tts_model.split("-")[:-1])

    try:
        asyncio.run(edge_tts.Communicate(tts_text, voice).save(output_path))
        return True
    except Exception as e:
        print("ERROR", e)

    try:
        gTTS(tts_text, lang=language).save(output_path)
        print(
            f"No audio was received. Please change the tts voice for {tts_model}. USING gTTS."
        )
        return True
    except Exception:
        # Catch Exception rather than everything, so KeyboardInterrupt and
        # SystemExit are not swallowed by the fallback.
        gTTS("a", lang=language).save(output_path)
        print("Error: Audio will be replaced.")
        return False


def tts_infer(tts_text, model_url, tts_method, tts_model, tts_api_key, language):
    """Synthesize *tts_text* to a WAV file and optionally run voice conversion on it.

    Args:
        tts_text: Text to synthesize. Must be non-empty.
        model_url: Passed to ``Inference`` as ``model_name``. When falsy, the
            raw TTS audio is returned without conversion.
        tts_method: Backend to use: "Edge-tts", "CoquiTTS" or "ElevenLabs".
        tts_model: Voice/model identifier for the backend. Required for every
            backend except "CoquiTTS".
        tts_api_key: API key forwarded to ElevenLabs ``generate``.
        language: Language passed to CoquiTTS. Edge-tts ignores it and derives
            the language from ``tts_model``.

    Returns:
        A ``(message_or_output, audio_path_or_None)`` tuple. The first item is
        a user-facing message string, or the value returned by
        ``Inference.run()`` when conversion ran. The second item is the path
        of the audio to play, or ``None`` on failure.
    """
    if not tts_text:
        return 'Primero escribe el texto que quieres convertir.', None
    if not tts_model and tts_method != 'CoquiTTS':
        return 'Selecciona un modelo TTS antes de convertir.', None

    f0_method = "harvest"
    output_folder = "audios"
    os.makedirs(output_folder, exist_ok=True)
    converted_tts_filename = os.path.join(output_folder, f"tts_out_{uuid.uuid4()}.wav")
    success = False

    if tts_method == "Edge-tts":
        success = _synthesize_with_edge_tts(tts_text, tts_model, converted_tts_filename)

    elif tts_method == "CoquiTTS":
        # `coquiTTS` only exists when the plugin was imported (non-Windows).
        if platform.system() == 'Windows':
            return "Funcionalidad no disponible en windows", None

        print(tts_text, language)
        coquiTTS.get_tts(tts_text, converted_tts_filename, speaker={"language": language})
        success = True

    elif tts_method == 'ElevenLabs':
        # NOTE(review): this limit is applied even when an API key is given,
        # although the message refers to unauthenticated accounts — confirm.
        if len(tts_text) > 2499:
            return "El límite de cuentas no logeadas es de 2500 caracteres.", None
        try:
            audio = generate(
                text=tts_text,
                voice=tts_model,
                model="eleven_multilingual_v2",
                api_key=tts_api_key,
            )
            save(audio=audio, filename=converted_tts_filename)
            success = True
        except UnauthenticatedRateLimitError:
            return "Necesitas configurar tu API Key para usar elevenlabs", None

    # No backend wrote a file (for example an unrecognised tts_method): report
    # the failure instead of handing back a path that does not exist.
    if not os.path.exists(converted_tts_filename):
        return "Ocurrió un error durante la conversión", None

    if not model_url:
        return 'Pon la url del modelo si quieres aplicarle otro tono.', converted_tts_filename

    if not success:
        # Only the placeholder clip was produced and it will not be returned,
        # so do not leave it behind on disk.
        os.remove(converted_tts_filename)
        return "Ocurrió un error durante la conversión", None

    try:
        inference = Inference(
            model_name=model_url,
            f0_method=f0_method,
            source_audio_path=converted_tts_filename,
            output_file_name=os.path.join("./audio-outputs", os.path.basename(converted_tts_filename)),
        )
        output = inference.run()
    finally:
        # The intermediate TTS audio is removed even if the conversion raises.
        if os.path.exists(converted_tts_filename):
            os.remove(converted_tts_filename)

    # Remove the model file stored under "weights" for this run, if present.
    weights_path = os.path.join("weights", inference.model_name)
    if os.path.exists(weights_path):
        os.remove(weights_path)

    if 'success' in output and output['success']:
        return output, output['file']
    return output, None