File size: 3,963 Bytes
8c4d22a
 
64fcafd
8c4d22a
 
 
 
 
 
64fcafd
8c4d22a
 
 
 
 
 
 
c49c056
64fcafd
 
 
8c4d22a
 
 
 
 
64fcafd
 
 
57c5ab0
64fcafd
 
 
 
57c5ab0
8c4d22a
 
 
64fcafd
 
 
 
c49c056
64fcafd
 
8c4d22a
c49c056
 
 
 
 
 
 
8c4d22a
64fcafd
8c4d22a
c49c056
8c4d22a
 
 
 
 
 
 
a575152
8c4d22a
 
57c5ab0
64fcafd
8c4d22a
 
 
 
 
 
64fcafd
 
 
 
 
 
8c4d22a
 
 
 
 
 
 
 
 
 
 
57c5ab0
8c4d22a
 
 
64fcafd
8c4d22a
 
 
64fcafd
8c4d22a
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
from io import BytesIO
import requests
from os.path import exists, join
from TTS.utils.synthesizer import Synthesizer
from enum import Enum
from .formatter import preprocess_text
from torch import no_grad

class Voices(Enum):
    """List of available voices for the model."""
    Olena = "olena"
    Mykyta = "mykyta"
    Lada = "lada"
    Dmytro = "dmytro"
    Olga = "olga"


class Stress(Enum):
    """Options how to stress sentence.
    - `dictionary` - performs lookup in dictionary, taking into account grammatical case of a word and its' neighbors
    - `model` - stress using transformer model"""
    Dictionary = "dictionary"
    Model = "model"


class TTS:
    """
    
    """
    def __init__(self, cache_folder=None, use_cuda=False) -> None:
        """
        Class to setup a text-to-speech engine, from download to model creation.  \n
        Downloads or uses files from `cache_folder` directory.  \n
        By default stores in current directory."""
        self.__setup_cache(cache_folder, use_cuda=use_cuda)


    def tts(self, text: str, voice: str, stress: str, output_fp=BytesIO()):
        """
        Run a Text-to-Speech engine and output to `output_fp` BytesIO-like object.
        - `text` - your model input text.
        - `voice` - one of predefined voices from `Voices` enum.
        - `stress` - stress method options, predefined in `Stress` enum.
        - `output_fp` - file-like object output. Stores in RAM by default.
        """

        if stress not in [option.value for option in Stress]:
            raise ValueError(f"Invalid value for stress option selected! Please use one of the following values: {', '.join([option.value for option in Stress])}.")

        if stress == Stress.Model.value:
            stress = True
        else:
            stress = False
        if voice not in [option.value for option in Voices]:
            raise ValueError(f"Invalid value for voice selected! Please use one of the following values: {', '.join([option.value for option in Voices])}.")

        text = preprocess_text(text, stress)

        with no_grad():
            wavs = self.synthesizer.tts(text, speaker_name=voice)
            self.synthesizer.save_wav(wavs, output_fp)

        output_fp.seek(0)

        return output_fp, text


    def __setup_cache(self, cache_folder=None, use_cuda=False):
        """Downloads models and stores them into `cache_folder`. By default stores in current directory."""
        print("downloading uk/mykyta/vits-tts")
        release_number = "v3.0.0"
        model_link = f"https://github.com/robinhad/ukrainian-tts/releases/download/{release_number}/model-inference.pth"
        config_link = f"https://github.com/robinhad/ukrainian-tts/releases/download/{release_number}/config.json"
        speakers_link = f"https://github.com/robinhad/ukrainian-tts/releases/download/{release_number}/speakers.pth"

        if cache_folder is None:
            cache_folder = "."

        model_path = join(cache_folder, "model.pth")
        config_path = join(cache_folder, "config.json")
        speakers_path = join(cache_folder, "speakers.pth")

        self.__download(model_link, model_path)
        self.__download(config_link, config_path)
        self.__download(speakers_link, speakers_path)

        self.synthesizer = Synthesizer(
            model_path,
            config_path,
            speakers_path,
            None,
            None,
            use_cuda=use_cuda
        )

        if self.synthesizer is None:
            raise NameError("Model not found")


    def __download(self, url, file_name):
        """Downloads file from `url` into local `file_name` file."""
        if not exists(file_name):
            print(f"Downloading {file_name}")
            r = requests.get(url, allow_redirects=True)
            with open(file_name, "wb") as file:
                file.write(r.content)
        else:
            print(f"Found {file_name}. Skipping download...")