antoniomae committed on
Commit
bbb0b42
1 Parent(s): 513c7c5

f291528ef840e0812fc4459f0c0be17a01952a1e

Browse files
Files changed (9) hide show
  1. README.md +11 -1
  2. app.py +34 -0
  3. gitattributes +35 -0
  4. hubconf.py +46 -0
  5. requirements.dev.txt +5 -0
  6. requirements.ja.txt +5 -0
  7. requirements.notebooks.txt +0 -0
  8. requirements.txt +57 -0
  9. setup.py +141 -0
README.md CHANGED
@@ -1,3 +1,13 @@
1
  ---
2
- license: apache-2.0
 
 
 
 
 
 
 
 
3
  ---
 
 
 
1
  ---
2
+ title: Voice Clone
3
+ emoji: 🏃
4
+ colorFrom: blue
5
+ colorTo: blue
6
+ sdk: gradio
7
+ sdk_version: 4.5.0
8
+ app_file: app.py
9
+ pinned: false
10
+ license: mit
11
  ---
12
+
13
+ Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
app.py ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import subprocess
2
+
3
+ # Run the setup.py install command
4
+ try:
5
+ subprocess.run(['python', 'setup.py', 'install', '--user'], check=True)
6
+ print("Installation successful.")
7
+ except subprocess.CalledProcessError as e:
8
+ print(f"Installation failed with error: {e}")
9
+
10
+ import gradio as gr
11
+ import torch
12
+ from TTS.api import TTS
13
+
14
+ # Get device
15
+ device = "cuda" if torch.cuda.is_available() else "cpu"
16
+
17
+ # Init TTS
18
+ tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
19
+
20
+ def voice_clone(text: str, speaker_wav: str, language: str):
21
+ # Run TTS
22
+ print("Speaker wav:", speaker_wav)
23
+ tts.tts_to_file(text=text, speaker_wav=speaker_wav, language=language, file_path="output.mp3")
24
+ return "output.mp3"
25
+
26
+ iface = gr.Interface(fn=voice_clone,
27
+ inputs=[gr.Textbox(lines=2, placeholder="Enter the text...", label="Text"),
28
+ gr.Audio(type="filepath", label="Upload audio file"),
29
+ gr.Radio(['ru', 'en', 'zh-cn', 'ja', 'de', 'fr', 'it', 'pt', 'pl', 'tr', 'ko', 'nl', 'cs', 'ar', 'es', 'hu'], label="language"),
30
+ ],
31
+ outputs=gr.Audio(type="filepath", label="Generated audio file"),
32
+ title="Voice Cloning")
33
+
34
+ iface.launch()
gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
hubconf.py ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ dependencies = [
2
+ 'torch', 'gdown', 'pysbd', 'gruut', 'anyascii', 'pypinyin', 'coqpit', 'mecab-python3', 'unidic-lite'
3
+ ]
4
+ import torch
5
+
6
+ from TTS.utils.manage import ModelManager
7
+ from TTS.utils.synthesizer import Synthesizer
8
+
9
+
10
+ def tts(model_name='tts_models/en/ljspeech/tacotron2-DCA',
11
+ vocoder_name=None,
12
+ use_cuda=False):
13
+ """TTS entry point for PyTorch Hub that provides a Synthesizer object to synthesize speech from a given text.
14
+
15
+ Example:
16
+ >>> synthesizer = torch.hub.load('coqui-ai/TTS', 'tts', source='github')
17
+ >>> wavs = synthesizer.tts("This is a test! This is also a test!!")
18
+ wavs - is a list of values of the synthesized speech.
19
+
20
+ Args:
21
+ model_name (str, optional): One of the model names from .model.json. Defaults to 'tts_models/en/ljspeech/tacotron2-DCA'.
22
+ vocoder_name (str, optional): One of the model names from .model.json. Defaults to None, in which case the model's 'default_vocoder' entry is used.
23
+ use_cuda (bool, optional): Forwarded to the Synthesizer as its use_cuda argument. Defaults to False.
24
+
25
+ Returns:
26
+ TTS.utils.synthesizer.Synthesizer: Synthesizer object wrapping both vocoder and tts models.
27
+ """
28
+ manager = ModelManager()
29
+
30
+ model_path, config_path, model_item = manager.download_model(model_name)
31
+ vocoder_name = model_item[
32
+ 'default_vocoder'] if vocoder_name is None else vocoder_name
33
+ vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)
34
+
35
+ # create synthesizer
36
+ synt = Synthesizer(tts_checkpoint=model_path,
37
+ tts_config_path=config_path,
38
+ vocoder_checkpoint=vocoder_path,
39
+ vocoder_config=vocoder_config_path,
40
+ use_cuda=use_cuda)
41
+ return synt
42
+
43
+
44
+ if __name__ == '__main__':
45
+ synthesizer = torch.hub.load('coqui-ai/TTS:dev', 'tts', source='github')
46
+ synthesizer.tts("This is a test!")
requirements.dev.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ black
2
+ coverage
3
+ isort
4
+ nose2
5
+ pylint==2.10.2
requirements.ja.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # These cause some compatibility issues on some systems and are not strictly necessary
2
+ # japanese g2p deps
3
+ mecab-python3==1.0.6
4
+ unidic-lite==1.0.8
5
+ cutlet
requirements.notebooks.txt ADDED
File without changes
requirements.txt ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # core deps
2
+ numpy==1.22.0;python_version<="3.10"
3
+ numpy>=1.24.3;python_version>"3.10"
4
+ cython>=0.29.30
5
+ scipy>=1.11.2
6
+ torch>=2.1
7
+ torchaudio
8
+ soundfile>=0.12.0
9
+ librosa>=0.10.0
10
+ scikit-learn>=1.3.0
11
+ numba==0.55.1;python_version<"3.9"
12
+ numba>=0.57.0;python_version>="3.9"
13
+ inflect>=5.6.0
14
+ tqdm>=4.64.1
15
+ anyascii>=0.3.0
16
+ pyyaml>=6.0
17
+ fsspec>=2023.6.0 # <= 2023.9.1 makes aux tests fail
18
+ aiohttp>=3.8.1
19
+ packaging>=23.1
20
+ # deps for examples
21
+ flask>=2.0.1
22
+ # deps for inference
23
+ pysbd>=0.3.4
24
+ # deps for notebooks
25
+ umap-learn>=0.5.1
26
+ pandas>=1.4,<2.0
27
+ # deps for training
28
+ matplotlib>=3.7.0
29
+ # coqui stack
30
+ trainer>=0.0.32
31
+ # config management
32
+ coqpit>=0.0.16
33
+ # chinese g2p deps
34
+ jieba
35
+ pypinyin
36
+ # korean
37
+ hangul_romanize
38
+ # gruut+supported langs
39
+ gruut[de,es,fr]==2.2.3
40
+ # deps for korean
41
+ jamo
42
+ nltk
43
+ g2pkk>=0.1.1
44
+ # deps for bangla
45
+ bangla
46
+ bnnumerizer
47
+ bnunicodenormalizer
48
+ # deps for tortoise
49
+ einops>=0.6.0
50
+ transformers>=4.33.0
51
+ # deps for bark
52
+ encodec>=0.1.1
53
+ # deps for XTTS
54
+ unidecode>=1.3.2
55
+ num2words
56
+ spacy[ja]>=3
57
+ gradio
setup.py ADDED
@@ -0,0 +1,141 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+ # ,*++++++*, ,*++++++*,
3
+ # *++. .+++ *++. .++*
4
+ # *+* ,++++* *+* *+* ,++++, *+*
5
+ # ,+, .++++++++++* ,++,,,,*+, ,++++++++++. *+,
6
+ # *+. .++++++++++++..++ *+.,++++++++++++. .+*
7
+ # .+* ++++++++++++.*+, .+*.++++++++++++ *+,
8
+ # .++ *++++++++* ++, .++.*++++++++* ++,
9
+ # ,+++*. . .*++, ,++*. .*+++*
10
+ # *+, .,*++**. .**++**. ,+*
11
+ # .+* *+,
12
+ # *+. Coqui .+*
13
+ # *+* +++ TTS +++ *+*
14
+ # .+++*. . . *+++.
15
+ # ,+* *+++*... ...*+++* *+,
16
+ # .++. .""""+++++++****+++++++"""". ++.
17
+ # ,++. .++,
18
+ # .++* *++.
19
+ # *+++, ,+++*
20
+ # .,*++++::::::++++*,.
21
+ # ``````
22
+
23
+ import os
24
+ import subprocess
25
+ import sys
26
+ from packaging.version import Version
27
+
28
+ import numpy
29
+ import setuptools.command.build_py
30
+ import setuptools.command.develop
31
+ from Cython.Build import cythonize
32
+ from setuptools import Extension, find_packages, setup
33
+
34
+ python_version = sys.version.split()[0]
35
+ if Version(python_version) < Version("3.9") or Version(python_version) >= Version("3.12"):
36
+ raise RuntimeError("TTS requires python >= 3.9 and < 3.12 " "but your Python version is {}".format(sys.version))
37
+
38
+
39
+ cwd = os.path.dirname(os.path.abspath(__file__))
40
+ with open(os.path.join(cwd, "TTS", "VERSION")) as fin:
41
+ version = fin.read().strip()
42
+
43
+
44
+ class build_py(setuptools.command.build_py.build_py): # pylint: disable=too-many-ancestors
45
+ def run(self):
46
+ setuptools.command.build_py.build_py.run(self)
47
+
48
+
49
+ class develop(setuptools.command.develop.develop):
50
+ def run(self):
51
+ setuptools.command.develop.develop.run(self)
52
+
53
+
54
+ # The documentation for this feature is in server/README.md
55
+ package_data = ["TTS/server/templates/*"]
56
+
57
+
58
+ def pip_install(package_name):
59
+ subprocess.call([sys.executable, "-m", "pip", "install", package_name])
60
+
61
+
62
+ requirements = open(os.path.join(cwd, "requirements.txt"), "r").readlines()
63
+ with open(os.path.join(cwd, "requirements.notebooks.txt"), "r") as f:
64
+ requirements_notebooks = f.readlines()
65
+ with open(os.path.join(cwd, "requirements.dev.txt"), "r") as f:
66
+ requirements_dev = f.readlines()
67
+ with open(os.path.join(cwd, "requirements.ja.txt"), "r") as f:
68
+ requirements_ja = f.readlines()
69
+ requirements_all = requirements_dev + requirements_notebooks + requirements_ja
70
+
71
+ with open("README.md", "r", encoding="utf-8") as readme_file:
72
+ README = readme_file.read()
73
+
74
+ exts = [
75
+ Extension(
76
+ name="TTS.tts.utils.monotonic_align.core",
77
+ sources=["TTS/tts/utils/monotonic_align/core.pyx"],
78
+ )
79
+ ]
80
+ setup(
81
+ name="TTS",
82
+ version=version,
83
+ url="https://github.com/coqui-ai/TTS",
84
+ author="Eren Gölge",
85
+ author_email="egolge@coqui.ai",
86
+ description="Deep learning for Text to Speech by Coqui.",
87
+ long_description=README,
88
+ long_description_content_type="text/markdown",
89
+ license="MPL-2.0",
90
+ # cython
91
+ include_dirs=numpy.get_include(),
92
+ ext_modules=cythonize(exts, language_level=3),
93
+ # ext_modules=find_cython_extensions(),
94
+ # package
95
+ include_package_data=True,
96
+ packages=find_packages(include=["TTS"], exclude=["*.tests", "*tests.*", "tests.*", "*tests", "tests"]),
97
+ package_data={
98
+ "TTS": [
99
+ "VERSION",
100
+ ]
101
+ },
102
+ project_urls={
103
+ "Documentation": "https://github.com/coqui-ai/TTS/wiki",
104
+ "Tracker": "https://github.com/coqui-ai/TTS/issues",
105
+ "Repository": "https://github.com/coqui-ai/TTS",
106
+ "Discussions": "https://github.com/coqui-ai/TTS/discussions",
107
+ },
108
+ cmdclass={
109
+ "build_py": build_py,
110
+ "develop": develop,
111
+ # 'build_ext': build_ext
112
+ },
113
+ install_requires=requirements,
114
+ extras_require={
115
+ "all": requirements_all,
116
+ "dev": requirements_dev,
117
+ "notebooks": requirements_notebooks,
118
+ "ja": requirements_ja,
119
+ },
120
+ python_requires=">=3.9.0, <3.12",
121
+ entry_points={"console_scripts": ["tts=TTS.bin.synthesize:main", "tts-server = TTS.server.server:main"]},
122
+ classifiers=[
123
+ "Programming Language :: Python",
124
+ "Programming Language :: Python :: 3",
125
+ "Programming Language :: Python :: 3.9",
126
+ "Programming Language :: Python :: 3.10",
127
+ "Programming Language :: Python :: 3.11",
128
+ "Development Status :: 3 - Alpha",
129
+ "Intended Audience :: Science/Research",
130
+ "Intended Audience :: Developers",
131
+ "Operating System :: POSIX :: Linux",
132
+ "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)",
133
+ "Topic :: Software Development",
134
+ "Topic :: Software Development :: Libraries :: Python Modules",
135
+ "Topic :: Multimedia :: Sound/Audio :: Speech",
136
+ "Topic :: Multimedia :: Sound/Audio",
137
+ "Topic :: Multimedia",
138
+ "Topic :: Scientific/Engineering :: Artificial Intelligence",
139
+ ],
140
+ zip_safe=False,
141
+ )