f291528ef840e0812fc4459f0c0be17a01952a1e

Browse files

Files changed (9) hide show

README.md +11 -1
app.py +34 -0
gitattributes +35 -0
hubconf.py +46 -0
requirements.dev.txt +5 -0
requirements.ja.txt +5 -0
requirements.notebooks.txt +0 -0
requirements.txt +57 -0
setup.py +141 -0

README.md CHANGED Viewed

@@ -1,3 +1,13 @@
 ---
-license: apache-2.0
 ---

 ---
+title: Voice Clone
+emoji: 🏃
+colorFrom: blue
+colorTo: blue
+sdk: gradio
+sdk_version: 4.5.0
+app_file: app.py
+pinned: false
+license: mit
 ---
+Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference

app.py ADDED Viewed

	@@ -0,0 +1,34 @@

+import subprocess
+# Run the setup.py install command
+try:
+    subprocess.run(['python', 'setup.py', 'install', '--user'], check=True)
+    print("Installation successful.")
+except subprocess.CalledProcessError as e:
+    print(f"Installation failed with error: {e}")
+import gradio as gr
+import torch
+from TTS.api import TTS
+# Use the GPU when CUDA is available, otherwise fall back to the CPU
+device = "cuda" if torch.cuda.is_available() else "cpu"
+# Load the XTTS v2 model once at import time and move it to the selected device
+tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
+def voice_clone(text: str, speaker_wav: str, language: str):
+    # Run TTS
+    print("Speaker wav:", speaker_wav)
+    tts.tts_to_file(text=text, speaker_wav=speaker_wav, language=language, file_path="output.mp3")
+    return "output.mp3"
+iface = gr.Interface(fn=voice_clone,
+                     inputs=[gr.Textbox(lines=2, placeholder="Enter the text...", label="Text"),
+                             gr.Audio(type="filepath", label="Upload audio file"),
+                             gr.Radio(['ru', 'en', 'zh-cn', 'ja', 'de', 'fr', 'it', 'pt', 'pl', 'tr', 'ko', 'nl', 'cs', 'ar', 'es', 'hu'], label="language"),
+                            ],
+                     outputs=gr.Audio(type="filepath", label="Generated audio file"),
+                     title="Voice Cloning")
+iface.launch()

gitattributes ADDED Viewed

	@@ -0,0 +1,35 @@

+*.7z filter=lfs diff=lfs merge=lfs -text
+*.arrow filter=lfs diff=lfs merge=lfs -text
+*.bin filter=lfs diff=lfs merge=lfs -text
+*.bz2 filter=lfs diff=lfs merge=lfs -text
+*.ckpt filter=lfs diff=lfs merge=lfs -text
+*.ftz filter=lfs diff=lfs merge=lfs -text
+*.gz filter=lfs diff=lfs merge=lfs -text
+*.h5 filter=lfs diff=lfs merge=lfs -text
+*.joblib filter=lfs diff=lfs merge=lfs -text
+*.lfs.* filter=lfs diff=lfs merge=lfs -text
+*.mlmodel filter=lfs diff=lfs merge=lfs -text
+*.model filter=lfs diff=lfs merge=lfs -text
+*.msgpack filter=lfs diff=lfs merge=lfs -text
+*.npy filter=lfs diff=lfs merge=lfs -text
+*.npz filter=lfs diff=lfs merge=lfs -text
+*.onnx filter=lfs diff=lfs merge=lfs -text
+*.ot filter=lfs diff=lfs merge=lfs -text
+*.parquet filter=lfs diff=lfs merge=lfs -text
+*.pb filter=lfs diff=lfs merge=lfs -text
+*.pickle filter=lfs diff=lfs merge=lfs -text
+*.pkl filter=lfs diff=lfs merge=lfs -text
+*.pt filter=lfs diff=lfs merge=lfs -text
+*.pth filter=lfs diff=lfs merge=lfs -text
+*.rar filter=lfs diff=lfs merge=lfs -text
+*.safetensors filter=lfs diff=lfs merge=lfs -text
+saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+*.tar.* filter=lfs diff=lfs merge=lfs -text
+*.tar filter=lfs diff=lfs merge=lfs -text
+*.tflite filter=lfs diff=lfs merge=lfs -text
+*.tgz filter=lfs diff=lfs merge=lfs -text
+*.wasm filter=lfs diff=lfs merge=lfs -text
+*.xz filter=lfs diff=lfs merge=lfs -text
+*.zip filter=lfs diff=lfs merge=lfs -text
+*.zst filter=lfs diff=lfs merge=lfs -text
+*tfevents* filter=lfs diff=lfs merge=lfs -text

hubconf.py ADDED Viewed

	@@ -0,0 +1,46 @@

+dependencies = [  # package names torch.hub checks are installed before loading this hubconf
+    'torch', 'gdown', 'pysbd', 'gruut', 'anyascii', 'pypinyin', 'coqpit', 'mecab-python3', 'unidic-lite'
+]
+import torch
+from TTS.utils.manage import ModelManager
+from TTS.utils.synthesizer import Synthesizer
+def tts(model_name='tts_models/en/ljspeech/tacotron2-DCA',
+        vocoder_name=None,
+        use_cuda=False):
+    """TTS entry point for PyTorch Hub that provides a Synthesizer object to synthesize speech from a give text.
+    Example:
+        >>> synthesizer = torch.hub.load('coqui-ai/TTS', 'tts', source='github')
+        >>> wavs = synthesizer.tts("This is a test! This is also a test!!")
+            wavs - is a list of values of the synthesized speech.
+    Args:
+        model_name (str, optional): One of the model names from .model.json. Defaults to 'tts_models/en/ljspeech/tacotron2-DCA'.
+        vocoder_name (str, optional): One of the model names from .model.json. Defaults to 'vocoder_models/en/ljspeech/multiband-melgan'.
+        pretrained (bool, optional): [description]. Defaults to True.
+    Returns:
+        TTS.utils.synthesizer.Synthesizer: Synthesizer object wrapping both vocoder and tts models.
+    """
+    manager = ModelManager()
+    model_path, config_path, model_item = manager.download_model(model_name)
+    vocoder_name = model_item[
+        'default_vocoder'] if vocoder_name is None else vocoder_name
+    vocoder_path, vocoder_config_path, _ = manager.download_model(vocoder_name)
+    # create synthesizer
+    synt = Synthesizer(tts_checkpoint=model_path,
+                       tts_config_path=config_path,
+                       vocoder_checkpoint=vocoder_path,
+                       vocoder_config=vocoder_config_path,
+                       use_cuda=use_cuda)
+    return synt
+if __name__ == '__main__':  # manual smoke test, only when this file is run as a script
+    synthesizer = torch.hub.load('coqui-ai/TTS:dev', 'tts', source='github')  # loads the 'tts' entry point from the 'dev' ref
+    synthesizer.tts("This is a test!")

requirements.dev.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+black
+coverage
+isort
+nose2
+pylint==2.10.2

requirements.ja.txt ADDED Viewed

	@@ -0,0 +1,5 @@

+# These cause some compatibility issues on some systems and are not strictly necessary
+# japanese g2p deps
+mecab-python3==1.0.6
+unidic-lite==1.0.8
+cutlet

requirements.notebooks.txt ADDED Viewed

File without changes

requirements.txt ADDED Viewed

	@@ -0,0 +1,57 @@

+# core deps
+numpy==1.22.0;python_version<="3.10"
+numpy>=1.24.3;python_version>"3.10"
+cython>=0.29.30
+scipy>=1.11.2
+torch>=2.1
+torchaudio
+soundfile>=0.12.0
+librosa>=0.10.0
+scikit-learn>=1.3.0
+numba==0.55.1;python_version<"3.9"
+numba>=0.57.0;python_version>="3.9"
+inflect>=5.6.0
+tqdm>=4.64.1
+anyascii>=0.3.0
+pyyaml>=6.0
+fsspec>=2023.6.0 # <= 2023.9.1 makes aux tests fail
+aiohttp>=3.8.1
+packaging>=23.1
+# deps for examples
+flask>=2.0.1
+# deps for inference
+pysbd>=0.3.4
+# deps for notebooks
+umap-learn>=0.5.1
+pandas>=1.4,<2.0
+# deps for training
+matplotlib>=3.7.0
+# coqui stack
+trainer>=0.0.32
+# config management
+coqpit>=0.0.16
+# chinese g2p deps
+jieba
+pypinyin
+# korean
+hangul_romanize
+# gruut+supported langs
+gruut[de,es,fr]==2.2.3
+# deps for korean
+jamo
+nltk
+g2pkk>=0.1.1
+# deps for bangla
+bangla
+bnnumerizer
+bnunicodenormalizer
+#deps for tortoise
+einops>=0.6.0
+transformers>=4.33.0
+#deps for bark
+encodec>=0.1.1
+# deps for XTTS
+unidecode>=1.3.2
+num2words
+spacy[ja]>=3
+gradio

setup.py ADDED Viewed

	@@ -0,0 +1,141 @@

+#!/usr/bin/env python
+#                   ,*++++++*,                ,*++++++*,
+#                *++.        .+++          *++.        .++*
+#              *+*     ,++++*   *+*      *+*   ,++++,     *+*
+#             ,+,   .++++++++++* ,++,,,,*+, ,++++++++++.   *+,
+#             *+.  .++++++++++++..++    *+.,++++++++++++.  .+*
+#             .+*   ++++++++++++.*+,    .+*.++++++++++++   *+,
+#              .++   *++++++++* ++,      .++.*++++++++*   ++,
+#               ,+++*.    . .*++,          ,++*.      .*+++*
+#              *+,   .,*++**.                  .**++**.   ,+*
+#             .+*                                          *+,
+#             *+.                   Coqui                  .+*
+#             *+*              +++   TTS  +++              *+*
+#             .+++*.            .          .             *+++.
+#              ,+* *+++*...                       ...*+++* *+,
+#               .++.    .""""+++++++****+++++++"""".     ++.
+#                 ,++.                                .++,
+#                   .++*                            *++.
+#                       *+++,                  ,+++*
+#                           .,*++++::::::++++*,.
+#                                  ``````
+import os
+import subprocess
+import sys
+from packaging.version import Version
+import numpy
+import setuptools.command.build_py
+import setuptools.command.develop
+from Cython.Build import cythonize
+from setuptools import Extension, find_packages, setup
+python_version = sys.version.split()[0]
+if Version(python_version) < Version("3.9") or Version(python_version) >= Version("3.12"):
+    raise RuntimeError("TTS requires python >= 3.9 and < 3.12 " "but your Python version is {}".format(sys.version))
+cwd = os.path.dirname(os.path.abspath(__file__))
+with open(os.path.join(cwd, "TTS", "VERSION")) as fin:
+    version = fin.read().strip()
+class build_py(setuptools.command.build_py.build_py):  # pylint: disable=too-many-ancestors
+    def run(self):
+        setuptools.command.build_py.build_py.run(self)
+class develop(setuptools.command.develop.develop):
+    def run(self):
+        setuptools.command.develop.develop.run(self)
+# The documentation for this feature is in server/README.md
+package_data = ["TTS/server/templates/*"]  # NOTE(review): not referenced by the setup() call in this file, which passes its own package_data dict
+def pip_install(package_name):
+    subprocess.call([sys.executable, "-m", "pip", "install", package_name])
+requirements = open(os.path.join(cwd, "requirements.txt"), "r").readlines()
+with open(os.path.join(cwd, "requirements.notebooks.txt"), "r") as f:
+    requirements_notebooks = f.readlines()
+with open(os.path.join(cwd, "requirements.dev.txt"), "r") as f:
+    requirements_dev = f.readlines()
+with open(os.path.join(cwd, "requirements.ja.txt"), "r") as f:
+    requirements_ja = f.readlines()
+requirements_all = requirements_dev + requirements_notebooks + requirements_ja
+with open("README.md", "r", encoding="utf-8") as readme_file:
+    README = readme_file.read()
+exts = [  # Cython extension modules compiled as part of the build
+    Extension(
+        name="TTS.tts.utils.monotonic_align.core",
+        sources=["TTS/tts/utils/monotonic_align/core.pyx"],
+    )
+]
+setup(
+    name="TTS",
+    version=version,  # read from TTS/VERSION earlier in this file
+    url="https://github.com/coqui-ai/TTS",
+    author="Eren Gölge",
+    author_email="egolge@coqui.ai",
+    description="Deep learning for Text to Speech by Coqui.",
+    long_description=README,  # contents of README.md, rendered as markdown
+    long_description_content_type="text/markdown",
+    license="MPL-2.0",
+    # Cython extension build settings
+    include_dirs=numpy.get_include(),  # NumPy header directory made available to the extension build
+    ext_modules=cythonize(exts, language_level=3),
+    # ext_modules=find_cython_extensions(),
+    # package
+    include_package_data=True,
+    packages=find_packages(include=["TTS"], exclude=["*.tests", "*tests.*", "tests.*", "*tests", "tests"]),  # NOTE(review): include=["TTS"] matches only the top-level package name; confirm subpackages are shipped some other way
+    package_data={
+        "TTS": [
+            "VERSION",
+        ]
+    },
+    project_urls={
+        "Documentation": "https://github.com/coqui-ai/TTS/wiki",
+        "Tracker": "https://github.com/coqui-ai/TTS/issues",
+        "Repository": "https://github.com/coqui-ai/TTS",
+        "Discussions": "https://github.com/coqui-ai/TTS/discussions",
+    },
+    cmdclass={
+        "build_py": build_py,
+        "develop": develop,
+        # 'build_ext': build_ext
+    },
+    install_requires=requirements,  # lines of requirements.txt
+    extras_require={
+        "all": requirements_all,  # dev + notebooks + ja combined
+        "dev": requirements_dev,
+        "notebooks": requirements_notebooks,
+        "ja": requirements_ja,
+    },
+    python_requires=">=3.9.0, <3.12",  # same range as the runtime version check near the top of this file
+    entry_points={"console_scripts": ["tts=TTS.bin.synthesize:main", "tts-server = TTS.server.server:main"]},
+    classifiers=[
+        "Programming Language :: Python",
+        "Programming Language :: Python :: 3",
+        "Programming Language :: Python :: 3.9",
+        "Programming Language :: Python :: 3.10",
+        "Programming Language :: Python :: 3.11",
+        "Development Status :: 3 - Alpha",
+        "Intended Audience :: Science/Research",
+        "Intended Audience :: Developers",
+        "Operating System :: POSIX :: Linux",
+        "License :: OSI Approved :: Mozilla Public License 2.0 (MPL 2.0)",
+        "Topic :: Software Development",
+        "Topic :: Software Development :: Libraries :: Python Modules",
+        "Topic :: Multimedia :: Sound/Audio :: Speech",
+        "Topic :: Multimedia :: Sound/Audio",
+        "Topic :: Multimedia",
+        "Topic :: Scientific/Engineering :: Artificial Intelligence",
+    ],
+    zip_safe=False,
+)