Nông Văn Thắng committed on
Commit
33acd27
0 Parent(s):
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .archivist/config.json +22 -0
  2. .devcontainer/devcontainer.json +32 -0
  3. .dockerignore +8 -0
  4. .github/dependabot.yml +12 -0
  5. .gitignore +5 -0
  6. .gitmodules +4 -0
  7. .gradio/certificate.pem +31 -0
  8. 404.html +24 -0
  9. Dockerfile +87 -0
  10. LICENSE.txt +373 -0
  11. README.md +88 -0
  12. TTS/.cardboardlint.yml +5 -0
  13. TTS/.dockerignore +9 -0
  14. TTS/.github/ISSUE_TEMPLATE/bug_report.yaml +85 -0
  15. TTS/.github/ISSUE_TEMPLATE/config.yml +8 -0
  16. TTS/.github/ISSUE_TEMPLATE/feature_request.md +25 -0
  17. TTS/.github/PR_TEMPLATE.md +15 -0
  18. TTS/.github/stale.yml +18 -0
  19. TTS/.github/workflows/aux_tests.yml +51 -0
  20. TTS/.github/workflows/data_tests.yml +51 -0
  21. TTS/.github/workflows/docker.yaml +65 -0
  22. TTS/.github/workflows/inference_tests.yml +53 -0
  23. TTS/.github/workflows/pypi-release.yml +94 -0
  24. TTS/.github/workflows/style_check.yml +46 -0
  25. TTS/.github/workflows/text_tests.yml +50 -0
  26. TTS/.github/workflows/tts_tests.yml +53 -0
  27. TTS/.github/workflows/tts_tests2.yml +53 -0
  28. TTS/.github/workflows/vocoder_tests.yml +48 -0
  29. TTS/.github/workflows/xtts_tests.yml +53 -0
  30. TTS/.github/workflows/zoo_tests0.yml +54 -0
  31. TTS/.github/workflows/zoo_tests1.yml +53 -0
  32. TTS/.github/workflows/zoo_tests2.yml +52 -0
  33. TTS/.gitignore +172 -0
  34. TTS/.pre-commit-config.yaml +27 -0
  35. TTS/.pylintrc +599 -0
  36. TTS/.readthedocs.yml +23 -0
  37. TTS/CITATION.cff +20 -0
  38. TTS/CODE_OF_CONDUCT.md +133 -0
  39. TTS/CODE_OWNERS.rst +75 -0
  40. TTS/CONTRIBUTING.md +162 -0
  41. TTS/Dockerfile +19 -0
  42. TTS/LICENSE.txt +373 -0
  43. TTS/MANIFEST.in +15 -0
  44. TTS/Makefile +78 -0
  45. TTS/README.md +407 -0
  46. TTS/TTS/.models.json +938 -0
  47. TTS/TTS/VERSION +1 -0
  48. TTS/TTS/__init__.py +6 -0
  49. TTS/TTS/api.py +458 -0
  50. TTS/TTS/bin/__init__.py +0 -0
.archivist/config.json ADDED
@@ -0,0 +1,22 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "projectId": "8734bd91-50fa-42ef-832b-9e3e937c2e25",
3
+ "ignoredFiles": [
4
+ ".DS_Store",
5
+ ".env",
6
+ ".archivist/",
7
+ ".idea/",
8
+ ".vscode/",
9
+ "env/",
10
+ "venv/",
11
+ ".git/",
12
+ ".gitignore",
13
+ "__pycache__/",
14
+ "__init__.py",
15
+ "dist/",
16
+ "node_modules/",
17
+ "package-lock.json",
18
+ "yarn.lock",
19
+ "*.config.js",
20
+ ".next/"
21
+ ]
22
+ }
.devcontainer/devcontainer.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ // For format details, see https://aka.ms/devcontainer.json. For config options, see the
2
+ // README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-dockerfile
3
+ {
4
+ "name": "Existing Dockerfile",
5
+ "build": {
6
+ // Sets the run context to one level up instead of the .devcontainer folder.
7
+ "context": "..",
8
+ // Update the 'dockerFile' property if you aren't using the standard 'Dockerfile' filename.
9
+ "dockerfile": "../Dockerfile"
10
+ },
11
+ "features": {
12
+ "ghcr.io/devcontainers/features/python:1": {
13
+ "installTools": true,
14
+ "version": "3.10"
15
+ }
16
+ }
17
+
18
+ // Features to add to the dev container. More info: https://containers.dev/features.
19
+ // "features": {},
20
+
21
+ // Use 'forwardPorts' to make a list of ports inside the container available locally.
22
+ // "forwardPorts": [],
23
+
24
+ // Uncomment the next line to run commands after the container is created.
25
+ // "postCreateCommand": "cat /etc/os-release",
26
+
27
+ // Configure tool-specific properties.
28
+ // "customizations": {},
29
+
30
+ // Uncomment to connect as an existing user other than the container default. More info: https://aka.ms/dev-containers-non-root.
31
+ // "remoteUser": "devcontainer"
32
+ }
.dockerignore ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ TTS/build/
2
+ TTS/dist/
3
+ TTS/TTS.egg-info/
4
+ TTS/tests/outputs/*
5
+ TTS/tests/train_outputs/*
6
+ __pycache__/
7
+ TTS/__pycache__/
8
+ *.pyc
.github/dependabot.yml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # To get started with Dependabot version updates, you'll need to specify which
2
+ # package ecosystems to update and where the package manifests are located.
3
+ # Please see the documentation for more information:
4
+ # https://docs.github.com/github/administering-a-repository/configuration-options-for-dependency-updates
5
+ # https://containers.dev/guide/dependabot
6
+
7
+ version: 2
8
+ updates:
9
+ - package-ecosystem: "devcontainers"
10
+ directory: "/"
11
+ schedule:
12
+ interval: weekly
.gitignore ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ _site
2
+ .sass-cache
3
+ .jekyll-metadata
4
+ .env
5
+ .env/
.gitmodules ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ [submodule "TTS"]
2
+ path = TTS
3
+ url = https://github.com/thinhlpg/TTS.git
4
+ branch = add-vietnamese-xtts
.gradio/certificate.pem ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ -----BEGIN CERTIFICATE-----
2
+ MIIFazCCA1OgAwIBAgIRAIIQz7DSQONZRGPgu2OCiwAwDQYJKoZIhvcNAQELBQAw
3
+ TzELMAkGA1UEBhMCVVMxKTAnBgNVBAoTIEludGVybmV0IFNlY3VyaXR5IFJlc2Vh
4
+ cmNoIEdyb3VwMRUwEwYDVQQDEwxJU1JHIFJvb3QgWDEwHhcNMTUwNjA0MTEwNDM4
5
+ WhcNMzUwNjA0MTEwNDM4WjBPMQswCQYDVQQGEwJVUzEpMCcGA1UEChMgSW50ZXJu
6
+ ZXQgU2VjdXJpdHkgUmVzZWFyY2ggR3JvdXAxFTATBgNVBAMTDElTUkcgUm9vdCBY
7
+ MTCCAiIwDQYJKoZIhvcNAQEBBQADggIPADCCAgoCggIBAK3oJHP0FDfzm54rVygc
8
+ h77ct984kIxuPOZXoHj3dcKi/vVqbvYATyjb3miGbESTtrFj/RQSa78f0uoxmyF+
9
+ 0TM8ukj13Xnfs7j/EvEhmkvBioZxaUpmZmyPfjxwv60pIgbz5MDmgK7iS4+3mX6U
10
+ A5/TR5d8mUgjU+g4rk8Kb4Mu0UlXjIB0ttov0DiNewNwIRt18jA8+o+u3dpjq+sW
11
+ T8KOEUt+zwvo/7V3LvSye0rgTBIlDHCNAymg4VMk7BPZ7hm/ELNKjD+Jo2FR3qyH
12
+ B5T0Y3HsLuJvW5iB4YlcNHlsdu87kGJ55tukmi8mxdAQ4Q7e2RCOFvu396j3x+UC
13
+ B5iPNgiV5+I3lg02dZ77DnKxHZu8A/lJBdiB3QW0KtZB6awBdpUKD9jf1b0SHzUv
14
+ KBds0pjBqAlkd25HN7rOrFleaJ1/ctaJxQZBKT5ZPt0m9STJEadao0xAH0ahmbWn
15
+ OlFuhjuefXKnEgV4We0+UXgVCwOPjdAvBbI+e0ocS3MFEvzG6uBQE3xDk3SzynTn
16
+ jh8BCNAw1FtxNrQHusEwMFxIt4I7mKZ9YIqioymCzLq9gwQbooMDQaHWBfEbwrbw
17
+ qHyGO0aoSCqI3Haadr8faqU9GY/rOPNk3sgrDQoo//fb4hVC1CLQJ13hef4Y53CI
18
+ rU7m2Ys6xt0nUW7/vGT1M0NPAgMBAAGjQjBAMA4GA1UdDwEB/wQEAwIBBjAPBgNV
19
+ HRMBAf8EBTADAQH/MB0GA1UdDgQWBBR5tFnme7bl5AFzgAiIyBpY9umbbjANBgkq
20
+ hkiG9w0BAQsFAAOCAgEAVR9YqbyyqFDQDLHYGmkgJykIrGF1XIpu+ILlaS/V9lZL
21
+ ubhzEFnTIZd+50xx+7LSYK05qAvqFyFWhfFQDlnrzuBZ6brJFe+GnY+EgPbk6ZGQ
22
+ 3BebYhtF8GaV0nxvwuo77x/Py9auJ/GpsMiu/X1+mvoiBOv/2X/qkSsisRcOj/KK
23
+ NFtY2PwByVS5uCbMiogziUwthDyC3+6WVwW6LLv3xLfHTjuCvjHIInNzktHCgKQ5
24
+ ORAzI4JMPJ+GslWYHb4phowim57iaztXOoJwTdwJx4nLCgdNbOhdjsnvzqvHu7Ur
25
+ TkXWStAmzOVyyghqpZXjFaH3pO3JLF+l+/+sKAIuvtd7u+Nxe5AW0wdeRlN8NwdC
26
+ jNPElpzVmbUq4JUagEiuTDkHzsxHpFKVK7q4+63SM1N95R1NbdWhscdCb+ZAJzVc
27
+ oyi3B43njTOQ5yOf+1CceWxG1bQVs5ZufpsMljq4Ui0/1lvh+wjChP4kqKOJ2qxq
28
+ 4RgqsahDYVvTH9w7jXbyLeiNdd8XM2w9U/t7y0Ff/9yi0GE44Za4rF2LN9d11TPA
29
+ mRGunUHBcnWEvgJBQl9nJEiU0Zsnvgc/ubhPgXRR4Xq37Z0j4r7g1SgEEzwxA57d
30
+ emyPxgcYxn/eR44/KJ4EBs+lVDR3veyJm+kXQ99b21/+jh5Xos1AnX5iItreGCc=
31
+ -----END CERTIFICATE-----
404.html ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ layout: default
3
+ ---
4
+
5
+ <style type="text/css" media="screen">
6
+ .container {
7
+ margin: 10px auto;
8
+ max-width: 600px;
9
+ text-align: center;
10
+ }
11
+ h1 {
12
+ margin: 30px 0;
13
+ font-size: 4em;
14
+ line-height: 1;
15
+ letter-spacing: -1px;
16
+ }
17
+ </style>
18
+
19
+ <div class="container">
20
+ <h1>404</h1>
21
+
22
+ <p><strong>Page not found :(</strong></p>
23
+ <p>The requested page could not be found.</p>
24
+ </div>
Dockerfile ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Use the official Python 3.10 base image
2
+ FROM python:3.10
3
+ # Set environment variables to avoid interactive prompts
4
+ ENV DEBIAN_FRONTEND=noninteractive
5
+ # Update package list and install necessary packages including Python 3.10
6
+
7
+
8
+ # Create a working directory
9
+ WORKDIR /app
10
+
11
+ EXPOSE 5050
12
+ # Copy all files from the current directory to the /app directory in the container
13
+ COPY . .
14
+
15
+
16
+ # docker run -it -p 5052:5052 akthangdz/tts1:latest /bin/bash -c "source .env/bin/activate && pip install --upgrade gradio && python vixtts_demo.py" --gpus all -t nvidia/cuda
17
+
18
+ RUN apt-get update
19
+
20
+ RUN apt-get -y install git \
21
+ curl \
22
+ autoconf \
23
+ bison \
24
+ build-essential \
25
+ libssl-dev \
26
+ libyaml-dev \
27
+ libreadline6-dev \
28
+ zlib1g-dev \
29
+ libncurses5-dev \
30
+ libffi-dev \
31
+ libgdbm6 \
32
+ libgdbm-dev \
33
+ libdb-dev \
34
+ apt-utils
35
+
36
+ # "#################################################"
37
+ # "GitHub Pages/Jekyll is based on Ruby. Set the version and path"
38
+ # "As of this writing, use Ruby 3.1.2"
39
+ # "Based on: https://talk.jekyllrb.com/t/liquid-4-0-3-tainted/7946/12"
40
+ ENV RBENV_ROOT /usr/local/src/rbenv
41
+ ENV RUBY_VERSION 3.1.2
42
+ ENV PATH ${RBENV_ROOT}/bin:${RBENV_ROOT}/shims:$PATH
43
+
44
+ # "#################################################"
45
+ # "Install rbenv to manage Ruby versions"
46
+ RUN git clone https://github.com/rbenv/rbenv.git ${RBENV_ROOT} \
47
+ && git clone https://github.com/rbenv/ruby-build.git \
48
+ ${RBENV_ROOT}/plugins/ruby-build \
49
+ && ${RBENV_ROOT}/plugins/ruby-build/install.sh \
50
+ && echo 'eval "$(rbenv init -)"' >> /etc/profile.d/rbenv.sh
51
+
52
+ # "#################################################"
53
+ # "Install ruby and set the global version"
54
+ RUN rbenv install ${RUBY_VERSION} \
55
+ && rbenv global ${RUBY_VERSION}
56
+
57
+ # "#################################################"
58
+ # "Install the version of Jekyll that GitHub Pages supports"
59
+ # "Based on: https://pages.github.com/versions/"
60
+ # "Note: If you always want the latest 3.9.x version,"
61
+ # " use this line instead:"
62
+ # " RUN gem install jekyll -v '~>3.9'"
63
+ RUN gem install jekyll -v '3.9.3'
64
+
65
+ RUN apt-get update && \
66
+ apt-get install -y sudo && \
67
+ apt-get clean && \
68
+ rm -rf /var/lib/apt/lists/*
69
+
70
+ RUN python -m venv .env && \
71
+ . .env/bin/activate && \
72
+ git submodule update --init --recursive && \
73
+ cd TTS && \
74
+ git fetch --tags && \
75
+ git checkout 0.1.1 && \
76
+ echo "Installing TTS..." && \
77
+ pip install --use-deprecated=legacy-resolver -e . -q && \
78
+ cd .. && \
79
+ echo "Installing other requirements..." && \
80
+ pip install -r requirements.txt -q && \
81
+ echo "Downloading Japanese/Chinese tokenizer..." && \
82
+ python -m unidic download && \
83
+ pip install --upgrade gradio && \
84
+ touch .env/ok
85
+
86
+ # Set the default command to run when starting the container
87
+ CMD ["/bin/bash"]
LICENSE.txt ADDED
@@ -0,0 +1,373 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Mozilla Public License Version 2.0
2
+ ==================================
3
+
4
+ 1. Definitions
5
+ --------------
6
+
7
+ 1.1. "Contributor"
8
+ means each individual or legal entity that creates, contributes to
9
+ the creation of, or owns Covered Software.
10
+
11
+ 1.2. "Contributor Version"
12
+ means the combination of the Contributions of others (if any) used
13
+ by a Contributor and that particular Contributor's Contribution.
14
+
15
+ 1.3. "Contribution"
16
+ means Covered Software of a particular Contributor.
17
+
18
+ 1.4. "Covered Software"
19
+ means Source Code Form to which the initial Contributor has attached
20
+ the notice in Exhibit A, the Executable Form of such Source Code
21
+ Form, and Modifications of such Source Code Form, in each case
22
+ including portions thereof.
23
+
24
+ 1.5. "Incompatible With Secondary Licenses"
25
+ means
26
+
27
+ (a) that the initial Contributor has attached the notice described
28
+ in Exhibit B to the Covered Software; or
29
+
30
+ (b) that the Covered Software was made available under the terms of
31
+ version 1.1 or earlier of the License, but not also under the
32
+ terms of a Secondary License.
33
+
34
+ 1.6. "Executable Form"
35
+ means any form of the work other than Source Code Form.
36
+
37
+ 1.7. "Larger Work"
38
+ means a work that combines Covered Software with other material, in
39
+ a separate file or files, that is not Covered Software.
40
+
41
+ 1.8. "License"
42
+ means this document.
43
+
44
+ 1.9. "Licensable"
45
+ means having the right to grant, to the maximum extent possible,
46
+ whether at the time of the initial grant or subsequently, any and
47
+ all of the rights conveyed by this License.
48
+
49
+ 1.10. "Modifications"
50
+ means any of the following:
51
+
52
+ (a) any file in Source Code Form that results from an addition to,
53
+ deletion from, or modification of the contents of Covered
54
+ Software; or
55
+
56
+ (b) any new file in Source Code Form that contains any Covered
57
+ Software.
58
+
59
+ 1.11. "Patent Claims" of a Contributor
60
+ means any patent claim(s), including without limitation, method,
61
+ process, and apparatus claims, in any patent Licensable by such
62
+ Contributor that would be infringed, but for the grant of the
63
+ License, by the making, using, selling, offering for sale, having
64
+ made, import, or transfer of either its Contributions or its
65
+ Contributor Version.
66
+
67
+ 1.12. "Secondary License"
68
+ means either the GNU General Public License, Version 2.0, the GNU
69
+ Lesser General Public License, Version 2.1, the GNU Affero General
70
+ Public License, Version 3.0, or any later versions of those
71
+ licenses.
72
+
73
+ 1.13. "Source Code Form"
74
+ means the form of the work preferred for making modifications.
75
+
76
+ 1.14. "You" (or "Your")
77
+ means an individual or a legal entity exercising rights under this
78
+ License. For legal entities, "You" includes any entity that
79
+ controls, is controlled by, or is under common control with You. For
80
+ purposes of this definition, "control" means (a) the power, direct
81
+ or indirect, to cause the direction or management of such entity,
82
+ whether by contract or otherwise, or (b) ownership of more than
83
+ fifty percent (50%) of the outstanding shares or beneficial
84
+ ownership of such entity.
85
+
86
+ 2. License Grants and Conditions
87
+ --------------------------------
88
+
89
+ 2.1. Grants
90
+
91
+ Each Contributor hereby grants You a world-wide, royalty-free,
92
+ non-exclusive license:
93
+
94
+ (a) under intellectual property rights (other than patent or trademark)
95
+ Licensable by such Contributor to use, reproduce, make available,
96
+ modify, display, perform, distribute, and otherwise exploit its
97
+ Contributions, either on an unmodified basis, with Modifications, or
98
+ as part of a Larger Work; and
99
+
100
+ (b) under Patent Claims of such Contributor to make, use, sell, offer
101
+ for sale, have made, import, and otherwise transfer either its
102
+ Contributions or its Contributor Version.
103
+
104
+ 2.2. Effective Date
105
+
106
+ The licenses granted in Section 2.1 with respect to any Contribution
107
+ become effective for each Contribution on the date the Contributor first
108
+ distributes such Contribution.
109
+
110
+ 2.3. Limitations on Grant Scope
111
+
112
+ The licenses granted in this Section 2 are the only rights granted under
113
+ this License. No additional rights or licenses will be implied from the
114
+ distribution or licensing of Covered Software under this License.
115
+ Notwithstanding Section 2.1(b) above, no patent license is granted by a
116
+ Contributor:
117
+
118
+ (a) for any code that a Contributor has removed from Covered Software;
119
+ or
120
+
121
+ (b) for infringements caused by: (i) Your and any other third party's
122
+ modifications of Covered Software, or (ii) the combination of its
123
+ Contributions with other software (except as part of its Contributor
124
+ Version); or
125
+
126
+ (c) under Patent Claims infringed by Covered Software in the absence of
127
+ its Contributions.
128
+
129
+ This License does not grant any rights in the trademarks, service marks,
130
+ or logos of any Contributor (except as may be necessary to comply with
131
+ the notice requirements in Section 3.4).
132
+
133
+ 2.4. Subsequent Licenses
134
+
135
+ No Contributor makes additional grants as a result of Your choice to
136
+ distribute the Covered Software under a subsequent version of this
137
+ License (see Section 10.2) or under the terms of a Secondary License (if
138
+ permitted under the terms of Section 3.3).
139
+
140
+ 2.5. Representation
141
+
142
+ Each Contributor represents that the Contributor believes its
143
+ Contributions are its original creation(s) or it has sufficient rights
144
+ to grant the rights to its Contributions conveyed by this License.
145
+
146
+ 2.6. Fair Use
147
+
148
+ This License is not intended to limit any rights You have under
149
+ applicable copyright doctrines of fair use, fair dealing, or other
150
+ equivalents.
151
+
152
+ 2.7. Conditions
153
+
154
+ Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
155
+ in Section 2.1.
156
+
157
+ 3. Responsibilities
158
+ -------------------
159
+
160
+ 3.1. Distribution of Source Form
161
+
162
+ All distribution of Covered Software in Source Code Form, including any
163
+ Modifications that You create or to which You contribute, must be under
164
+ the terms of this License. You must inform recipients that the Source
165
+ Code Form of the Covered Software is governed by the terms of this
166
+ License, and how they can obtain a copy of this License. You may not
167
+ attempt to alter or restrict the recipients' rights in the Source Code
168
+ Form.
169
+
170
+ 3.2. Distribution of Executable Form
171
+
172
+ If You distribute Covered Software in Executable Form then:
173
+
174
+ (a) such Covered Software must also be made available in Source Code
175
+ Form, as described in Section 3.1, and You must inform recipients of
176
+ the Executable Form how they can obtain a copy of such Source Code
177
+ Form by reasonable means in a timely manner, at a charge no more
178
+ than the cost of distribution to the recipient; and
179
+
180
+ (b) You may distribute such Executable Form under the terms of this
181
+ License, or sublicense it under different terms, provided that the
182
+ license for the Executable Form does not attempt to limit or alter
183
+ the recipients' rights in the Source Code Form under this License.
184
+
185
+ 3.3. Distribution of a Larger Work
186
+
187
+ You may create and distribute a Larger Work under terms of Your choice,
188
+ provided that You also comply with the requirements of this License for
189
+ the Covered Software. If the Larger Work is a combination of Covered
190
+ Software with a work governed by one or more Secondary Licenses, and the
191
+ Covered Software is not Incompatible With Secondary Licenses, this
192
+ License permits You to additionally distribute such Covered Software
193
+ under the terms of such Secondary License(s), so that the recipient of
194
+ the Larger Work may, at their option, further distribute the Covered
195
+ Software under the terms of either this License or such Secondary
196
+ License(s).
197
+
198
+ 3.4. Notices
199
+
200
+ You may not remove or alter the substance of any license notices
201
+ (including copyright notices, patent notices, disclaimers of warranty,
202
+ or limitations of liability) contained within the Source Code Form of
203
+ the Covered Software, except that You may alter any license notices to
204
+ the extent required to remedy known factual inaccuracies.
205
+
206
+ 3.5. Application of Additional Terms
207
+
208
+ You may choose to offer, and to charge a fee for, warranty, support,
209
+ indemnity or liability obligations to one or more recipients of Covered
210
+ Software. However, You may do so only on Your own behalf, and not on
211
+ behalf of any Contributor. You must make it absolutely clear that any
212
+ such warranty, support, indemnity, or liability obligation is offered by
213
+ You alone, and You hereby agree to indemnify every Contributor for any
214
+ liability incurred by such Contributor as a result of warranty, support,
215
+ indemnity or liability terms You offer. You may include additional
216
+ disclaimers of warranty and limitations of liability specific to any
217
+ jurisdiction.
218
+
219
+ 4. Inability to Comply Due to Statute or Regulation
220
+ ---------------------------------------------------
221
+
222
+ If it is impossible for You to comply with any of the terms of this
223
+ License with respect to some or all of the Covered Software due to
224
+ statute, judicial order, or regulation then You must: (a) comply with
225
+ the terms of this License to the maximum extent possible; and (b)
226
+ describe the limitations and the code they affect. Such description must
227
+ be placed in a text file included with all distributions of the Covered
228
+ Software under this License. Except to the extent prohibited by statute
229
+ or regulation, such description must be sufficiently detailed for a
230
+ recipient of ordinary skill to be able to understand it.
231
+
232
+ 5. Termination
233
+ --------------
234
+
235
+ 5.1. The rights granted under this License will terminate automatically
236
+ if You fail to comply with any of its terms. However, if You become
237
+ compliant, then the rights granted under this License from a particular
238
+ Contributor are reinstated (a) provisionally, unless and until such
239
+ Contributor explicitly and finally terminates Your grants, and (b) on an
240
+ ongoing basis, if such Contributor fails to notify You of the
241
+ non-compliance by some reasonable means prior to 60 days after You have
242
+ come back into compliance. Moreover, Your grants from a particular
243
+ Contributor are reinstated on an ongoing basis if such Contributor
244
+ notifies You of the non-compliance by some reasonable means, this is the
245
+ first time You have received notice of non-compliance with this License
246
+ from such Contributor, and You become compliant prior to 30 days after
247
+ Your receipt of the notice.
248
+
249
+ 5.2. If You initiate litigation against any entity by asserting a patent
250
+ infringement claim (excluding declaratory judgment actions,
251
+ counter-claims, and cross-claims) alleging that a Contributor Version
252
+ directly or indirectly infringes any patent, then the rights granted to
253
+ You by any and all Contributors for the Covered Software under Section
254
+ 2.1 of this License shall terminate.
255
+
256
+ 5.3. In the event of termination under Sections 5.1 or 5.2 above, all
257
+ end user license agreements (excluding distributors and resellers) which
258
+ have been validly granted by You or Your distributors under this License
259
+ prior to termination shall survive termination.
260
+
261
+ ************************************************************************
262
+ * *
263
+ * 6. Disclaimer of Warranty *
264
+ * ------------------------- *
265
+ * *
266
+ * Covered Software is provided under this License on an "as is" *
267
+ * basis, without warranty of any kind, either expressed, implied, or *
268
+ * statutory, including, without limitation, warranties that the *
269
+ * Covered Software is free of defects, merchantable, fit for a *
270
+ * particular purpose or non-infringing. The entire risk as to the *
271
+ * quality and performance of the Covered Software is with You. *
272
+ * Should any Covered Software prove defective in any respect, You *
273
+ * (not any Contributor) assume the cost of any necessary servicing, *
274
+ * repair, or correction. This disclaimer of warranty constitutes an *
275
+ * essential part of this License. No use of any Covered Software is *
276
+ * authorized under this License except under this disclaimer. *
277
+ * *
278
+ ************************************************************************
279
+
280
+ ************************************************************************
281
+ * *
282
+ * 7. Limitation of Liability *
283
+ * -------------------------- *
284
+ * *
285
+ * Under no circumstances and under no legal theory, whether tort *
286
+ * (including negligence), contract, or otherwise, shall any *
287
+ * Contributor, or anyone who distributes Covered Software as *
288
+ * permitted above, be liable to You for any direct, indirect, *
289
+ * special, incidental, or consequential damages of any character *
290
+ * including, without limitation, damages for lost profits, loss of *
291
+ * goodwill, work stoppage, computer failure or malfunction, or any *
292
+ * and all other commercial damages or losses, even if such party *
293
+ * shall have been informed of the possibility of such damages. This *
294
+ * limitation of liability shall not apply to liability for death or *
295
+ * personal injury resulting from such party's negligence to the *
296
+ * extent applicable law prohibits such limitation. Some *
297
+ * jurisdictions do not allow the exclusion or limitation of *
298
+ * incidental or consequential damages, so this exclusion and *
299
+ * limitation may not apply to You. *
300
+ * *
301
+ ************************************************************************
302
+
303
+ 8. Litigation
304
+ -------------
305
+
306
+ Any litigation relating to this License may be brought only in the
307
+ courts of a jurisdiction where the defendant maintains its principal
308
+ place of business and such litigation shall be governed by laws of that
309
+ jurisdiction, without reference to its conflict-of-law provisions.
310
+ Nothing in this Section shall prevent a party's ability to bring
311
+ cross-claims or counter-claims.
312
+
313
+ 9. Miscellaneous
314
+ ----------------
315
+
316
+ This License represents the complete agreement concerning the subject
317
+ matter hereof. If any provision of this License is held to be
318
+ unenforceable, such provision shall be reformed only to the extent
319
+ necessary to make it enforceable. Any law or regulation which provides
320
+ that the language of a contract shall be construed against the drafter
321
+ shall not be used to construe this License against a Contributor.
322
+
323
+ 10. Versions of the License
324
+ ---------------------------
325
+
326
+ 10.1. New Versions
327
+
328
+ Mozilla Foundation is the license steward. Except as provided in Section
329
+ 10.3, no one other than the license steward has the right to modify or
330
+ publish new versions of this License. Each version will be given a
331
+ distinguishing version number.
332
+
333
+ 10.2. Effect of New Versions
334
+
335
+ You may distribute the Covered Software under the terms of the version
336
+ of the License under which You originally received the Covered Software,
337
+ or under the terms of any subsequent version published by the license
338
+ steward.
339
+
340
+ 10.3. Modified Versions
341
+
342
+ If you create software not governed by this License, and you want to
343
+ create a new license for such software, you may create and use a
344
+ modified version of this License if you rename the license and remove
345
+ any references to the name of the license steward (except to note that
346
+ such modified license differs from this License).
347
+
348
+ 10.4. Distributing Source Code Form that is Incompatible With Secondary
349
+ Licenses
350
+
351
+ If You choose to distribute Source Code Form that is Incompatible With
352
+ Secondary Licenses under the terms of this version of the License, the
353
+ notice described in Exhibit B of this License must be attached.
354
+
355
+ Exhibit A - Source Code Form License Notice
356
+ -------------------------------------------
357
+
358
+ This Source Code Form is subject to the terms of the Mozilla Public
359
+ License, v. 2.0. If a copy of the MPL was not distributed with this
360
+ file, You can obtain one at http://mozilla.org/MPL/2.0/.
361
+
362
+ If it is not possible or desirable to put the notice in a particular
363
+ file, then You may include the notice in a location (such as a LICENSE
364
+ file in a relevant directory) where a recipient would be likely to look
365
+ for such a notice.
366
+
367
+ You may add additional accurate notices of copyright ownership.
368
+
369
+ Exhibit B - "Incompatible With Secondary Licenses" Notice
370
+ ---------------------------------------------------------
371
+
372
+ This Source Code Form is "Incompatible With Secondary Licenses", as
373
+ defined by the Mozilla Public License, v. 2.0.
README.md ADDED
@@ -0,0 +1,88 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # viXTTS Demo 🗣️🔥
2
+
3
+ ## Sử dụng nhanh ✨
4
+
5
+ 👉 Truy cập <https://huggingface.co/spaces/thinhlpg/vixtts-demo> để dùng ngay mà không cần cài đặt.
6
+
7
+ ## Introduction 👋
8
+
9
+ viXTTS is a text-to-speech voice generation tool that offers voice cloning in Vietnamese and other languages. This model is a fine-tuned version based on the [XTTS-v2.0.3](https://huggingface.co/coqui/XTTS-v2) model, utilizing the [viVoice](https://huggingface.co/datasets/capleaf/viVoice) dataset. This repository is primarily intended for demonstration purposes.
10
+
11
+ The model can be accessed at: [viXTTS on Hugging Face](https://huggingface.co/capleaf/viXTTS)
12
+
13
+ ## Online usage (Recommended)
14
+
15
+ - You can try the model here: <https://huggingface.co/spaces/thinhlpg/vixtts-demo>
16
+ - For a quick demonstration, please refer to [this notebook](./viXTTS_Demo.ipynb) on Google Colab.
17
+ Tutorial (Vietnamese): <https://youtu.be/pbwEbpOy0m8?feature=shared>
18
+ ![viXTTS Colab Demo](assets/vixtts_colab.png)
19
+
20
+ ## Local Usage
21
+
22
+ This code is specifically designed for running on Ubuntu or WSL2. It is not intended for use on macOS or Windows systems.
23
+ ![viXTTS Gradio Demo](assets/vixtts_gradio_ui.png)
24
+
25
+ ### Hardware Recommendations
26
+
27
+ - At least 10GB of free disk space
28
+ - At least 16GB of RAM
29
+ - **Nvidia GPU** with a minimum of 4GB of VRAM
30
+ - By default, the model will utilize the GPU. In the absence of a GPU, it will run on the CPU and run much slower.
31
+
32
+ ### Required Software
33
+
34
+ - Git
35
+ - Python version >=3.9 and <= 3.11. The default version is set to 3.11, but you can modify the Python version in the `run.sh` file.
36
+
37
+ ### Usage
38
+
39
+ ```bash
40
+ git clone https://github.com/thinhlpg/vixtts-demo
41
+ cd vixtts-demo
42
+ ./run.sh
43
+ ```
44
+
45
+ 1. Run `run.sh` (dependencies will be automatically installed for the first run).
46
+ 2. Access the Gradio demo link.
47
+ 3. Load the model and wait for it to load.
48
+ 4. Inference and Enjoy 🤗
49
+ 5. The result will be saved in `output/`
50
+
51
+ ## Limitation
52
+
53
+ - Subpar performance for input sentences under 10 words in Vietnamese language (yielding inconsistent output and odd trailing sounds).
54
+ - This model is only fine-tuned in Vietnamese. The model's effectiveness with languages other than Vietnamese hasn't been tested, potentially reducing quality.
55
+
56
+ ## Contributions
57
+
58
+ This project is not being actively maintained, and I do not plan to release the finetuning code due to sensitive reasons, as it might be used for unethical purposes. If you want to contribute by creating versions for other operating systems, such as Windows or macOS, please fork the repository, create a new branch, test thoroughly on the respective OS, and submit a pull request specifying your contributions.
59
+
60
+ ## Acknowledgements
61
+
62
+ We would like to express our gratitude to all the libraries and resources that have played a role in the development of this demo, especially:
63
+
64
+ - [Coqui TTS](https://github.com/coqui-ai/TTS) for XTTS foundation model and inference code
65
+ - [Vinorm](https://github.com/v-nhandt21/Vinorm) and [Underthesea](https://github.com/undertheseanlp/underthesea) for Vietnamese text normalization
66
+ - [Deepspeed](https://github.com/microsoft/DeepSpeed) for fast inference
67
+ - [Huggingface Hub](https://huggingface.co/) for hosting the model
68
+ - [Gradio](https://www.gradio.app/) for web UI
69
+ - [DeepFilterNet](https://github.com/Rikorose/DeepFilterNet) for noise removal
70
+
71
+ ## Citation
72
+
73
+ ```bibtex
74
+ @misc{viVoice,
75
+   author = {Thinh Le Phuoc Gia, Tuan Pham Minh, Hung Nguyen Quoc, Trung Nguyen Quoc, Vinh Truong Hoang},
76
+   title = {viVoice: Enabling Vietnamese Multi-Speaker Speech Synthesis},
77
+   url = {https://github.com/thinhlpg/viVoice},
78
+   year = {2024}
79
+ }
80
+ ```
81
+
82
+ A manuscript and a friendly dev log documenting the process might be made available later (including other works that were experimented with, but details about the filtering process are not specified in this README file).
83
+
84
+ ## Contact 💬
85
+
86
+ - Facebook: <https://fb.com/thinhlpg/> (preferred; feel free to add friend and message me casually)
87
+ - GitHub: <https://github.com/thinhlpg>
88
+ - Email: <thinhlpg@gmail.com> (please don't; I prefer friendly, casual talk 💀)
TTS/.cardboardlint.yml ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ linters:
2
+ - pylint:
3
+ # pylintrc: pylintrc
4
+ filefilter: ['- test_*.py', '+ *.py', '- *.npy']
5
+ # exclude:
TTS/.dockerignore ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ .git/
2
+ Dockerfile
3
+ build/
4
+ dist/
5
+ TTS.egg-info/
6
+ tests/outputs/*
7
+ tests/train_outputs/*
8
+ __pycache__/
9
+ *.pyc
TTS/.github/ISSUE_TEMPLATE/bug_report.yaml ADDED
@@ -0,0 +1,85 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: "🐛 Bug report"
2
+ description: Create a bug report to help 🐸 improve
3
+ title: '[Bug] '
4
+ labels: [ "bug" ]
5
+ body:
6
+ - type: markdown
7
+ attributes:
8
+ value: |
9
+ Welcome to the 🐸TTS! Thanks for taking the time to fill out this bug report!
10
+
11
+ - type: textarea
12
+ id: bug-description
13
+ attributes:
14
+ label: Describe the bug
15
+ description: A clear and concise description of what the bug is. If you intend to submit a PR for this issue, tell us in the description. Thanks!
16
+ placeholder: Bug description
17
+ validations:
18
+ required: true
19
+
20
+ - type: textarea
21
+ id: reproduction
22
+ attributes:
23
+ label: To Reproduce
24
+ description: |
25
+ Please share your code to reproduce the error.
26
+
27
+ Issues are fixed faster if you can provide a working example.
28
+
29
+ The best place for sharing code is colab. https://colab.research.google.com/
30
+ So we can directly run your code and reproduce the issue.
31
+
32
+ In the worst case, provide steps to reproduce the behavior.
33
+
34
+ 1. Run the following command '...'
35
+ 2. ...
36
+ 3. See error
37
+ placeholder: Reproduction
38
+ validations:
39
+ required: true
40
+
41
+ - type: textarea
42
+ id: expected-behavior
43
+ attributes:
44
+ label: Expected behavior
45
+ description: "Write down what the expected behaviour is."
46
+
47
+ - type: textarea
48
+ id: logs
49
+ attributes:
50
+ label: Logs
51
+ description: "Please include the relevant logs if you can."
52
+ render: shell
53
+
54
+ - type: textarea
55
+ id: system-info
56
+ attributes:
57
+ label: Environment
58
+ description: |
59
+ You can either run `TTS/bin/collect_env_info.py`
60
+
61
+ ```bash
62
+ wget https://raw.githubusercontent.com/coqui-ai/TTS/main/TTS/bin/collect_env_info.py
63
+ python collect_env_info.py
64
+ ```
65
+
66
+ or fill in the fields below manually.
67
+ render: shell
68
+ placeholder: |
69
+ - 🐸TTS Version (e.g., 1.3.0):
70
+ - PyTorch Version (e.g., 1.8):
71
+ - Python version:
72
+ - OS (e.g., Linux):
73
+ - CUDA/cuDNN version:
74
+ - GPU models and configuration:
75
+ - How you installed PyTorch (`conda`, `pip`, source):
76
+ - Any other relevant information:
77
+ validations:
78
+ required: true
79
+ - type: textarea
80
+ id: context
81
+ attributes:
82
+ label: Additional context
83
+ description: Add any other context about the problem here.
84
+ validations:
85
+ required: false
TTS/.github/ISSUE_TEMPLATE/config.yml ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ blank_issues_enabled: false
2
+ contact_links:
3
+ - name: CoquiTTS GitHub Discussions
4
+ url: https://github.com/coqui-ai/TTS/discussions
5
+ about: Please ask and answer questions here.
6
+ - name: Coqui Security issue disclosure
7
+ url: mailto:info@coqui.ai
8
+ about: Please report security vulnerabilities here.
TTS/.github/ISSUE_TEMPLATE/feature_request.md ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ name: 🚀 Feature request
3
+ about: Suggest a feature or an idea for this project
4
+ title: '[Feature request] '
5
+ labels: feature request
6
+ assignees: ''
7
+
8
+ ---
9
+ <!-- Welcome to the 🐸TTS project!
10
+ We are excited to see your interest, and appreciate your support! --->
11
+ **🚀 Feature Description**
12
+
13
+ <!--A clear and concise description of what the problem is. Ex. I'm always frustrated when [...] -->
14
+
15
+ **Solution**
16
+
17
+ <!-- A clear and concise description of what you want to happen. -->
18
+
19
+ **Alternative Solutions**
20
+
21
+ <!-- A clear and concise description of any alternative solutions or features you've considered. -->
22
+
23
+ **Additional context**
24
+
25
+ <!-- Add any other context or screenshots about the feature request here. -->
TTS/.github/PR_TEMPLATE.md ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Pull request guidelines
2
+
3
+ Welcome to the 🐸TTS project! We are excited to see your interest, and appreciate your support!
4
+
5
+ This repository is governed by the Contributor Covenant Code of Conduct. For more details, see the [CODE_OF_CONDUCT.md](CODE_OF_CONDUCT.md) file.
6
+
7
+ In order to make a good pull request, please see our [CONTRIBUTING.md](CONTRIBUTING.md) file.
8
+
9
+ Before accepting your pull request, you will be asked to sign a [Contributor License Agreement](https://cla-assistant.io/coqui-ai/TTS).
10
+
11
+ This [Contributor License Agreement](https://cla-assistant.io/coqui-ai/TTS):
12
+
13
+ - Protects you, Coqui, and the users of the code.
14
+ - Does not change your rights to use your contributions for any purpose.
15
+ - Does not change the license of the 🐸TTS project. It just makes the terms of your contribution clearer and lets us know you are OK to contribute.
TTS/.github/stale.yml ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Number of days of inactivity before an issue becomes stale
2
+ daysUntilStale: 30
3
+ # Number of days of inactivity before a stale issue is closed
4
+ daysUntilClose: 7
5
+ # Issues with these labels will never be considered stale
6
+ exemptLabels:
7
+ - pinned
8
+ - security
9
+ # Label to use when marking an issue as stale
10
+ staleLabel: wontfix
11
+ # Comment to post when marking an issue as stale. Set to `false` to disable
12
+ markComment: >
13
+ This issue has been automatically marked as stale because it has not had
14
+ recent activity. It will be closed if no further activity occurs. Thank you
15
+ for your contributions. You might also look at our discussion channels.
16
+ # Comment to post when closing a stale issue. Set to `false` to disable
17
+ closeComment: false
18
+
TTS/.github/workflows/aux_tests.yml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: aux-tests
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ pull_request:
8
+ types: [opened, synchronize, reopened]
9
+ jobs:
10
+ check_skip:
11
+ runs-on: ubuntu-latest
12
+ if: "! contains(github.event.head_commit.message, '[ci skip]')"
13
+ steps:
14
+ - run: echo "${{ github.event.head_commit.message }}"
15
+
16
+ test:
17
+ runs-on: ubuntu-latest
18
+ strategy:
19
+ fail-fast: false
20
+ matrix:
21
+ python-version: [3.9, "3.10", "3.11"]
22
+ experimental: [false]
23
+ steps:
24
+ - uses: actions/checkout@v3
25
+ - name: Set up Python ${{ matrix.python-version }}
26
+ uses: actions/setup-python@v4
27
+ with:
28
+ python-version: ${{ matrix.python-version }}
29
+ architecture: x64
30
+ cache: 'pip'
31
+ cache-dependency-path: 'requirements*'
32
+ - name: check OS
33
+ run: cat /etc/os-release
34
+ - name: set ENV
35
+ run: export TRAINER_TELEMETRY=0
36
+ - name: Install dependencies
37
+ run: |
38
+ sudo apt-get update
39
+ sudo apt-get install -y git make gcc
40
+ make system-deps
41
+ - name: Install/upgrade Python setup deps
42
+ run: python3 -m pip install --upgrade pip setuptools wheel
43
+ - name: Replace scarf urls
44
+ run: |
45
+ sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json
46
+ - name: Install TTS
47
+ run: |
48
+ python3 -m pip install .[all]
49
+ python3 setup.py egg_info
50
+ - name: Unit tests
51
+ run: make test_aux
TTS/.github/workflows/data_tests.yml ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: data-tests
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ pull_request:
8
+ types: [opened, synchronize, reopened]
9
+ jobs:
10
+ check_skip:
11
+ runs-on: ubuntu-latest
12
+ if: "! contains(github.event.head_commit.message, '[ci skip]')"
13
+ steps:
14
+ - run: echo "${{ github.event.head_commit.message }}"
15
+
16
+ test:
17
+ runs-on: ubuntu-latest
18
+ strategy:
19
+ fail-fast: false
20
+ matrix:
21
+ python-version: [3.9, "3.10", "3.11"]
22
+ experimental: [false]
23
+ steps:
24
+ - uses: actions/checkout@v3
25
+ - name: Set up Python ${{ matrix.python-version }}
26
+ uses: actions/setup-python@v4
27
+ with:
28
+ python-version: ${{ matrix.python-version }}
29
+ architecture: x64
30
+ cache: 'pip'
31
+ cache-dependency-path: 'requirements*'
32
+ - name: check OS
33
+ run: cat /etc/os-release
34
+ - name: set ENV
35
+ run: export TRAINER_TELEMETRY=0
36
+ - name: Install dependencies
37
+ run: |
38
+ sudo apt-get update
39
+ sudo apt-get install -y --no-install-recommends git make gcc
40
+ make system-deps
41
+ - name: Install/upgrade Python setup deps
42
+ run: python3 -m pip install --upgrade pip setuptools wheel
43
+ - name: Replace scarf urls
44
+ run: |
45
+ sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json
46
+ - name: Install TTS
47
+ run: |
48
+ python3 -m pip install .[all]
49
+ python3 setup.py egg_info
50
+ - name: Unit tests
51
+ run: make data_tests
TTS/.github/workflows/docker.yaml ADDED
@@ -0,0 +1,65 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: "Docker build and push"
2
+ on:
3
+ pull_request:
4
+ push:
5
+ branches:
6
+ - main
7
+ - dev
8
+ tags:
9
+ - v*
10
+ jobs:
11
+ docker-build:
12
+ name: "Build and push Docker image"
13
+ runs-on: ubuntu-20.04
14
+ strategy:
15
+ matrix:
16
+ arch: ["amd64"]
17
+ base:
18
+ - "nvidia/cuda:11.8.0-base-ubuntu22.04" # GPU enabled
19
+ - "python:3.10.8-slim" # CPU only
20
+ steps:
21
+ - uses: actions/checkout@v2
22
+ - name: Log in to the Container registry
23
+ uses: docker/login-action@v1
24
+ with:
25
+ registry: ghcr.io
26
+ username: ${{ github.actor }}
27
+ password: ${{ secrets.GITHUB_TOKEN }}
28
+ - name: Compute Docker tags, check VERSION file matches tag
29
+ id: compute-tag
30
+ run: |
31
+ set -ex
32
+ base="ghcr.io/coqui-ai/tts"
33
+ tags="" # PR build
34
+
35
+ if [[ ${{ matrix.base }} = "python:3.10.8-slim" ]]; then
36
+ base="ghcr.io/coqui-ai/tts-cpu"
37
+ fi
38
+
39
+ if [[ "${{ startsWith(github.ref, 'refs/heads/') }}" = "true" ]]; then
40
+ # Push to branch
41
+ github_ref="${{ github.ref }}"
42
+ branch=${github_ref#*refs/heads/} # strip prefix to get branch name
43
+ tags="${base}:${branch},${base}:${{ github.sha }},"
44
+ elif [[ "${{ startsWith(github.ref, 'refs/tags/') }}" = "true" ]]; then
45
+ VERSION="v$(cat TTS/VERSION)"
46
+ if [[ "${{ github.ref }}" != "refs/tags/${VERSION}" ]]; then
47
+ echo "Pushed tag does not match VERSION file. Aborting push."
48
+ exit 1
49
+ fi
50
+ tags="${base}:${VERSION},${base}:latest,${base}:${{ github.sha }}"
51
+ fi
52
+ echo "::set-output name=tags::${tags}"
53
+ - name: Set up QEMU
54
+ uses: docker/setup-qemu-action@v1
55
+ - name: Set up Docker Buildx
56
+ id: buildx
57
+ uses: docker/setup-buildx-action@v1
58
+ - name: Build and push
59
+ uses: docker/build-push-action@v2
60
+ with:
61
+ context: .
62
+ platforms: linux/${{ matrix.arch }}
63
+ push: ${{ github.event_name == 'push' }}
64
+ build-args: "BASE=${{ matrix.base }}"
65
+ tags: ${{ steps.compute-tag.outputs.tags }}
TTS/.github/workflows/inference_tests.yml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: inference_tests
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ pull_request:
8
+ types: [opened, synchronize, reopened]
9
+ jobs:
10
+ check_skip:
11
+ runs-on: ubuntu-latest
12
+ if: "! contains(github.event.head_commit.message, '[ci skip]')"
13
+ steps:
14
+ - run: echo "${{ github.event.head_commit.message }}"
15
+
16
+ test:
17
+ runs-on: ubuntu-latest
18
+ strategy:
19
+ fail-fast: false
20
+ matrix:
21
+ python-version: [3.9, "3.10", "3.11"]
22
+ experimental: [false]
23
+ steps:
24
+ - uses: actions/checkout@v3
25
+ - name: Set up Python ${{ matrix.python-version }}
26
+ uses: actions/setup-python@v4
27
+ with:
28
+ python-version: ${{ matrix.python-version }}
29
+ architecture: x64
30
+ cache: 'pip'
31
+ cache-dependency-path: 'requirements*'
32
+ - name: check OS
33
+ run: cat /etc/os-release
34
+ - name: set ENV
35
+ run: |
36
+ export TRAINER_TELEMETRY=0
37
+ - name: Install dependencies
38
+ run: |
39
+ sudo apt-get update
40
+ sudo apt-get install -y --no-install-recommends git make gcc
41
+ sudo apt-get install espeak-ng
42
+ make system-deps
43
+ - name: Install/upgrade Python setup deps
44
+ run: python3 -m pip install --upgrade pip setuptools wheel
45
+ - name: Replace scarf urls
46
+ run: |
47
+ sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json
48
+ - name: Install TTS
49
+ run: |
50
+ python3 -m pip install .[all]
51
+ python3 setup.py egg_info
52
+ - name: Unit tests
53
+ run: make inference_tests
TTS/.github/workflows/pypi-release.yml ADDED
@@ -0,0 +1,94 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Publish Python 🐍 distributions 📦 to PyPI
2
+ on:
3
+ release:
4
+ types: [published]
5
+ defaults:
6
+ run:
7
+ shell:
8
+ bash
9
+ jobs:
10
+ build-sdist:
11
+ runs-on: ubuntu-20.04
12
+ steps:
13
+ - uses: actions/checkout@v3
14
+ - name: Verify tag matches version
15
+ run: |
16
+ set -ex
17
+ version=$(cat TTS/VERSION)
18
+ tag="${GITHUB_REF/refs\/tags\/}"
19
+ if [[ "v$version" != "$tag" ]]; then
20
+ exit 1
21
+ fi
22
+ - uses: actions/setup-python@v2
23
+ with:
24
+ python-version: 3.9
25
+ - run: |
26
+ python -m pip install -U pip setuptools wheel build
27
+ - run: |
28
+ python -m build
29
+ - run: |
30
+ pip install dist/*.tar.gz
31
+ - uses: actions/upload-artifact@v2
32
+ with:
33
+ name: sdist
34
+ path: dist/*.tar.gz
35
+ build-wheels:
36
+ runs-on: ubuntu-20.04
37
+ strategy:
38
+ matrix:
39
+ python-version: ["3.9", "3.10", "3.11"]
40
+ steps:
41
+ - uses: actions/checkout@v3
42
+ - uses: actions/setup-python@v2
43
+ with:
44
+ python-version: ${{ matrix.python-version }}
45
+ - name: Install pip requirements
46
+ run: |
47
+ python -m pip install -U pip setuptools wheel build
48
+ python -m pip install -r requirements.txt
49
+ - name: Setup and install manylinux1_x86_64 wheel
50
+ run: |
51
+ python setup.py bdist_wheel --plat-name=manylinux1_x86_64
52
+ python -m pip install dist/*-manylinux*.whl
53
+ - uses: actions/upload-artifact@v2
54
+ with:
55
+ name: wheel-${{ matrix.python-version }}
56
+ path: dist/*-manylinux*.whl
57
+ publish-artifacts:
58
+ runs-on: ubuntu-20.04
59
+ needs: [build-sdist, build-wheels]
60
+ steps:
61
+ - run: |
62
+ mkdir dist
63
+ - uses: actions/download-artifact@v2
64
+ with:
65
+ name: "sdist"
66
+ path: "dist/"
67
+ - uses: actions/download-artifact@v2
68
+ with:
69
+ name: "wheel-3.9"
70
+ path: "dist/"
71
+ - uses: actions/download-artifact@v2
72
+ with:
73
+ name: "wheel-3.10"
74
+ path: "dist/"
75
+ - uses: actions/download-artifact@v2
76
+ with:
77
+ name: "wheel-3.11"
78
+ path: "dist/"
79
+ - run: |
80
+ ls -lh dist/
81
+ - name: Setup PyPI config
82
+ run: |
83
+ cat << EOF > ~/.pypirc
84
+ [pypi]
85
+ username=__token__
86
+ password=${{ secrets.PYPI_TOKEN }}
87
+ EOF
88
+ - uses: actions/setup-python@v2
89
+ with:
90
+ python-version: 3.9
91
+ - run: |
92
+ python -m pip install twine
93
+ - run: |
94
+ twine upload --repository pypi dist/*
TTS/.github/workflows/style_check.yml ADDED
@@ -0,0 +1,46 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: style-check
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ pull_request:
8
+ types: [opened, synchronize, reopened]
9
+ jobs:
10
+ check_skip:
11
+ runs-on: ubuntu-latest
12
+ if: "! contains(github.event.head_commit.message, '[ci skip]')"
13
+ steps:
14
+ - run: echo "${{ github.event.head_commit.message }}"
15
+
16
+ test:
17
+ runs-on: ubuntu-latest
18
+ strategy:
19
+ fail-fast: false
20
+ matrix:
21
+ python-version: [3.9]
22
+ experimental: [false]
23
+ steps:
24
+ - uses: actions/checkout@v3
25
+ - name: Set up Python ${{ matrix.python-version }}
26
+ uses: actions/setup-python@v4
27
+ with:
28
+ python-version: ${{ matrix.python-version }}
29
+ architecture: x64
30
+ cache: 'pip'
31
+ cache-dependency-path: 'requirements*'
32
+ - name: check OS
33
+ run: cat /etc/os-release
34
+ - name: Install dependencies
35
+ run: |
36
+ sudo apt-get update
37
+ sudo apt-get install -y git make gcc
38
+ make system-deps
39
+ - name: Install/upgrade Python setup deps
40
+ run: python3 -m pip install --upgrade pip setuptools wheel
41
+ - name: Install TTS
42
+ run: |
43
+ python3 -m pip install .[all]
44
+ python3 setup.py egg_info
45
+ - name: Style check
46
+ run: make style
TTS/.github/workflows/text_tests.yml ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: text-tests
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ pull_request:
8
+ types: [opened, synchronize, reopened]
9
+ jobs:
10
+ check_skip:
11
+ runs-on: ubuntu-latest
12
+ if: "! contains(github.event.head_commit.message, '[ci skip]')"
13
+ steps:
14
+ - run: echo "${{ github.event.head_commit.message }}"
15
+
16
+ test:
17
+ runs-on: ubuntu-latest
18
+ strategy:
19
+ fail-fast: false
20
+ matrix:
21
+ python-version: [3.9, "3.10", "3.11"]
22
+ experimental: [false]
23
+ steps:
24
+ - uses: actions/checkout@v3
25
+ - name: Set up Python ${{ matrix.python-version }}
26
+ uses: actions/setup-python@v4
27
+ with:
28
+ python-version: ${{ matrix.python-version }}
29
+ architecture: x64
30
+ cache: 'pip'
31
+ cache-dependency-path: 'requirements*'
32
+ - name: check OS
33
+ run: cat /etc/os-release
34
+ - name: set ENV
35
+ run: export TRAINER_TELEMETRY=0
36
+ - name: Install dependencies
37
+ run: |
38
+ sudo apt-get update
39
+ sudo apt-get install -y --no-install-recommends git make gcc
40
+ sudo apt-get install espeak
41
+ sudo apt-get install espeak-ng
42
+ make system-deps
43
+ - name: Install/upgrade Python setup deps
44
+ run: python3 -m pip install --upgrade pip setuptools wheel
45
+ - name: Install TTS
46
+ run: |
47
+ python3 -m pip install .[all]
48
+ python3 setup.py egg_info
49
+ - name: Unit tests
50
+ run: make test_text
TTS/.github/workflows/tts_tests.yml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: tts-tests
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ pull_request:
8
+ types: [opened, synchronize, reopened]
9
+ jobs:
10
+ check_skip:
11
+ runs-on: ubuntu-latest
12
+ if: "! contains(github.event.head_commit.message, '[ci skip]')"
13
+ steps:
14
+ - run: echo "${{ github.event.head_commit.message }}"
15
+
16
+ test:
17
+ runs-on: ubuntu-latest
18
+ strategy:
19
+ fail-fast: false
20
+ matrix:
21
+ python-version: [3.9, "3.10", "3.11"]
22
+ experimental: [false]
23
+ steps:
24
+ - uses: actions/checkout@v3
25
+ - name: Set up Python ${{ matrix.python-version }}
26
+ uses: actions/setup-python@v4
27
+ with:
28
+ python-version: ${{ matrix.python-version }}
29
+ architecture: x64
30
+ cache: 'pip'
31
+ cache-dependency-path: 'requirements*'
32
+ - name: check OS
33
+ run: cat /etc/os-release
34
+ - name: set ENV
35
+ run: export TRAINER_TELEMETRY=0
36
+ - name: Install dependencies
37
+ run: |
38
+ sudo apt-get update
39
+ sudo apt-get install -y --no-install-recommends git make gcc
40
+ sudo apt-get install espeak
41
+ sudo apt-get install espeak-ng
42
+ make system-deps
43
+ - name: Install/upgrade Python setup deps
44
+ run: python3 -m pip install --upgrade pip setuptools wheel
45
+ - name: Replace scarf urls
46
+ run: |
47
+ sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json
48
+ - name: Install TTS
49
+ run: |
50
+ python3 -m pip install .[all]
51
+ python3 setup.py egg_info
52
+ - name: Unit tests
53
+ run: make test_tts
TTS/.github/workflows/tts_tests2.yml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: tts-tests2
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ pull_request:
8
+ types: [opened, synchronize, reopened]
9
+ jobs:
10
+ check_skip:
11
+ runs-on: ubuntu-latest
12
+ if: "! contains(github.event.head_commit.message, '[ci skip]')"
13
+ steps:
14
+ - run: echo "${{ github.event.head_commit.message }}"
15
+
16
+ test:
17
+ runs-on: ubuntu-latest
18
+ strategy:
19
+ fail-fast: false
20
+ matrix:
21
+ python-version: [3.9, "3.10", "3.11"]
22
+ experimental: [false]
23
+ steps:
24
+ - uses: actions/checkout@v3
25
+ - name: Set up Python ${{ matrix.python-version }}
26
+ uses: actions/setup-python@v4
27
+ with:
28
+ python-version: ${{ matrix.python-version }}
29
+ architecture: x64
30
+ cache: 'pip'
31
+ cache-dependency-path: 'requirements*'
32
+ - name: check OS
33
+ run: cat /etc/os-release
34
+ - name: set ENV
35
+ run: export TRAINER_TELEMETRY=0
36
+ - name: Install dependencies
37
+ run: |
38
+ sudo apt-get update
39
+ sudo apt-get install -y --no-install-recommends git make gcc
40
+ sudo apt-get install espeak
41
+ sudo apt-get install espeak-ng
42
+ make system-deps
43
+ - name: Install/upgrade Python setup deps
44
+ run: python3 -m pip install --upgrade pip setuptools wheel
45
+ - name: Replace scarf urls
46
+ run: |
47
+ sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json
48
+ - name: Install TTS
49
+ run: |
50
+ python3 -m pip install .[all]
51
+ python3 setup.py egg_info
52
+ - name: Unit tests
53
+ run: make test_tts2
TTS/.github/workflows/vocoder_tests.yml ADDED
@@ -0,0 +1,48 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: vocoder-tests
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ pull_request:
8
+ types: [opened, synchronize, reopened]
9
+ jobs:
10
+ check_skip:
11
+ runs-on: ubuntu-latest
12
+ if: "! contains(github.event.head_commit.message, '[ci skip]')"
13
+ steps:
14
+ - run: echo "${{ github.event.head_commit.message }}"
15
+
16
+ test:
17
+ runs-on: ubuntu-latest
18
+ strategy:
19
+ fail-fast: false
20
+ matrix:
21
+ python-version: [3.9, "3.10", "3.11"]
22
+ experimental: [false]
23
+ steps:
24
+ - uses: actions/checkout@v3
25
+ - name: Set up Python ${{ matrix.python-version }}
26
+ uses: actions/setup-python@v4
27
+ with:
28
+ python-version: ${{ matrix.python-version }}
29
+ architecture: x64
30
+ cache: 'pip'
31
+ cache-dependency-path: 'requirements*'
32
+ - name: check OS
33
+ run: cat /etc/os-release
34
+ - name: set ENV
35
+ run: export TRAINER_TELEMETRY=0
36
+ - name: Install dependencies
37
+ run: |
38
+ sudo apt-get update
39
+ sudo apt-get install -y git make gcc
40
+ make system-deps
41
+ - name: Install/upgrade Python setup deps
42
+ run: python3 -m pip install --upgrade pip setuptools wheel
43
+ - name: Install TTS
44
+ run: |
45
+ python3 -m pip install .[all]
46
+ python3 setup.py egg_info
47
+ - name: Unit tests
48
+ run: make test_vocoder
TTS/.github/workflows/xtts_tests.yml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: xtts-tests
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ pull_request:
8
+ types: [opened, synchronize, reopened]
9
+ jobs:
10
+ check_skip:
11
+ runs-on: ubuntu-latest
12
+ if: "! contains(github.event.head_commit.message, '[ci skip]')"
13
+ steps:
14
+ - run: echo "${{ github.event.head_commit.message }}"
15
+
16
+ test:
17
+ runs-on: ubuntu-latest
18
+ strategy:
19
+ fail-fast: false
20
+ matrix:
21
+ python-version: [3.9, "3.10", "3.11"]
22
+ experimental: [false]
23
+ steps:
24
+ - uses: actions/checkout@v3
25
+ - name: Set up Python ${{ matrix.python-version }}
26
+ uses: actions/setup-python@v4
27
+ with:
28
+ python-version: ${{ matrix.python-version }}
29
+ architecture: x64
30
+ cache: 'pip'
31
+ cache-dependency-path: 'requirements*'
32
+ - name: check OS
33
+ run: cat /etc/os-release
34
+ - name: set ENV
35
+ run: export TRAINER_TELEMETRY=0
36
+ - name: Install dependencies
37
+ run: |
38
+ sudo apt-get update
39
+ sudo apt-get install -y --no-install-recommends git make gcc
40
+ sudo apt-get install espeak
41
+ sudo apt-get install espeak-ng
42
+ make system-deps
43
+ - name: Install/upgrade Python setup deps
44
+ run: python3 -m pip install --upgrade pip setuptools wheel
45
+ - name: Replace scarf urls
46
+ run: |
47
+ sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json
48
+ - name: Install TTS
49
+ run: |
50
+ python3 -m pip install .[all]
51
+ python3 setup.py egg_info
52
+ - name: Unit tests
53
+ run: make test_xtts
TTS/.github/workflows/zoo_tests0.yml ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: zoo-tests-0
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ pull_request:
8
+ types: [opened, synchronize, reopened]
9
+ jobs:
10
+ check_skip:
11
+ runs-on: ubuntu-latest
12
+ if: "! contains(github.event.head_commit.message, '[ci skip]')"
13
+ steps:
14
+ - run: echo "${{ github.event.head_commit.message }}"
15
+
16
+ test:
17
+ runs-on: ubuntu-latest
18
+ strategy:
19
+ fail-fast: false
20
+ matrix:
21
+ python-version: [3.9, "3.10", "3.11"]
22
+ experimental: [false]
23
+ steps:
24
+ - uses: actions/checkout@v3
25
+ - name: Set up Python ${{ matrix.python-version }}
26
+ uses: actions/setup-python@v4
27
+ with:
28
+ python-version: ${{ matrix.python-version }}
29
+ architecture: x64
30
+ cache: 'pip'
31
+ cache-dependency-path: 'requirements*'
32
+ - name: check OS
33
+ run: cat /etc/os-release
34
+ - name: set ENV
35
+ run: export TRAINER_TELEMETRY=0
36
+ - name: Install dependencies
37
+ run: |
38
+ sudo apt-get update
39
+ sudo apt-get install -y git make gcc
40
+ sudo apt-get install espeak espeak-ng
41
+ make system-deps
42
+ - name: Install/upgrade Python setup deps
43
+ run: python3 -m pip install --upgrade pip setuptools wheel
44
+ - name: Replace scarf urls
45
+ run: |
46
+ sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json
47
+ - name: Install TTS
48
+ run: |
49
+ python3 -m pip install .[all]
50
+ python3 setup.py egg_info
51
+ - name: Unit tests
52
+ run: |
53
+ nose2 -F -v -B TTS tests.zoo_tests.test_models.test_models_offset_0_step_3
54
+ nose2 -F -v -B TTS tests.zoo_tests.test_models.test_voice_conversion
TTS/.github/workflows/zoo_tests1.yml ADDED
@@ -0,0 +1,53 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: zoo-tests-1
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ pull_request:
8
+ types: [opened, synchronize, reopened]
9
+ jobs:
10
+ check_skip:
11
+ runs-on: ubuntu-latest
12
+ if: "! contains(github.event.head_commit.message, '[ci skip]')"
13
+ steps:
14
+ - run: echo "${{ github.event.head_commit.message }}"
15
+
16
+ test:
17
+ runs-on: ubuntu-latest
18
+ strategy:
19
+ fail-fast: false
20
+ matrix:
21
+ python-version: [3.9, "3.10", "3.11"]
22
+ experimental: [false]
23
+ steps:
24
+ - uses: actions/checkout@v3
25
+ - name: Set up Python ${{ matrix.python-version }}
26
+ uses: actions/setup-python@v4
27
+ with:
28
+ python-version: ${{ matrix.python-version }}
29
+ architecture: x64
30
+ cache: 'pip'
31
+ cache-dependency-path: 'requirements*'
32
+ - name: check OS
33
+ run: cat /etc/os-release
34
+ - name: set ENV
35
+ run: export TRAINER_TELEMETRY=0
36
+ - name: Install dependencies
37
+ run: |
38
+ sudo apt-get update
39
+ sudo apt-get install -y git make gcc
40
+ sudo apt-get install espeak espeak-ng
41
+ make system-deps
42
+ - name: Install/upgrade Python setup deps
43
+ run: python3 -m pip install --upgrade pip setuptools wheel
44
+ - name: Replace scarf urls
45
+ run: |
46
+ sed -i 's/https:\/\/coqui.gateway.scarf.sh\/hf\/bark\//https:\/\/huggingface.co\/erogol\/bark\/resolve\/main\//g' TTS/.models.json
47
+ sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json
48
+ - name: Install TTS
49
+ run: |
50
+ python3 -m pip install .[all]
51
+ python3 setup.py egg_info
52
+ - name: Unit tests
53
+ run: nose2 -F -v -B --with-coverage --coverage TTS tests.zoo_tests.test_models.test_models_offset_1_step_3
TTS/.github/workflows/zoo_tests2.yml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: zoo-tests-2
2
+
3
+ on:
4
+ push:
5
+ branches:
6
+ - main
7
+ pull_request:
8
+ types: [opened, synchronize, reopened]
9
+ jobs:
10
+ check_skip:
11
+ runs-on: ubuntu-latest
12
+ if: "! contains(github.event.head_commit.message, '[ci skip]')"
13
+ steps:
14
+ - run: echo "${{ github.event.head_commit.message }}"
15
+
16
+ test:
17
+ runs-on: ubuntu-latest
18
+ strategy:
19
+ fail-fast: false
20
+ matrix:
21
+ python-version: [3.9, "3.10", "3.11"]
22
+ experimental: [false]
23
+ steps:
24
+ - uses: actions/checkout@v3
25
+ - name: Set up Python ${{ matrix.python-version }}
26
+ uses: actions/setup-python@v4
27
+ with:
28
+ python-version: ${{ matrix.python-version }}
29
+ architecture: x64
30
+ cache: 'pip'
31
+ cache-dependency-path: 'requirements*'
32
+ - name: check OS
33
+ run: cat /etc/os-release
34
+ - name: set ENV
35
+ run: export TRAINER_TELEMETRY=0
36
+ - name: Install dependencies
37
+ run: |
38
+ sudo apt-get update
39
+ sudo apt-get install -y git make gcc
40
+ sudo apt-get install espeak espeak-ng
41
+ make system-deps
42
+ - name: Install/upgrade Python setup deps
43
+ run: python3 -m pip install --upgrade pip setuptools wheel
44
+ - name: Replace scarf urls
45
+ run: |
46
+ sed -i 's/https:\/\/coqui.gateway.scarf.sh\//https:\/\/github.com\/coqui-ai\/TTS\/releases\/download\//g' TTS/.models.json
47
+ - name: Install TTS
48
+ run: |
49
+ python3 -m pip install .[all]
50
+ python3 setup.py egg_info
51
+ - name: Unit tests
52
+ run: nose2 -F -v -B --with-coverage --coverage TTS tests.zoo_tests.test_models.test_models_offset_2_step_3
TTS/.gitignore ADDED
@@ -0,0 +1,172 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ WadaSNR/
2
+ .idea/
3
+ *.pyc
4
+ .DS_Store
5
+ ./__init__.py
6
+ # Byte-compiled / optimized / DLL files
7
+ __pycache__/
8
+ *.py[cod]
9
+ *$py.class
10
+
11
+ # C extensions
12
+ *.so
13
+
14
+ # Distribution / packaging
15
+ .Python
16
+ build/
17
+ develop-eggs/
18
+ dist/
19
+ downloads/
20
+ eggs/
21
+ .eggs/
22
+ lib/
23
+ lib64/
24
+ parts/
25
+ sdist/
26
+ var/
27
+ wheels/
28
+ *.egg-info/
29
+ .installed.cfg
30
+ *.egg
31
+ MANIFEST
32
+
33
+ # PyInstaller
34
+ # Usually these files are written by a python script from a template
35
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
36
+ *.manifest
37
+ *.spec
38
+
39
+ # Installer logs
40
+ pip-log.txt
41
+ pip-delete-this-directory.txt
42
+
43
+ # Unit test / coverage reports
44
+ htmlcov/
45
+ .tox/
46
+ .coverage
47
+ .coverage.*
48
+ .cache
49
+ nosetests.xml
50
+ coverage.xml
51
+ *.cover
52
+ .hypothesis/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ .static_storage/
61
+ .media/
62
+ local_settings.py
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ target/
76
+
77
+ # Jupyter Notebook
78
+ .ipynb_checkpoints
79
+
80
+ # pyenv
81
+ .python-version
82
+
83
+ # celery beat schedule file
84
+ celerybeat-schedule
85
+
86
+ # SageMath parsed files
87
+ *.sage.py
88
+
89
+ # Environments
90
+ .env
91
+ .venv
92
+ env/
93
+ venv/
94
+ ENV/
95
+ env.bak/
96
+ venv.bak/
97
+
98
+ # Spyder project settings
99
+ .spyderproject
100
+ .spyproject
101
+
102
+ # Rope project settings
103
+ .ropeproject
104
+
105
+ # mkdocs documentation
106
+ /site
107
+
108
+ # mypy
109
+ .mypy_cache/
110
+
111
+ # vim
112
+ *.swp
113
+ *.swm
114
+ *.swn
115
+ *.swo
116
+
117
+ # pytorch models
118
+ *.pth
119
+ *.pth.tar
120
+ !dummy_speakers.pth
121
+ result/
122
+
123
+ # setup.py
124
+ version.py
125
+
126
+ # jupyter dummy files
127
+ core
128
+
129
+ # ignore local datasets
130
+ recipes/WIP/*
131
+ recipes/ljspeech/LJSpeech-1.1/*
132
+ recipes/vctk/VCTK/*
133
+ recipes/**/*.npy
134
+ recipes/**/*.json
135
+ VCTK-Corpus-removed-silence/*
136
+
137
+ # ignore training logs
138
+ trainer_*_log.txt
139
+
140
+ # files used internally for dev, test etc.
141
+ tests/outputs/*
142
+ tests/train_outputs/*
143
+ TODO.txt
144
+ .vscode/*
145
+ data/*
146
+ notebooks/data/*
147
+ TTS/tts/utils/monotonic_align/core.c
148
+ .vscode-upload.json
149
+ temp_build/*
150
+ events.out*
151
+ old_configs/*
152
+ model_importers/*
153
+ model_profiling/*
154
+ docs/source/TODO/*
155
+ .noseids
156
+ .dccache
157
+ log.txt
158
+ umap.png
159
+ *.out
160
+ SocialMedia.txt
161
+ output.wav
162
+ tts_output.wav
163
+ deps.json
164
+ speakers.json
165
+ internal/*
166
+ *_pitch.npy
167
+ *_phoneme.npy
168
+ wandb
169
+ depot/*
170
+ coqui_recipes/*
171
+ local_scripts/*
172
+ coqui_demos/*
TTS/.pre-commit-config.yaml ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ repos:
2
+ - repo: 'https://github.com/pre-commit/pre-commit-hooks'
3
+ rev: v2.3.0
4
+ hooks:
5
+ - id: check-yaml
6
+ - id: end-of-file-fixer
7
+ - id: trailing-whitespace
8
+ - repo: 'https://github.com/psf/black'
9
+ rev: 22.3.0
10
+ hooks:
11
+ - id: black
12
+ language_version: python3
13
+ - repo: https://github.com/pycqa/isort
14
+ rev: 5.8.0
15
+ hooks:
16
+ - id: isort
17
+ name: isort (python)
18
+ - id: isort
19
+ name: isort (cython)
20
+ types: [cython]
21
+ - id: isort
22
+ name: isort (pyi)
23
+ types: [pyi]
24
+ - repo: https://github.com/pycqa/pylint
25
+ rev: v2.8.2
26
+ hooks:
27
+ - id: pylint
TTS/.pylintrc ADDED
@@ -0,0 +1,599 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [MASTER]
2
+
3
+ # A comma-separated list of package or module names from where C extensions may
4
+ # be loaded. Extensions are loaded into the active Python interpreter and may
5
+ # run arbitrary code.
6
+ extension-pkg-whitelist=
7
+
8
+ # Add files or directories to the blacklist. They should be base names, not
9
+ # paths.
10
+ ignore=CVS
11
+
12
+ # Add files or directories matching the regex patterns to the blacklist. The
13
+ # regex matches against base names, not paths.
14
+ ignore-patterns=
15
+
16
+ # Python code to execute, usually for sys.path manipulation such as
17
+ # pygtk.require().
18
+ #init-hook=
19
+
20
+ # Use multiple processes to speed up Pylint. Specifying 0 will auto-detect the
21
+ # number of processors available to use.
22
+ jobs=1
23
+
24
+ # Control the amount of potential inferred values when inferring a single
25
+ # object. This can help the performance when dealing with large functions or
26
+ # complex, nested conditions.
27
+ limit-inference-results=100
28
+
29
+ # List of plugins (as comma separated values of python modules names) to load,
30
+ # usually to register additional checkers.
31
+ load-plugins=
32
+
33
+ # Pickle collected data for later comparisons.
34
+ persistent=yes
35
+
36
+ # Specify a configuration file.
37
+ #rcfile=
38
+
39
+ # When enabled, pylint would attempt to guess common misconfiguration and emit
40
+ # user-friendly hints instead of false-positive error messages.
41
+ suggestion-mode=yes
42
+
43
+ # Allow loading of arbitrary C extensions. Extensions are imported into the
44
+ # active Python interpreter and may run arbitrary code.
45
+ unsafe-load-any-extension=no
46
+
47
+
48
+ [MESSAGES CONTROL]
49
+
50
+ # Only show warnings with the listed confidence levels. Leave empty to show
51
+ # all. Valid levels: HIGH, INFERENCE, INFERENCE_FAILURE, UNDEFINED.
52
+ confidence=
53
+
54
+ # Disable the message, report, category or checker with the given id(s). You
55
+ # can either give multiple identifiers separated by comma (,) or put this
56
+ # option multiple times (only on the command line, not in the configuration
57
+ # file where it should appear only once). You can also use "--disable=all" to
58
+ # disable everything first and then reenable specific checks. For example, if
59
+ # you want to run only the similarities checker, you can use "--disable=all
60
+ # --enable=similarities". If you want to run only the classes checker, but have
61
+ # no Warning level messages displayed, use "--disable=all --enable=classes
62
+ # --disable=W".
63
+ disable=missing-docstring,
64
+ too-many-public-methods,
65
+ too-many-lines,
66
+ bare-except,
67
+ ## for avoiding weird p3.6 CI linter error
68
+ ## TODO: see later if we can remove this
69
+ assigning-non-slot,
70
+ unsupported-assignment-operation,
71
+ ## end
72
+ line-too-long,
73
+ fixme,
74
+ wrong-import-order,
75
+ ungrouped-imports,
76
+ wrong-import-position,
77
+ import-error,
78
+ invalid-name,
79
+ too-many-instance-attributes,
80
+ arguments-differ,
81
+ arguments-renamed,
82
+ no-name-in-module,
83
+ no-member,
84
+ unsubscriptable-object,
85
+ print-statement,
86
+ parameter-unpacking,
87
+ unpacking-in-except,
88
+ old-raise-syntax,
89
+ backtick,
90
+ long-suffix,
91
+ old-ne-operator,
92
+ old-octal-literal,
93
+ import-star-module-level,
94
+ non-ascii-bytes-literal,
95
+ raw-checker-failed,
96
+ bad-inline-option,
97
+ locally-disabled,
98
+ file-ignored,
99
+ suppressed-message,
100
+ useless-suppression,
101
+ deprecated-pragma,
102
+ use-symbolic-message-instead,
103
+ useless-object-inheritance,
104
+ too-few-public-methods,
105
+ too-many-branches,
106
+ too-many-arguments,
107
+ too-many-locals,
108
+ too-many-statements,
109
+ apply-builtin,
110
+ basestring-builtin,
111
+ buffer-builtin,
112
+ cmp-builtin,
113
+ coerce-builtin,
114
+ execfile-builtin,
115
+ file-builtin,
116
+ long-builtin,
117
+ raw_input-builtin,
118
+ reduce-builtin,
119
+ standarderror-builtin,
120
+ unicode-builtin,
121
+ xrange-builtin,
122
+ coerce-method,
123
+ delslice-method,
124
+ getslice-method,
125
+ setslice-method,
126
+ no-absolute-import,
127
+ old-division,
128
+ dict-iter-method,
129
+ dict-view-method,
130
+ next-method-called,
131
+ metaclass-assignment,
132
+ indexing-exception,
133
+ raising-string,
134
+ reload-builtin,
135
+ oct-method,
136
+ hex-method,
137
+ nonzero-method,
138
+ cmp-method,
139
+ input-builtin,
140
+ round-builtin,
141
+ intern-builtin,
142
+ unichr-builtin,
143
+ map-builtin-not-iterating,
144
+ zip-builtin-not-iterating,
145
+ range-builtin-not-iterating,
146
+ filter-builtin-not-iterating,
147
+ using-cmp-argument,
148
+ eq-without-hash,
149
+ div-method,
150
+ idiv-method,
151
+ rdiv-method,
152
+ exception-message-attribute,
153
+ invalid-str-codec,
154
+ sys-max-int,
155
+ bad-python3-import,
156
+ deprecated-string-function,
157
+ deprecated-str-translate-call,
158
+ deprecated-itertools-function,
159
+ deprecated-types-field,
160
+ next-method-defined,
161
+ dict-items-not-iterating,
162
+ dict-keys-not-iterating,
163
+ dict-values-not-iterating,
164
+ deprecated-operator-function,
165
+ deprecated-urllib-function,
166
+ xreadlines-attribute,
167
+ deprecated-sys-function,
168
+ exception-escape,
169
+ comprehension-escape,
170
+ duplicate-code,
171
+ not-callable,
172
+ import-outside-toplevel,
173
+ logging-fstring-interpolation,
174
+ logging-not-lazy
175
+
176
+ # Enable the message, report, category or checker with the given id(s). You can
177
+ # either give multiple identifiers separated by comma (,) or put this option
178
+ # multiple times (only on the command line, not in the configuration file where
179
+ # it should appear only once). See also the "--disable" option for examples.
180
+ enable=c-extension-no-member
181
+
182
+
183
+ [REPORTS]
184
+
185
+ # Python expression which should return a note less than 10 (10 is the highest
186
+ # note). You have access to the variables error, warning, statement which
187
+ # respectively contain the number of errors / warnings messages and the total
188
+ # number of statements analyzed. This is used by the global evaluation report
189
+ # (RP0004).
190
+ evaluation=10.0 - ((float(5 * error + warning + refactor + convention) / statement) * 10)
191
+
192
+ # Template used to display messages. This is a python new-style format string
193
+ # used to format the message information. See doc for all details.
194
+ #msg-template=
195
+
196
+ # Set the output format. Available formats are text, parseable, colorized, json
197
+ # and msvs (visual studio). You can also give a reporter class, e.g.
198
+ # mypackage.mymodule.MyReporterClass.
199
+ output-format=text
200
+
201
+ # Tells whether to display a full report or only the messages.
202
+ reports=no
203
+
204
+ # Activate the evaluation score.
205
+ score=yes
206
+
207
+
208
+ [REFACTORING]
209
+
210
+ # Maximum number of nested blocks for function / method body
211
+ max-nested-blocks=5
212
+
213
+ # Complete name of functions that never return. When checking for
214
+ # inconsistent-return-statements if a never returning function is called then
215
+ # it will be considered as an explicit return statement and no message will be
216
+ # printed.
217
+ never-returning-functions=sys.exit
218
+
219
+
220
+ [LOGGING]
221
+
222
+ # Format style used to check logging format string. `old` means using %
223
+ # formatting, while `new` is for `{}` formatting.
224
+ logging-format-style=old
225
+
226
+ # Logging modules to check that the string format arguments are in logging
227
+ # function parameter format.
228
+ logging-modules=logging
229
+
230
+
231
+ [SPELLING]
232
+
233
+ # Limits count of emitted suggestions for spelling mistakes.
234
+ max-spelling-suggestions=4
235
+
236
+ # Spelling dictionary name. Available dictionaries: none. To make it work,
237
+ # install the python-enchant package.
238
+ spelling-dict=
239
+
240
+ # List of comma separated words that should not be checked.
241
+ spelling-ignore-words=
242
+
243
+ # A path to a file that contains private dictionary; one word per line.
244
+ spelling-private-dict-file=
245
+
246
+ # Tells whether to store unknown words to indicated private dictionary in
247
+ # --spelling-private-dict-file option instead of raising a message.
248
+ spelling-store-unknown-words=no
249
+
250
+
251
+ [MISCELLANEOUS]
252
+
253
+ # List of note tags to take in consideration, separated by a comma.
254
+ notes=FIXME,
255
+ XXX,
256
+ TODO
257
+
258
+
259
+ [TYPECHECK]
260
+
261
+ # List of decorators that produce context managers, such as
262
+ # contextlib.contextmanager. Add to this list to register other decorators that
263
+ # produce valid context managers.
264
+ contextmanager-decorators=contextlib.contextmanager
265
+
266
+ # List of members which are set dynamically and missed by pylint inference
267
+ # system, and so shouldn't trigger E1101 when accessed. Python regular
268
+ # expressions are accepted.
269
+ generated-members=numpy.*,torch.*
270
+
271
+ # Tells whether missing members accessed in mixin class should be ignored. A
272
+ # mixin class is detected if its name ends with "mixin" (case insensitive).
273
+ ignore-mixin-members=yes
274
+
275
+ # Tells whether to warn about missing members when the owner of the attribute
276
+ # is inferred to be None.
277
+ ignore-none=yes
278
+
279
+ # This flag controls whether pylint should warn about no-member and similar
280
+ # checks whenever an opaque object is returned when inferring. The inference
281
+ # can return multiple potential results while evaluating a Python object, but
282
+ # some branches might not be evaluated, which results in partial inference. In
283
+ # that case, it might be useful to still emit no-member and other checks for
284
+ # the rest of the inferred objects.
285
+ ignore-on-opaque-inference=yes
286
+
287
+ # List of class names for which member attributes should not be checked (useful
288
+ # for classes with dynamically set attributes). This supports the use of
289
+ # qualified names.
290
+ ignored-classes=optparse.Values,thread._local,_thread._local
291
+
292
+ # List of module names for which member attributes should not be checked
293
+ # (useful for modules/projects where namespaces are manipulated during runtime
294
+ # and thus existing member attributes cannot be deduced by static analysis. It
295
+ # supports qualified module names, as well as Unix pattern matching.
296
+ ignored-modules=
297
+
298
+ # Show a hint with possible names when a member name was not found. The aspect
299
+ # of finding the hint is based on edit distance.
300
+ missing-member-hint=yes
301
+
302
+ # The minimum edit distance a name should have in order to be considered a
303
+ # similar match for a missing member name.
304
+ missing-member-hint-distance=1
305
+
306
+ # The total number of similar names that should be taken in consideration when
307
+ # showing a hint for a missing member.
308
+ missing-member-max-choices=1
309
+
310
+
311
+ [VARIABLES]
312
+
313
+ # List of additional names supposed to be defined in builtins. Remember that
314
+ # you should avoid defining new builtins when possible.
315
+ additional-builtins=
316
+
317
+ # Tells whether unused global variables should be treated as a violation.
318
+ allow-global-unused-variables=yes
319
+
320
+ # List of strings which can identify a callback function by name. A callback
321
+ # name must start or end with one of those strings.
322
+ callbacks=cb_,
323
+ _cb
324
+
325
+ # A regular expression matching the name of dummy variables (i.e. expected to
326
+ # not be used).
327
+ dummy-variables-rgx=_+$|(_[a-zA-Z0-9_]*[a-zA-Z0-9]+?$)|dummy|^ignored_|^unused_
328
+
329
+ # Argument names that match this expression will be ignored. Default to name
330
+ # with leading underscore.
331
+ ignored-argument-names=_.*|^ignored_|^unused_
332
+
333
+ # Tells whether we should check for unused import in __init__ files.
334
+ init-import=no
335
+
336
+ # List of qualified module names which can have objects that can redefine
337
+ # builtins.
338
+ redefining-builtins-modules=six.moves,past.builtins,future.builtins,builtins,io
339
+
340
+
341
+ [FORMAT]
342
+
343
+ # Expected format of line ending, e.g. empty (any line ending), LF or CRLF.
344
+ expected-line-ending-format=
345
+
346
+ # Regexp for a line that is allowed to be longer than the limit.
347
+ ignore-long-lines=^\s*(# )?<?https?://\S+>?$
348
+
349
+ # Number of spaces of indent required inside a hanging or continued line.
350
+ indent-after-paren=4
351
+
352
+ # String used as indentation unit. This is usually " " (4 spaces) or "\t" (1
353
+ # tab).
354
+ indent-string=' '
355
+
356
+ # Maximum number of characters on a single line.
357
+ max-line-length=120
358
+
359
+ # Maximum number of lines in a module.
360
+ max-module-lines=1000
361
+
362
+ # List of optional constructs for which whitespace checking is disabled. `dict-
363
+ # separator` is used to allow tabulation in dicts, etc.: {1 : 1,\n222: 2}.
364
+ # `trailing-comma` allows a space between comma and closing bracket: (a, ).
365
+ # `empty-line` allows space-only lines.
366
+ no-space-check=trailing-comma,
367
+ dict-separator
368
+
369
+ # Allow the body of a class to be on the same line as the declaration if body
370
+ # contains single statement.
371
+ single-line-class-stmt=no
372
+
373
+ # Allow the body of an if to be on the same line as the test if there is no
374
+ # else.
375
+ single-line-if-stmt=no
376
+
377
+
378
+ [SIMILARITIES]
379
+
380
+ # Ignore comments when computing similarities.
381
+ ignore-comments=yes
382
+
383
+ # Ignore docstrings when computing similarities.
384
+ ignore-docstrings=yes
385
+
386
+ # Ignore imports when computing similarities.
387
+ ignore-imports=no
388
+
389
+ # Minimum number of lines of a similarity.
390
+ min-similarity-lines=4
391
+
392
+
393
+ [BASIC]
394
+
395
+ # Naming style matching correct argument names.
396
+ argument-naming-style=snake_case
397
+
398
+ # Regular expression matching correct argument names. Overrides argument-
399
+ # naming-style.
400
+ argument-rgx=[a-z_][a-z0-9_]{0,30}$
401
+
402
+ # Naming style matching correct attribute names.
403
+ attr-naming-style=snake_case
404
+
405
+ # Regular expression matching correct attribute names. Overrides attr-naming-
406
+ # style.
407
+ #attr-rgx=
408
+
409
+ # Bad variable names which should always be refused, separated by a comma.
410
+ bad-names=
411
+
412
+ # Naming style matching correct class attribute names.
413
+ class-attribute-naming-style=any
414
+
415
+ # Regular expression matching correct class attribute names. Overrides class-
416
+ # attribute-naming-style.
417
+ #class-attribute-rgx=
418
+
419
+ # Naming style matching correct class names.
420
+ class-naming-style=PascalCase
421
+
422
+ # Regular expression matching correct class names. Overrides class-naming-
423
+ # style.
424
+ #class-rgx=
425
+
426
+ # Naming style matching correct constant names.
427
+ const-naming-style=UPPER_CASE
428
+
429
+ # Regular expression matching correct constant names. Overrides const-naming-
430
+ # style.
431
+ #const-rgx=
432
+
433
+ # Minimum line length for functions/classes that require docstrings, shorter
434
+ # ones are exempt.
435
+ docstring-min-length=-1
436
+
437
+ # Naming style matching correct function names.
438
+ function-naming-style=snake_case
439
+
440
+ # Regular expression matching correct function names. Overrides function-
441
+ # naming-style.
442
+ #function-rgx=
443
+
444
+ # Good variable names which should always be accepted, separated by a comma.
445
+ good-names=i,
446
+ j,
447
+ k,
448
+ x,
449
+ ex,
450
+ Run,
451
+ _
452
+
453
+ # Include a hint for the correct naming format with invalid-name.
454
+ include-naming-hint=no
455
+
456
+ # Naming style matching correct inline iteration names.
457
+ inlinevar-naming-style=any
458
+
459
+ # Regular expression matching correct inline iteration names. Overrides
460
+ # inlinevar-naming-style.
461
+ #inlinevar-rgx=
462
+
463
+ # Naming style matching correct method names.
464
+ method-naming-style=snake_case
465
+
466
+ # Regular expression matching correct method names. Overrides method-naming-
467
+ # style.
468
+ #method-rgx=
469
+
470
+ # Naming style matching correct module names.
471
+ module-naming-style=snake_case
472
+
473
+ # Regular expression matching correct module names. Overrides module-naming-
474
+ # style.
475
+ #module-rgx=
476
+
477
+ # Colon-delimited sets of names that determine each other's naming style when
478
+ # the name regexes allow several styles.
479
+ name-group=
480
+
481
+ # Regular expression which should only match function or class names that do
482
+ # not require a docstring.
483
+ no-docstring-rgx=^_
484
+
485
+ # List of decorators that produce properties, such as abc.abstractproperty. Add
486
+ # to this list to register other decorators that produce valid properties.
487
+ # These decorators are taken in consideration only for invalid-name.
488
+ property-classes=abc.abstractproperty
489
+
490
+ # Naming style matching correct variable names.
491
+ variable-naming-style=snake_case
492
+
493
+ # Regular expression matching correct variable names. Overrides variable-
494
+ # naming-style.
495
+ variable-rgx=[a-z_][a-z0-9_]{0,30}$
496
+
497
+
498
+ [STRING]
499
+
500
+ # This flag controls whether the implicit-str-concat-in-sequence should
501
+ # generate a warning on implicit string concatenation in sequences defined over
502
+ # several lines.
503
+ check-str-concat-over-line-jumps=no
504
+
505
+
506
+ [IMPORTS]
507
+
508
+ # Allow wildcard imports from modules that define __all__.
509
+ allow-wildcard-with-all=no
510
+
511
+ # Analyse import fallback blocks. This can be used to support both Python 2 and
512
+ # 3 compatible code, which means that the block might have code that exists
513
+ # only in one or another interpreter, leading to false positives when analysed.
514
+ analyse-fallback-blocks=no
515
+
516
+ # Deprecated modules which should not be used, separated by a comma.
517
+ deprecated-modules=optparse,tkinter.tix
518
+
519
+ # Create a graph of all (i.e. internal and external) dependencies in the
520
+ # not be disabled).
521
+ ext-import-graph=
522
+
523
+ # Create a graph of every (i.e. internal and external) dependencies in the
524
+ # given file (report RP0402 must not be disabled).
525
+ import-graph=
526
+
527
+ # Create a graph of internal dependencies in the given file (report RP0402 must
528
+ # not be disabled).
529
+ int-import-graph=
530
+
531
+ # Force import order to recognize a module as part of the standard
532
+ # compatibility libraries.
533
+ known-standard-library=
534
+
535
+ # Force import order to recognize a module as part of a third party library.
536
+ known-third-party=enchant
537
+
538
+
539
+ [CLASSES]
540
+
541
+ # List of method names used to declare (i.e. assign) instance attributes.
542
+ defining-attr-methods=__init__,
543
+ __new__,
544
+ setUp
545
+
546
+ # List of member names, which should be excluded from the protected access
547
+ # warning.
548
+ exclude-protected=_asdict,
549
+ _fields,
550
+ _replace,
551
+ _source,
552
+ _make
553
+
554
+ # List of valid names for the first argument in a class method.
555
+ valid-classmethod-first-arg=cls
556
+
557
+ # List of valid names for the first argument in a metaclass class method.
558
+ valid-metaclass-classmethod-first-arg=cls
559
+
560
+
561
+ [DESIGN]
562
+
563
+ # Maximum number of arguments for function / method.
564
+ max-args=5
565
+
566
+ # Maximum number of attributes for a class (see R0902).
567
+ max-attributes=7
568
+
569
+ # Maximum number of boolean expressions in an if statement.
570
+ max-bool-expr=5
571
+
572
+ # Maximum number of branches for function / method body.
573
+ max-branches=12
574
+
575
+ # Maximum number of locals for function / method body.
576
+ max-locals=15
577
+
578
+ # Maximum number of parents for a class (see R0901).
579
+ max-parents=15
580
+
581
+ # Maximum number of public methods for a class (see R0904).
582
+ max-public-methods=20
583
+
584
+ # Maximum number of return / yield for function / method body.
585
+ max-returns=6
586
+
587
+ # Maximum number of statements in function / method body.
588
+ max-statements=50
589
+
590
+ # Minimum number of public methods for a class (see R0903).
591
+ min-public-methods=2
592
+
593
+
594
+ [EXCEPTIONS]
595
+
596
+ # Exceptions that will emit a warning when being caught. Defaults to
597
+ # "BaseException, Exception".
598
+ overgeneral-exceptions=BaseException,
599
+ Exception
TTS/.readthedocs.yml ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # .readthedocs.yml
2
+ # Read the Docs configuration file
3
+ # See https://docs.readthedocs.io/en/stable/config-file/v2.html for details
4
+
5
+ # Required
6
+ version: 2
7
+
8
+ # Set the version of Python and other tools you might need
9
+ build:
10
+ os: ubuntu-22.04
11
+ tools:
12
+ python: "3.11"
13
+
14
+ # Optionally set the version of Python and requirements required to build your docs
15
+ python:
16
+ install:
17
+ - requirements: docs/requirements.txt
18
+ - requirements: requirements.txt
19
+
20
+ # Build documentation in the docs/ directory with Sphinx
21
+ sphinx:
22
+ builder: html
23
+ configuration: docs/source/conf.py
TTS/CITATION.cff ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ cff-version: 1.2.0
2
+ message: "If you want to cite 🐸💬, feel free to use this (but only if you loved it 😊)"
3
+ title: "Coqui TTS"
4
+ abstract: "A deep learning toolkit for Text-to-Speech, battle-tested in research and production"
5
+ date-released: 2021-01-01
6
+ authors:
7
+ - family-names: "Eren"
8
+ given-names: "Gölge"
9
+ - name: "The Coqui TTS Team"
10
+ version: 1.4
11
+ doi: 10.5281/zenodo.6334862
12
+ license: "MPL-2.0"
13
+ url: "https://www.coqui.ai"
14
+ repository-code: "https://github.com/coqui-ai/TTS"
15
+ keywords:
16
+ - machine learning
17
+ - deep learning
18
+ - artificial intelligence
19
+ - text to speech
20
+ - TTS
TTS/CODE_OF_CONDUCT.md ADDED
@@ -0,0 +1,133 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ # Contributor Covenant Code of Conduct
3
+
4
+ ## Our Pledge
5
+
6
+ We as members, contributors, and leaders pledge to make participation in our
7
+ community a harassment-free experience for everyone, regardless of age, body
8
+ size, visible or invisible disability, ethnicity, sex characteristics, gender
9
+ identity and expression, level of experience, education, socio-economic status,
10
+ nationality, personal appearance, race, caste, color, religion, or sexual identity
11
+ and orientation.
12
+
13
+ We pledge to act and interact in ways that contribute to an open, welcoming,
14
+ diverse, inclusive, and healthy community.
15
+
16
+ ## Our Standards
17
+
18
+ Examples of behavior that contributes to a positive environment for our
19
+ community include:
20
+
21
+ * Demonstrating empathy and kindness toward other people
22
+ * Being respectful of differing opinions, viewpoints, and experiences
23
+ * Giving and gracefully accepting constructive feedback
24
+ * Accepting responsibility and apologizing to those affected by our mistakes,
25
+ and learning from the experience
26
+ * Focusing on what is best not just for us as individuals, but for the
27
+ overall community
28
+
29
+ Examples of unacceptable behavior include:
30
+
31
+ * The use of sexualized language or imagery, and sexual attention or
32
+ advances of any kind
33
+ * Trolling, insulting or derogatory comments, and personal or political attacks
34
+ * Public or private harassment
35
+ * Publishing others' private information, such as a physical or email
36
+ address, without their explicit permission
37
+ * Other conduct which could reasonably be considered inappropriate in a
38
+ professional setting
39
+
40
+ ## Enforcement Responsibilities
41
+
42
+ Community leaders are responsible for clarifying and enforcing our standards of
43
+ acceptable behavior and will take appropriate and fair corrective action in
44
+ response to any behavior that they deem inappropriate, threatening, offensive,
45
+ or harmful.
46
+
47
+ Community leaders have the right and responsibility to remove, edit, or reject
48
+ comments, commits, code, wiki edits, issues, and other contributions that are
49
+ not aligned to this Code of Conduct, and will communicate reasons for moderation
50
+ decisions when appropriate.
51
+
52
+ ## Scope
53
+
54
+ This Code of Conduct applies within all community spaces, and also applies when
55
+ an individual is officially representing the community in public spaces.
56
+ Examples of representing our community include using an official e-mail address,
57
+ posting via an official social media account, or acting as an appointed
58
+ representative at an online or offline event.
59
+
60
+ ## Enforcement
61
+
62
+ Instances of abusive, harassing, or otherwise unacceptable behavior may be
63
+ reported to the community leaders responsible for enforcement at
64
+ coc-report@coqui.ai.
65
+ All complaints will be reviewed and investigated promptly and fairly.
66
+
67
+ All community leaders are obligated to respect the privacy and security of the
68
+ reporter of any incident.
69
+
70
+ ## Enforcement Guidelines
71
+
72
+ Community leaders will follow these Community Impact Guidelines in determining
73
+ the consequences for any action they deem in violation of this Code of Conduct:
74
+
75
+ ### 1. Correction
76
+
77
+ **Community Impact**: Use of inappropriate language or other behavior deemed
78
+ unprofessional or unwelcome in the community.
79
+
80
+ **Consequence**: A private, written warning from community leaders, providing
81
+ clarity around the nature of the violation and an explanation of why the
82
+ behavior was inappropriate. A public apology may be requested.
83
+
84
+ ### 2. Warning
85
+
86
+ **Community Impact**: A violation through a single incident or series
87
+ of actions.
88
+
89
+ **Consequence**: A warning with consequences for continued behavior. No
90
+ interaction with the people involved, including unsolicited interaction with
91
+ those enforcing the Code of Conduct, for a specified period of time. This
92
+ includes avoiding interactions in community spaces as well as external channels
93
+ like social media. Violating these terms may lead to a temporary or
94
+ permanent ban.
95
+
96
+ ### 3. Temporary Ban
97
+
98
+ **Community Impact**: A serious violation of community standards, including
99
+ sustained inappropriate behavior.
100
+
101
+ **Consequence**: A temporary ban from any sort of interaction or public
102
+ communication with the community for a specified period of time. No public or
103
+ private interaction with the people involved, including unsolicited interaction
104
+ with those enforcing the Code of Conduct, is allowed during this period.
105
+ Violating these terms may lead to a permanent ban.
106
+
107
+ ### 4. Permanent Ban
108
+
109
+ **Community Impact**: Demonstrating a pattern of violation of community
110
+ standards, including sustained inappropriate behavior, harassment of an
111
+ individual, or aggression toward or disparagement of classes of individuals.
112
+
113
+ **Consequence**: A permanent ban from any sort of public interaction within
114
+ the community.
115
+
116
+ ## Attribution
117
+
118
+ This Code of Conduct is adapted from the [Contributor Covenant][homepage],
119
+ version 2.0, available at
120
+ [https://www.contributor-covenant.org/version/2/0/code_of_conduct.html][v2.0].
121
+
122
+ Community Impact Guidelines were inspired by
123
+ [Mozilla's code of conduct enforcement ladder][Mozilla CoC].
124
+
125
+ For answers to common questions about this code of conduct, see the FAQ at
126
+ [https://www.contributor-covenant.org/faq][FAQ]. Translations are available
127
+ at [https://www.contributor-covenant.org/translations][translations].
128
+
129
+ [homepage]: https://www.contributor-covenant.org
130
+ [v2.0]: https://www.contributor-covenant.org/version/2/0/code_of_conduct.html
131
+ [Mozilla CoC]: https://github.com/mozilla/diversity
132
+ [FAQ]: https://www.contributor-covenant.org/faq
133
+ [translations]: https://www.contributor-covenant.org/translations
TTS/CODE_OWNERS.rst ADDED
@@ -0,0 +1,75 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ TTS code owners / governance system
2
+ ==========================================
3
+
4
+ TTS is run under a governance system inspired by (and partially copied from) the `Mozilla module ownership system <https://www.mozilla.org/about/governance/policies/module-ownership/>`_. The project is roughly divided into modules, and each module has its owners, who are responsible for reviewing pull requests and deciding on technical direction for their modules. Module ownership authority is given to people who have worked extensively on areas of the project.
5
+
6
+ Module owners also have the authority of naming other module owners or appointing module peers, which are people with authority to review pull requests in that module. They can also sub-divide their module into sub-modules with their owners.
7
+
8
+ Module owners are not tyrants. They are chartered to make decisions with input from the community and in the best interest of the community. Module owners are not required to make code changes or additions solely because the community wants them to do so. (Like anyone else, the module owners may write code because they want to, because their employers want them to, because the community wants them to, or for some other reason.) Module owners do need to pay attention to patches submitted to that module. However, “pay attention” does not mean agreeing to every patch. Some patches may not make sense for the TTS project; some may be poorly implemented. Module owners have the authority to decline a patch; this is a necessary part of the role. We ask the module owners to describe in the relevant issue their reasons for wanting changes to a patch, for declining it altogether, or for postponing review for some period. We don’t ask or expect them to rewrite patches to make them acceptable. Similarly, module owners may need to delay review of a promising patch due to an upcoming deadline. For example, a patch may be of interest, but not for the next milestone. In such a case it may make sense for the module owner to postpone review of a patch until after matters needed for a milestone have been finalized. Again, we expect this to be described in the relevant issue. And of course, it shouldn’t go on very often or for very long or escalation and review is likely.
9
+
10
+ The work of the various module owners and peers is overseen by the global owners, who are responsible for making final decisions in case there's conflict between owners as well as setting the direction for the project as a whole.
11
+
12
+ This file describes module owners who are active on the project and which parts of the code they have expertise on (and interest in). If you're making changes to the code and are wondering who's an appropriate person to talk to, this list will tell you who to ping.
13
+
14
+ There's overlap in the areas of expertise of each owner, and in particular when looking at which files are covered by each area, there is a lot of overlap. Don't worry about getting it exactly right when requesting review, any code owner will be happy to redirect the request to a more appropriate person.
15
+
16
+ Global owners
17
+ ----------------
18
+
19
+ These are people who have worked on the project extensively and are familiar with all or most parts of it. Their expertise and review guidance is trusted by other code owners to cover their own areas of expertise. In case of conflicting opinions from other owners, global owners will make a final decision.
20
+
21
+ - Eren Gölge (@erogol)
22
+ - Reuben Morais (@reuben)
23
+
24
+ Training, feeding
25
+ -----------------
26
+
27
+ - Eren Gölge (@erogol)
28
+
29
+ Model exporting
30
+ ---------------
31
+
32
+ - Eren Gölge (@erogol)
33
+
34
+ Multi-Speaker TTS
35
+ -----------------
36
+
37
+ - Eren Gölge (@erogol)
38
+ - Edresson Casanova (@edresson)
39
+
40
+ TTS
41
+ ---
42
+
43
+ - Eren Gölge (@erogol)
44
+
45
+ Vocoders
46
+ --------
47
+
48
+ - Eren Gölge (@erogol)
49
+
50
+ Speaker Encoder
51
+ ---------------
52
+
53
+ - Eren Gölge (@erogol)
54
+
55
+ Testing & CI
56
+ ------------
57
+
58
+ - Eren Gölge (@erogol)
59
+ - Reuben Morais (@reuben)
60
+
61
+ Python bindings
62
+ ---------------
63
+
64
+ - Eren Gölge (@erogol)
65
+ - Reuben Morais (@reuben)
66
+
67
+ Documentation
68
+ -------------
69
+
70
+ - Eren Gölge (@erogol)
71
+
72
+ Third party bindings
73
+ --------------------
74
+
75
+ Owned by the author.
TTS/CONTRIBUTING.md ADDED
@@ -0,0 +1,162 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Contribution guidelines
2
+
3
+ Welcome to the 🐸TTS!
4
+
5
+ This repository is governed by [the Contributor Covenant Code of Conduct](https://github.com/coqui-ai/TTS/blob/main/CODE_OF_CONDUCT.md).
6
+
7
+ ## Where to start.
8
+ We welcome everyone who likes to contribute to 🐸TTS.
9
+
10
+ You can contribute not only with code but with bug reports, comments, questions, answers, or just a simple tweet to spread the word.
11
+
12
+ If you would like to contribute code or squash a bug but don't know where to start, here are some pointers.
13
+
14
+ - [Development Road Map](https://github.com/coqui-ai/TTS/issues/378)
15
+
16
+ You can pick something out of our road map. We keep the progress of the project in this simple issue thread. It has new model proposals or developmental updates etc.
17
+
18
+ - [Github Issues Tracker](https://github.com/coqui-ai/TTS/issues)
19
+
20
+ This is a place to find feature requests and bugs.
21
+
22
+ Issues with the ```good first issue``` tag are a good place for beginners to start.
23
+
24
+ - ✨**PR**✨ [pages](https://github.com/coqui-ai/TTS/pulls) with the ```🚀new version``` tag.
25
+
26
+ We list all the target improvements for the next version. You can pick one of them and start contributing.
27
+
28
+ - Also feel free to suggest new features, ideas and models. We're always open for new things.
29
+
30
+ ## Call for sharing language models
31
+ If possible, please consider sharing your pre-trained models in any language (if the licences allow for you to do so). We will include them in our model catalogue for public use and give the proper attribution, whether it be your name, company, website or any other source specified.
32
+
33
+ Models can be shared in two ways:
34
+ 1. Share the model files with us and we serve them with the next 🐸 TTS release.
35
+ 2. Upload your models on GDrive and share the link.
36
+
37
+ Models are listed in the `.models.json` file, and any model is available through the TTS CLI or server endpoints.
38
+
39
+ Either way you choose, please make sure you send the models [here](https://github.com/coqui-ai/TTS/discussions/930).
40
+
41
+ ## Sending a ✨**PR**✨
42
+
43
+ If you have a new feature, a model to implement, or a bug to squash, go ahead and send a ✨**PR**✨.
44
+ Please use the following steps to send a ✨**PR**✨.
45
+ Let us know if you encounter a problem along the way.
46
+
47
+ The following steps are tested on an Ubuntu system.
48
+
49
+ 1. Fork [🐸TTS](https://github.com/coqui-ai/TTS) by clicking the fork button at the top right corner of the project page.
50
+
51
+ 2. Clone 🐸TTS and add the main repo as a new remote named ```upstream```.
52
+
53
+ ```bash
54
+ $ git clone git@github.com:<your Github name>/TTS.git
55
+ $ cd TTS
56
+ $ git remote add upstream https://github.com/coqui-ai/TTS.git
57
+ ```
58
+
59
+ 3. Install 🐸TTS for development.
60
+
61
+ ```bash
62
+ $ make system-deps # intended to be used on Ubuntu (Debian). Let us know if you have a different OS.
63
+ $ make install
64
+ ```
65
+
66
+ 4. Create a new branch with an informative name for your goal.
67
+
68
+ ```bash
69
+ $ git checkout -b an_informative_name_for_my_branch
70
+ ```
71
+
72
+ 5. Implement your changes on your new branch.
73
+
74
+ 6. Explain your code using [Google Style](https://google.github.io/styleguide/pyguide.html#381-docstrings) docstrings.
75
+
76
+ 7. Add your tests to our test suite under ```tests``` folder. It is important to show that your code works, edge cases are considered, and inform others about the intended use.
77
+
78
+ 8. Run the tests to see how your updates work with the rest of the project. You can repeat this step multiple times as you implement your changes to make sure you are heading in the right direction.
79
+
80
+ ```bash
81
+ $ make test # stop at the first error
82
+ $ make test_all # run all the tests, report all the errors
83
+ ```
84
+
85
+ 9. Format your code. We use ```black``` for code and ```isort``` for ```import``` formatting.
86
+
87
+ ```bash
88
+ $ make style
89
+ ```
90
+
91
+ 10. Run the linter and correct the issues raised. We use ```pylint``` for linting. It helps enforce a coding standard and offers simple refactoring suggestions.
92
+
93
+ ```bash
94
+ $ make lint
95
+ ```
96
+
97
+ 11. When things are good, add new files and commit your changes.
98
+
99
+ ```bash
100
+ $ git add my_file1.py my_file2.py ...
101
+ $ git commit
102
+ ```
103
+
104
+ It's a good practice to regularly sync your local copy of the project with the upstream code to keep up with the recent updates.
105
+
106
+ ```bash
107
+ $ git fetch upstream
108
+ $ git rebase upstream/main
109
+ # or for the development version
110
+ $ git rebase upstream/dev
111
+ ```
112
+
113
+ 12. Send a PR to ```dev``` branch.
114
+
115
+ Push your branch to your fork.
116
+
117
+ ```bash
118
+ $ git push -u origin an_informative_name_for_my_branch
119
+ ```
120
+
121
+ Then go to your fork's Github page and click on 'Pull request' to send your ✨**PR**✨.
122
+
123
+ Please set ✨**PR**✨'s target branch to ```dev``` as we use ```dev``` to work on the next version.
124
+
125
+ 13. Let's discuss until it is perfect. 💪
126
+
127
+ We might ask you for certain changes that would appear in the ✨**PR**✨'s page under [🐸TTS](https://github.com/coqui-ai/TTS/pulls).
128
+
129
+ 14. Once things look perfect, we merge it to the ```dev``` branch and make it ready for the next version.
130
+
131
+ ## Development in Docker container
132
+
133
+ If you prefer working within a Docker container as your development environment, you can do the following:
134
+
135
+ 1. Fork [🐸TTS](https://github.com/coqui-ai/TTS) by clicking the fork button at the top right corner of the project page.
136
+
137
+ 2. Clone 🐸TTS and add the main repo as a new remote named ```upstream```.
138
+
139
+ ```bash
140
+ $ git clone git@github.com:<your Github name>/TTS.git
141
+ $ cd TTS
142
+ $ git remote add upstream https://github.com/coqui-ai/TTS.git
143
+ ```
144
+
145
+ 3. Build the Docker Image as your development environment (it installs all of the dependencies for you):
146
+
147
+ ```
148
+ docker build --tag=tts-dev:latest -f ./dockerfiles/Dockerfile.dev .
149
+ ```
150
+
151
+ 4. Run the container with GPU support:
152
+
153
+ ```
154
+ docker run -it --gpus all tts-dev:latest /bin/bash
155
+ ```
156
+
157
+ Feel free to ping us at any step you need help using our communication channels.
158
+
159
+ If you are new to Github or open-source contribution, these are good resources.
160
+
161
+ - [Github Docs](https://docs.github.com/en/github/collaborating-with-issues-and-pull-requests/proposing-changes-to-your-work-with-pull-requests)
162
+ - [First-Contribution](https://github.com/firstcontributions/first-contributions)
TTS/Dockerfile ADDED
@@ -0,0 +1,19 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ARG BASE=nvidia/cuda:11.8.0-base-ubuntu22.04
2
+ FROM ${BASE}
3
+
4
+ RUN apt-get update && apt-get upgrade -y
5
+ RUN apt-get install -y --no-install-recommends gcc g++ make python3 python3-dev python3-pip python3-venv python3-wheel espeak-ng libsndfile1-dev && rm -rf /var/lib/apt/lists/*
6
+ RUN pip3 install llvmlite --ignore-installed
7
+
8
+ # Install Dependencies:
9
+ RUN pip3 install torch torchaudio --extra-index-url https://download.pytorch.org/whl/cu118
10
+ RUN rm -rf /root/.cache/pip
11
+
12
+ # Copy TTS repository contents:
13
+ WORKDIR /root
14
+ COPY . /root
15
+
16
+ RUN make install
17
+
18
+ ENTRYPOINT ["tts"]
19
+ CMD ["--help"]
TTS/LICENSE.txt ADDED
@@ -0,0 +1,373 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Mozilla Public License Version 2.0
2
+ ==================================
3
+
4
+ 1. Definitions
5
+ --------------
6
+
7
+ 1.1. "Contributor"
8
+ means each individual or legal entity that creates, contributes to
9
+ the creation of, or owns Covered Software.
10
+
11
+ 1.2. "Contributor Version"
12
+ means the combination of the Contributions of others (if any) used
13
+ by a Contributor and that particular Contributor's Contribution.
14
+
15
+ 1.3. "Contribution"
16
+ means Covered Software of a particular Contributor.
17
+
18
+ 1.4. "Covered Software"
19
+ means Source Code Form to which the initial Contributor has attached
20
+ the notice in Exhibit A, the Executable Form of such Source Code
21
+ Form, and Modifications of such Source Code Form, in each case
22
+ including portions thereof.
23
+
24
+ 1.5. "Incompatible With Secondary Licenses"
25
+ means
26
+
27
+ (a) that the initial Contributor has attached the notice described
28
+ in Exhibit B to the Covered Software; or
29
+
30
+ (b) that the Covered Software was made available under the terms of
31
+ version 1.1 or earlier of the License, but not also under the
32
+ terms of a Secondary License.
33
+
34
+ 1.6. "Executable Form"
35
+ means any form of the work other than Source Code Form.
36
+
37
+ 1.7. "Larger Work"
38
+ means a work that combines Covered Software with other material, in
39
+ a separate file or files, that is not Covered Software.
40
+
41
+ 1.8. "License"
42
+ means this document.
43
+
44
+ 1.9. "Licensable"
45
+ means having the right to grant, to the maximum extent possible,
46
+ whether at the time of the initial grant or subsequently, any and
47
+ all of the rights conveyed by this License.
48
+
49
+ 1.10. "Modifications"
50
+ means any of the following:
51
+
52
+ (a) any file in Source Code Form that results from an addition to,
53
+ deletion from, or modification of the contents of Covered
54
+ Software; or
55
+
56
+ (b) any new file in Source Code Form that contains any Covered
57
+ Software.
58
+
59
+ 1.11. "Patent Claims" of a Contributor
60
+ means any patent claim(s), including without limitation, method,
61
+ process, and apparatus claims, in any patent Licensable by such
62
+ Contributor that would be infringed, but for the grant of the
63
+ License, by the making, using, selling, offering for sale, having
64
+ made, import, or transfer of either its Contributions or its
65
+ Contributor Version.
66
+
67
+ 1.12. "Secondary License"
68
+ means either the GNU General Public License, Version 2.0, the GNU
69
+ Lesser General Public License, Version 2.1, the GNU Affero General
70
+ Public License, Version 3.0, or any later versions of those
71
+ licenses.
72
+
73
+ 1.13. "Source Code Form"
74
+ means the form of the work preferred for making modifications.
75
+
76
+ 1.14. "You" (or "Your")
77
+ means an individual or a legal entity exercising rights under this
78
+ License. For legal entities, "You" includes any entity that
79
+ controls, is controlled by, or is under common control with You. For
80
+ purposes of this definition, "control" means (a) the power, direct
81
+ or indirect, to cause the direction or management of such entity,
82
+ whether by contract or otherwise, or (b) ownership of more than
83
+ fifty percent (50%) of the outstanding shares or beneficial
84
+ ownership of such entity.
85
+
86
+ 2. License Grants and Conditions
87
+ --------------------------------
88
+
89
+ 2.1. Grants
90
+
91
+ Each Contributor hereby grants You a world-wide, royalty-free,
92
+ non-exclusive license:
93
+
94
+ (a) under intellectual property rights (other than patent or trademark)
95
+ Licensable by such Contributor to use, reproduce, make available,
96
+ modify, display, perform, distribute, and otherwise exploit its
97
+ Contributions, either on an unmodified basis, with Modifications, or
98
+ as part of a Larger Work; and
99
+
100
+ (b) under Patent Claims of such Contributor to make, use, sell, offer
101
+ for sale, have made, import, and otherwise transfer either its
102
+ Contributions or its Contributor Version.
103
+
104
+ 2.2. Effective Date
105
+
106
+ The licenses granted in Section 2.1 with respect to any Contribution
107
+ become effective for each Contribution on the date the Contributor first
108
+ distributes such Contribution.
109
+
110
+ 2.3. Limitations on Grant Scope
111
+
112
+ The licenses granted in this Section 2 are the only rights granted under
113
+ this License. No additional rights or licenses will be implied from the
114
+ distribution or licensing of Covered Software under this License.
115
+ Notwithstanding Section 2.1(b) above, no patent license is granted by a
116
+ Contributor:
117
+
118
+ (a) for any code that a Contributor has removed from Covered Software;
119
+ or
120
+
121
+ (b) for infringements caused by: (i) Your and any other third party's
122
+ modifications of Covered Software, or (ii) the combination of its
123
+ Contributions with other software (except as part of its Contributor
124
+ Version); or
125
+
126
+ (c) under Patent Claims infringed by Covered Software in the absence of
127
+ its Contributions.
128
+
129
+ This License does not grant any rights in the trademarks, service marks,
130
+ or logos of any Contributor (except as may be necessary to comply with
131
+ the notice requirements in Section 3.4).
132
+
133
+ 2.4. Subsequent Licenses
134
+
135
+ No Contributor makes additional grants as a result of Your choice to
136
+ distribute the Covered Software under a subsequent version of this
137
+ License (see Section 10.2) or under the terms of a Secondary License (if
138
+ permitted under the terms of Section 3.3).
139
+
140
+ 2.5. Representation
141
+
142
+ Each Contributor represents that the Contributor believes its
143
+ Contributions are its original creation(s) or it has sufficient rights
144
+ to grant the rights to its Contributions conveyed by this License.
145
+
146
+ 2.6. Fair Use
147
+
148
+ This License is not intended to limit any rights You have under
149
+ applicable copyright doctrines of fair use, fair dealing, or other
150
+ equivalents.
151
+
152
+ 2.7. Conditions
153
+
154
+ Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted
155
+ in Section 2.1.
156
+
157
+ 3. Responsibilities
158
+ -------------------
159
+
160
+ 3.1. Distribution of Source Form
161
+
162
+ All distribution of Covered Software in Source Code Form, including any
163
+ Modifications that You create or to which You contribute, must be under
164
+ the terms of this License. You must inform recipients that the Source
165
+ Code Form of the Covered Software is governed by the terms of this
166
+ License, and how they can obtain a copy of this License. You may not
167
+ attempt to alter or restrict the recipients' rights in the Source Code
168
+ Form.
169
+
170
+ 3.2. Distribution of Executable Form
171
+
172
+ If You distribute Covered Software in Executable Form then:
173
+
174
+ (a) such Covered Software must also be made available in Source Code
175
+ Form, as described in Section 3.1, and You must inform recipients of
176
+ the Executable Form how they can obtain a copy of such Source Code
177
+ Form by reasonable means in a timely manner, at a charge no more
178
+ than the cost of distribution to the recipient; and
179
+
180
+ (b) You may distribute such Executable Form under the terms of this
181
+ License, or sublicense it under different terms, provided that the
182
+ license for the Executable Form does not attempt to limit or alter
183
+ the recipients' rights in the Source Code Form under this License.
184
+
185
+ 3.3. Distribution of a Larger Work
186
+
187
+ You may create and distribute a Larger Work under terms of Your choice,
188
+ provided that You also comply with the requirements of this License for
189
+ the Covered Software. If the Larger Work is a combination of Covered
190
+ Software with a work governed by one or more Secondary Licenses, and the
191
+ Covered Software is not Incompatible With Secondary Licenses, this
192
+ License permits You to additionally distribute such Covered Software
193
+ under the terms of such Secondary License(s), so that the recipient of
194
+ the Larger Work may, at their option, further distribute the Covered
195
+ Software under the terms of either this License or such Secondary
196
+ License(s).
197
+
198
+ 3.4. Notices
199
+
200
+ You may not remove or alter the substance of any license notices
201
+ (including copyright notices, patent notices, disclaimers of warranty,
202
+ or limitations of liability) contained within the Source Code Form of
203
+ the Covered Software, except that You may alter any license notices to
204
+ the extent required to remedy known factual inaccuracies.
205
+
206
+ 3.5. Application of Additional Terms
207
+
208
+ You may choose to offer, and to charge a fee for, warranty, support,
209
+ indemnity or liability obligations to one or more recipients of Covered
210
+ Software. However, You may do so only on Your own behalf, and not on
211
+ behalf of any Contributor. You must make it absolutely clear that any
212
+ such warranty, support, indemnity, or liability obligation is offered by
213
+ You alone, and You hereby agree to indemnify every Contributor for any
214
+ liability incurred by such Contributor as a result of warranty, support,
215
+ indemnity or liability terms You offer. You may include additional
216
+ disclaimers of warranty and limitations of liability specific to any
217
+ jurisdiction.
218
+
219
+ 4. Inability to Comply Due to Statute or Regulation
220
+ ---------------------------------------------------
221
+
222
+ If it is impossible for You to comply with any of the terms of this
223
+ License with respect to some or all of the Covered Software due to
224
+ statute, judicial order, or regulation then You must: (a) comply with
225
+ the terms of this License to the maximum extent possible; and (b)
226
+ describe the limitations and the code they affect. Such description must
227
+ be placed in a text file included with all distributions of the Covered
228
+ Software under this License. Except to the extent prohibited by statute
229
+ or regulation, such description must be sufficiently detailed for a
230
+ recipient of ordinary skill to be able to understand it.
231
+
232
+ 5. Termination
233
+ --------------
234
+
235
+ 5.1. The rights granted under this License will terminate automatically
236
+ if You fail to comply with any of its terms. However, if You become
237
+ compliant, then the rights granted under this License from a particular
238
+ Contributor are reinstated (a) provisionally, unless and until such
239
+ Contributor explicitly and finally terminates Your grants, and (b) on an
240
+ ongoing basis, if such Contributor fails to notify You of the
241
+ non-compliance by some reasonable means prior to 60 days after You have
242
+ come back into compliance. Moreover, Your grants from a particular
243
+ Contributor are reinstated on an ongoing basis if such Contributor
244
+ notifies You of the non-compliance by some reasonable means, this is the
245
+ first time You have received notice of non-compliance with this License
246
+ from such Contributor, and You become compliant prior to 30 days after
247
+ Your receipt of the notice.
248
+
249
+ 5.2. If You initiate litigation against any entity by asserting a patent
250
+ infringement claim (excluding declaratory judgment actions,
251
+ counter-claims, and cross-claims) alleging that a Contributor Version
252
+ directly or indirectly infringes any patent, then the rights granted to
253
+ You by any and all Contributors for the Covered Software under Section
254
+ 2.1 of this License shall terminate.
255
+
256
+ 5.3. In the event of termination under Sections 5.1 or 5.2 above, all
257
+ end user license agreements (excluding distributors and resellers) which
258
+ have been validly granted by You or Your distributors under this License
259
+ prior to termination shall survive termination.
260
+
261
+ ************************************************************************
262
+ * *
263
+ * 6. Disclaimer of Warranty *
264
+ * ------------------------- *
265
+ * *
266
+ * Covered Software is provided under this License on an "as is" *
267
+ * basis, without warranty of any kind, either expressed, implied, or *
268
+ * statutory, including, without limitation, warranties that the *
269
+ * Covered Software is free of defects, merchantable, fit for a *
270
+ * particular purpose or non-infringing. The entire risk as to the *
271
+ * quality and performance of the Covered Software is with You. *
272
+ * Should any Covered Software prove defective in any respect, You *
273
+ * (not any Contributor) assume the cost of any necessary servicing, *
274
+ * repair, or correction. This disclaimer of warranty constitutes an *
275
+ * essential part of this License. No use of any Covered Software is *
276
+ * authorized under this License except under this disclaimer. *
277
+ * *
278
+ ************************************************************************
279
+
280
+ ************************************************************************
281
+ * *
282
+ * 7. Limitation of Liability *
283
+ * -------------------------- *
284
+ * *
285
+ * Under no circumstances and under no legal theory, whether tort *
286
+ * (including negligence), contract, or otherwise, shall any *
287
+ * Contributor, or anyone who distributes Covered Software as *
288
+ * permitted above, be liable to You for any direct, indirect, *
289
+ * special, incidental, or consequential damages of any character *
290
+ * including, without limitation, damages for lost profits, loss of *
291
+ * goodwill, work stoppage, computer failure or malfunction, or any *
292
+ * and all other commercial damages or losses, even if such party *
293
+ * shall have been informed of the possibility of such damages. This *
294
+ * limitation of liability shall not apply to liability for death or *
295
+ * personal injury resulting from such party's negligence to the *
296
+ * extent applicable law prohibits such limitation. Some *
297
+ * jurisdictions do not allow the exclusion or limitation of *
298
+ * incidental or consequential damages, so this exclusion and *
299
+ * limitation may not apply to You. *
300
+ * *
301
+ ************************************************************************
302
+
303
+ 8. Litigation
304
+ -------------
305
+
306
+ Any litigation relating to this License may be brought only in the
307
+ courts of a jurisdiction where the defendant maintains its principal
308
+ place of business and such litigation shall be governed by laws of that
309
+ jurisdiction, without reference to its conflict-of-law provisions.
310
+ Nothing in this Section shall prevent a party's ability to bring
311
+ cross-claims or counter-claims.
312
+
313
+ 9. Miscellaneous
314
+ ----------------
315
+
316
+ This License represents the complete agreement concerning the subject
317
+ matter hereof. If any provision of this License is held to be
318
+ unenforceable, such provision shall be reformed only to the extent
319
+ necessary to make it enforceable. Any law or regulation which provides
320
+ that the language of a contract shall be construed against the drafter
321
+ shall not be used to construe this License against a Contributor.
322
+
323
+ 10. Versions of the License
324
+ ---------------------------
325
+
326
+ 10.1. New Versions
327
+
328
+ Mozilla Foundation is the license steward. Except as provided in Section
329
+ 10.3, no one other than the license steward has the right to modify or
330
+ publish new versions of this License. Each version will be given a
331
+ distinguishing version number.
332
+
333
+ 10.2. Effect of New Versions
334
+
335
+ You may distribute the Covered Software under the terms of the version
336
+ of the License under which You originally received the Covered Software,
337
+ or under the terms of any subsequent version published by the license
338
+ steward.
339
+
340
+ 10.3. Modified Versions
341
+
342
+ If you create software not governed by this License, and you want to
343
+ create a new license for such software, you may create and use a
344
+ modified version of this License if you rename the license and remove
345
+ any references to the name of the license steward (except to note that
346
+ such modified license differs from this License).
347
+
348
+ 10.4. Distributing Source Code Form that is Incompatible With Secondary
349
+ Licenses
350
+
351
+ If You choose to distribute Source Code Form that is Incompatible With
352
+ Secondary Licenses under the terms of this version of the License, the
353
+ notice described in Exhibit B of this License must be attached.
354
+
355
+ Exhibit A - Source Code Form License Notice
356
+ -------------------------------------------
357
+
358
+ This Source Code Form is subject to the terms of the Mozilla Public
359
+ License, v. 2.0. If a copy of the MPL was not distributed with this
360
+ file, You can obtain one at http://mozilla.org/MPL/2.0/.
361
+
362
+ If it is not possible or desirable to put the notice in a particular
363
+ file, then You may include the notice in a location (such as a LICENSE
364
+ file in a relevant directory) where a recipient would be likely to look
365
+ for such a notice.
366
+
367
+ You may add additional accurate notices of copyright ownership.
368
+
369
+ Exhibit B - "Incompatible With Secondary Licenses" Notice
370
+ ---------------------------------------------------------
371
+
372
+ This Source Code Form is "Incompatible With Secondary Licenses", as
373
+ defined by the Mozilla Public License, v. 2.0.
TTS/MANIFEST.in ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ include README.md
2
+ include LICENSE.txt
3
+ include requirements.*.txt
4
+ include *.cff
5
+ include requirements.txt
6
+ include TTS/VERSION
7
+ recursive-include TTS *.json
8
+ recursive-include TTS *.html
9
+ recursive-include TTS *.png
10
+ recursive-include TTS *.md
11
+ recursive-include TTS *.py
12
+ recursive-include TTS *.pyx
13
+ recursive-include images *.png
14
+ recursive-exclude tests *
15
+ prune tests*
TTS/Makefile ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ .DEFAULT_GOAL := help
2
+ .PHONY: test system-deps dev-deps deps style lint install help docs
3
+
4
+ help:
5
+ @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}'
6
+
7
+ target_dirs := tests TTS notebooks recipes
8
+
9
+ test_all: ## run tests and don't stop on an error.
10
+ nose2 --with-coverage --coverage TTS tests
11
+ ./run_bash_tests.sh
12
+
13
+ test: ## run tests.
14
+ nose2 -F -v -B --with-coverage --coverage TTS tests
15
+
16
+ test_vocoder: ## run vocoder tests.
17
+ nose2 -F -v -B --with-coverage --coverage TTS tests.vocoder_tests
18
+
19
+ test_tts: ## run tts tests.
20
+ nose2 -F -v -B --with-coverage --coverage TTS tests.tts_tests
21
+
22
+ test_tts2: ## run tts2 tests.
23
+ nose2 -F -v -B --with-coverage --coverage TTS tests.tts_tests2
24
+
25
+ test_xtts: ## run xtts tests.
26
+ nose2 -F -v -B --with-coverage --coverage TTS tests.xtts_tests
27
+
28
+ test_aux: ## run aux tests.
29
+ nose2 -F -v -B --with-coverage --coverage TTS tests.aux_tests
30
+ ./run_bash_tests.sh
31
+
32
+ test_zoo: ## run zoo tests.
33
+ nose2 -F -v -B --with-coverage --coverage TTS tests.zoo_tests
34
+
35
+ inference_tests: ## run inference tests.
36
+ nose2 -F -v -B --with-coverage --coverage TTS tests.inference_tests
37
+
38
+ data_tests: ## run data tests.
39
+ nose2 -F -v -B --with-coverage --coverage TTS tests.data_tests
40
+
41
+ test_text: ## run text tests.
42
+ nose2 -F -v -B --with-coverage --coverage TTS tests.text_tests
43
+
44
+ test_failed: ## only run tests failed the last time.
45
+ nose2 -F -v -B --with-coverage --coverage TTS tests
46
+
47
+ style: ## update code style.
48
+ black ${target_dirs}
49
+ isort ${target_dirs}
50
+
51
+ lint: ## run pylint linter.
52
+ pylint ${target_dirs}
53
+ black ${target_dirs} --check
54
+ isort ${target_dirs} --check-only
55
+
56
+ system-deps: ## install linux system deps
57
+ sudo apt-get install -y libsndfile1-dev
58
+
59
+ dev-deps: ## install development deps
60
+ pip install -r requirements.dev.txt
61
+
62
+ doc-deps: ## install docs dependencies
63
+ pip install -r docs/requirements.txt
64
+
65
+ build-docs: ## build the docs
66
+ cd docs && make clean && make build
67
+
68
+ hub-deps: ## install deps for torch hub use
69
+ pip install -r requirements.hub.txt
70
+
71
+ deps: ## install 🐸 requirements.
72
+ pip install -r requirements.txt
73
+
74
+ install: ## install 🐸 TTS for development.
75
+ pip install -e .[all]
76
+
77
+ docs: ## build the docs
78
+ $(MAKE) -C docs clean && $(MAKE) -C docs html
TTS/README.md ADDED
@@ -0,0 +1,407 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ ## 🐸Coqui.ai News
3
+ - 📣 ⓍTTSv2 is here with 16 languages and better performance across the board.
4
+ - 📣 ⓍTTS fine-tuning code is out. Check the [example recipes](https://github.com/coqui-ai/TTS/tree/dev/recipes/ljspeech).
5
+ - 📣 ⓍTTS can now stream with <200ms latency.
6
+ - 📣 ⓍTTS, our production TTS model that can speak 13 languages, is released [Blog Post](https://coqui.ai/blog/tts/open_xtts), [Demo](https://huggingface.co/spaces/coqui/xtts), [Docs](https://tts.readthedocs.io/en/dev/models/xtts.html)
7
+ - 📣 [🐶Bark](https://github.com/suno-ai/bark) is now available for inference with unconstrained voice cloning. [Docs](https://tts.readthedocs.io/en/dev/models/bark.html)
8
+ - 📣 You can use [~1100 Fairseq models](https://github.com/facebookresearch/fairseq/tree/main/examples/mms) with 🐸TTS.
9
+ - 📣 🐸TTS now supports 🐢Tortoise with faster inference. [Docs](https://tts.readthedocs.io/en/dev/models/tortoise.html)
10
+
11
+ <div align="center">
12
+ <img src="https://static.scarf.sh/a.png?x-pxid=cf317fe7-2188-4721-bc01-124bb5d5dbb2" />
13
+
14
+ ## <img src="https://raw.githubusercontent.com/coqui-ai/TTS/main/images/coqui-log-green-TTS.png" height="56"/>
15
+
16
+
17
+ **🐸TTS is a library for advanced Text-to-Speech generation.**
18
+
19
+ 🚀 Pretrained models in +1100 languages.
20
+
21
+ 🛠️ Tools for training new models and fine-tuning existing models in any language.
22
+
23
+ 📚 Utilities for dataset analysis and curation.
24
+ ______________________________________________________________________
25
+
26
+ [![Discord](https://img.shields.io/discord/1037326658807533628?color=%239B59B6&label=chat%20on%20discord)](https://discord.gg/5eXr5seRrv)
27
+ [![License](<https://img.shields.io/badge/License-MPL%202.0-brightgreen.svg>)](https://opensource.org/licenses/MPL-2.0)
28
+ [![PyPI version](https://badge.fury.io/py/TTS.svg)](https://badge.fury.io/py/TTS)
29
+ [![Covenant](https://camo.githubusercontent.com/7d620efaa3eac1c5b060ece5d6aacfcc8b81a74a04d05cd0398689c01c4463bb/68747470733a2f2f696d672e736869656c64732e696f2f62616467652f436f6e7472696275746f72253230436f76656e616e742d76322e3025323061646f707465642d6666363962342e737667)](https://github.com/coqui-ai/TTS/blob/master/CODE_OF_CONDUCT.md)
30
+ [![Downloads](https://pepy.tech/badge/tts)](https://pepy.tech/project/tts)
31
+ [![DOI](https://zenodo.org/badge/265612440.svg)](https://zenodo.org/badge/latestdoi/265612440)
32
+
33
+ ![GithubActions](https://github.com/coqui-ai/TTS/actions/workflows/aux_tests.yml/badge.svg)
34
+ ![GithubActions](https://github.com/coqui-ai/TTS/actions/workflows/data_tests.yml/badge.svg)
35
+ ![GithubActions](https://github.com/coqui-ai/TTS/actions/workflows/docker.yaml/badge.svg)
36
+ ![GithubActions](https://github.com/coqui-ai/TTS/actions/workflows/inference_tests.yml/badge.svg)
37
+ ![GithubActions](https://github.com/coqui-ai/TTS/actions/workflows/style_check.yml/badge.svg)
38
+ ![GithubActions](https://github.com/coqui-ai/TTS/actions/workflows/text_tests.yml/badge.svg)
39
+ ![GithubActions](https://github.com/coqui-ai/TTS/actions/workflows/tts_tests.yml/badge.svg)
40
+ ![GithubActions](https://github.com/coqui-ai/TTS/actions/workflows/vocoder_tests.yml/badge.svg)
41
+ ![GithubActions](https://github.com/coqui-ai/TTS/actions/workflows/zoo_tests0.yml/badge.svg)
42
+ ![GithubActions](https://github.com/coqui-ai/TTS/actions/workflows/zoo_tests1.yml/badge.svg)
43
+ ![GithubActions](https://github.com/coqui-ai/TTS/actions/workflows/zoo_tests2.yml/badge.svg)
44
+ [![Docs](<https://readthedocs.org/projects/tts/badge/?version=latest&style=plastic>)](https://tts.readthedocs.io/en/latest/)
45
+
46
+ </div>
47
+
48
+ ______________________________________________________________________
49
+
50
+ ## 💬 Where to ask questions
51
+ Please use our dedicated channels for questions and discussion. Help is much more valuable if it's shared publicly so that more people can benefit from it.
52
+
53
+ | Type | Platforms |
54
+ | ------------------------------- | --------------------------------------- |
55
+ | 🚨 **Bug Reports** | [GitHub Issue Tracker] |
56
+ | 🎁 **Feature Requests & Ideas** | [GitHub Issue Tracker] |
57
+ | 👩‍💻 **Usage Questions** | [GitHub Discussions] |
58
+ | 🗯 **General Discussion** | [GitHub Discussions] or [Discord] |
59
+
60
+ [github issue tracker]: https://github.com/coqui-ai/tts/issues
61
+ [github discussions]: https://github.com/coqui-ai/TTS/discussions
62
+ [discord]: https://discord.gg/5eXr5seRrv
63
+ [Tutorials and Examples]: https://github.com/coqui-ai/TTS/wiki/TTS-Notebooks-and-Tutorials
64
+
65
+
66
+ ## 🔗 Links and Resources
67
+ | Type | Links |
68
+ | ------------------------------- | --------------------------------------- |
69
+ | 💼 **Documentation** | [ReadTheDocs](https://tts.readthedocs.io/en/latest/) |
70
+ | 💾 **Installation** | [TTS/README.md](https://github.com/coqui-ai/TTS/tree/dev#installation)|
71
+ | 👩‍💻 **Contributing** | [CONTRIBUTING.md](https://github.com/coqui-ai/TTS/blob/main/CONTRIBUTING.md)|
72
+ | 📌 **Road Map** | [Main Development Plans](https://github.com/coqui-ai/TTS/issues/378) |
73
+ | 🚀 **Released Models** | [TTS Releases](https://github.com/coqui-ai/TTS/releases) and [Experimental Models](https://github.com/coqui-ai/TTS/wiki/Experimental-Released-Models)|
74
+ | 📰 **Papers** | [TTS Papers](https://github.com/erogol/TTS-papers)|
75
+
76
+
77
+ ## 🥇 TTS Performance
78
+ <p align="center"><img src="https://raw.githubusercontent.com/coqui-ai/TTS/main/images/TTS-performance.png" width="800" /></p>
79
+
80
+ Underlined "TTS*" and "Judy*" are **internal** 🐸TTS models that are not released open-source. They are here to show the potential. Models prefixed with a dot (.Jofish .Abe and .Janice) are real human voices.
81
+
82
+ ## Features
83
+ - High-performance Deep Learning models for Text2Speech tasks.
84
+ - Text2Spec models (Tacotron, Tacotron2, Glow-TTS, SpeedySpeech).
85
+ - Speaker Encoder to compute speaker embeddings efficiently.
86
+ - Vocoder models (MelGAN, Multiband-MelGAN, GAN-TTS, ParallelWaveGAN, WaveGrad, WaveRNN)
87
+ - Fast and efficient model training.
88
+ - Detailed training logs on the terminal and Tensorboard.
89
+ - Support for Multi-speaker TTS.
90
+ - Efficient, flexible, lightweight but feature complete `Trainer API`.
91
+ - Released and ready-to-use models.
92
+ - Tools to curate Text2Speech datasets under `dataset_analysis`.
93
+ - Utilities to use and test your models.
94
+ - Modular (but not too much) code base enabling easy implementation of new ideas.
95
+
96
+ ## Model Implementations
97
+ ### Spectrogram models
98
+ - Tacotron: [paper](https://arxiv.org/abs/1703.10135)
99
+ - Tacotron2: [paper](https://arxiv.org/abs/1712.05884)
100
+ - Glow-TTS: [paper](https://arxiv.org/abs/2005.11129)
101
+ - Speedy-Speech: [paper](https://arxiv.org/abs/2008.03802)
102
+ - Align-TTS: [paper](https://arxiv.org/abs/2003.01950)
103
+ - FastPitch: [paper](https://arxiv.org/pdf/2006.06873.pdf)
104
+ - FastSpeech: [paper](https://arxiv.org/abs/1905.09263)
105
+ - FastSpeech2: [paper](https://arxiv.org/abs/2006.04558)
106
+ - SC-GlowTTS: [paper](https://arxiv.org/abs/2104.05557)
107
+ - Capacitron: [paper](https://arxiv.org/abs/1906.03402)
108
+ - OverFlow: [paper](https://arxiv.org/abs/2211.06892)
109
+ - Neural HMM TTS: [paper](https://arxiv.org/abs/2108.13320)
110
+ - Delightful TTS: [paper](https://arxiv.org/abs/2110.12612)
111
+
112
+ ### End-to-End Models
113
+ - ⓍTTS: [blog](https://coqui.ai/blog/tts/open_xtts)
114
+ - VITS: [paper](https://arxiv.org/pdf/2106.06103)
115
+ - 🐸 YourTTS: [paper](https://arxiv.org/abs/2112.02418)
116
+ - 🐢 Tortoise: [orig. repo](https://github.com/neonbjb/tortoise-tts)
117
+ - 🐶 Bark: [orig. repo](https://github.com/suno-ai/bark)
118
+
119
+ ### Attention Methods
120
+ - Guided Attention: [paper](https://arxiv.org/abs/1710.08969)
121
+ - Forward Backward Decoding: [paper](https://arxiv.org/abs/1907.09006)
122
+ - Graves Attention: [paper](https://arxiv.org/abs/1910.10288)
123
+ - Double Decoder Consistency: [blog](https://erogol.com/solving-attention-problems-of-tts-models-with-double-decoder-consistency/)
124
+ - Dynamic Convolutional Attention: [paper](https://arxiv.org/pdf/1910.10288.pdf)
125
+ - Alignment Network: [paper](https://arxiv.org/abs/2108.10447)
126
+
127
+ ### Speaker Encoder
128
+ - GE2E: [paper](https://arxiv.org/abs/1710.10467)
129
+ - Angular Loss: [paper](https://arxiv.org/pdf/2003.11982.pdf)
130
+
131
+ ### Vocoders
132
+ - MelGAN: [paper](https://arxiv.org/abs/1910.06711)
133
+ - MultiBandMelGAN: [paper](https://arxiv.org/abs/2005.05106)
134
+ - ParallelWaveGAN: [paper](https://arxiv.org/abs/1910.11480)
135
+ - GAN-TTS discriminators: [paper](https://arxiv.org/abs/1909.11646)
136
+ - WaveRNN: [origin](https://github.com/fatchord/WaveRNN/)
137
+ - WaveGrad: [paper](https://arxiv.org/abs/2009.00713)
138
+ - HiFiGAN: [paper](https://arxiv.org/abs/2010.05646)
139
+ - UnivNet: [paper](https://arxiv.org/abs/2106.07889)
140
+
141
+ ### Voice Conversion
142
+ - FreeVC: [paper](https://arxiv.org/abs/2210.15418)
143
+
144
+ You can also help us implement more models.
145
+
146
+ ## Installation
147
+ 🐸TTS is tested on Ubuntu 18.04 with **python >= 3.9, < 3.12**.
148
+
149
+ If you are only interested in [synthesizing speech](https://tts.readthedocs.io/en/latest/inference.html) with the released 🐸TTS models, installing from PyPI is the easiest option.
150
+
151
+ ```bash
152
+ pip install TTS
153
+ ```
154
+
155
+ If you plan to code or train models, clone 🐸TTS and install it locally.
156
+
157
+ ```bash
158
+ git clone https://github.com/coqui-ai/TTS
159
+ pip install -e .[all,dev,notebooks] # Select the relevant extras
160
+ ```
161
+
162
+ If you are on Ubuntu (Debian), you can also run the following commands for installation.
163
+
164
+ ```bash
165
+ $ make system-deps # intended to be used on Ubuntu (Debian). Let us know if you have a different OS.
166
+ $ make install
167
+ ```
168
+
169
+ If you are on Windows, 👑@GuyPaddock wrote installation instructions [here](https://stackoverflow.com/questions/66726331/how-can-i-run-mozilla-tts-coqui-tts-training-with-cuda-on-a-windows-system).
170
+
171
+
172
+ ## Docker Image
173
+ You can also try TTS without installing it by using the Docker image.
174
+ Simply run the following command and you will be able to run TTS without installing it.
175
+
176
+ ```bash
177
+ docker run --rm -it -p 5002:5002 --entrypoint /bin/bash ghcr.io/coqui-ai/tts-cpu
178
+ python3 TTS/server/server.py --list_models #To get the list of available models
179
+ python3 TTS/server/server.py --model_name tts_models/en/vctk/vits # To start a server
180
+ ```
181
+
182
+ You can then enjoy the TTS server [here](http://[::1]:5002/)
183
+ More details about the docker images (like GPU support) can be found [here](https://tts.readthedocs.io/en/latest/docker_images.html)
184
+
185
+
186
+ ## Synthesizing speech by 🐸TTS
187
+
188
+ ### 🐍 Python API
189
+
190
+ #### Running a multi-speaker and multi-lingual model
191
+
192
+ ```python
193
+ import torch
194
+ from TTS.api import TTS
195
+
196
+ # Get device
197
+ device = "cuda" if torch.cuda.is_available() else "cpu"
198
+
199
+ # List available 🐸TTS models
200
+ print(TTS().list_models())
201
+
202
+ # Init TTS
203
+ tts = TTS("tts_models/multilingual/multi-dataset/xtts_v2").to(device)
204
+
205
+ # Run TTS
206
+ # ❗ Since this model is a multi-lingual voice cloning model, we must set the target speaker_wav and language
207
+ # Text to speech with a list of amplitude values as output
208
+ wav = tts.tts(text="Hello world!", speaker_wav="my/cloning/audio.wav", language="en")
209
+ # Text to speech to a file
210
+ tts.tts_to_file(text="Hello world!", speaker_wav="my/cloning/audio.wav", language="en", file_path="output.wav")
211
+ ```
212
+
213
+ #### Running a single speaker model
214
+
215
+ ```python
216
+ # Init TTS with the target model name
217
+ tts = TTS(model_name="tts_models/de/thorsten/tacotron2-DDC", progress_bar=False).to(device)
218
+
219
+ # Run TTS
220
+ tts.tts_to_file(text="Ich bin eine Testnachricht.", file_path=OUTPUT_PATH)
221
+
222
+ # Example voice cloning with YourTTS in English, French and Portuguese
223
+ tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False).to(device)
224
+ tts.tts_to_file("This is voice cloning.", speaker_wav="my/cloning/audio.wav", language="en", file_path="output.wav")
225
+ tts.tts_to_file("C'est le clonage de la voix.", speaker_wav="my/cloning/audio.wav", language="fr-fr", file_path="output.wav")
226
+ tts.tts_to_file("Isso é clonagem de voz.", speaker_wav="my/cloning/audio.wav", language="pt-br", file_path="output.wav")
227
+ ```
228
+
229
+ #### Example voice conversion
230
+
231
+ Converting the voice in `source_wav` to the voice of `target_wav`
232
+
233
+ ```python
234
+ tts = TTS(model_name="voice_conversion_models/multilingual/vctk/freevc24", progress_bar=False).to("cuda")
235
+ tts.voice_conversion_to_file(source_wav="my/source.wav", target_wav="my/target.wav", file_path="output.wav")
236
+ ```
237
+
238
+ #### Example voice cloning together with the voice conversion model.
239
+ This way, you can clone voices by using any model in 🐸TTS.
240
+
241
+ ```python
242
+
243
+ tts = TTS("tts_models/de/thorsten/tacotron2-DDC")
244
+ tts.tts_with_vc_to_file(
245
+ "Wie sage ich auf Italienisch, dass ich dich liebe?",
246
+ speaker_wav="target/speaker.wav",
247
+ file_path="output.wav"
248
+ )
249
+ ```
250
+
251
+ #### Example text to speech using **Fairseq models in ~1100 languages** 🤯.
252
+ For Fairseq models, use the following name format: `tts_models/<lang-iso_code>/fairseq/vits`.
253
+ You can find the language ISO codes [here](https://dl.fbaipublicfiles.com/mms/tts/all-tts-languages.html)
254
+ and learn about the Fairseq models [here](https://github.com/facebookresearch/fairseq/tree/main/examples/mms).
255
+
256
+ ```python
257
+ # TTS with on the fly voice conversion
258
+ api = TTS("tts_models/deu/fairseq/vits")
259
+ api.tts_with_vc_to_file(
260
+ "Wie sage ich auf Italienisch, dass ich dich liebe?",
261
+ speaker_wav="target/speaker.wav",
262
+ file_path="output.wav"
263
+ )
264
+ ```
265
+
266
+ ### Command-line `tts`
267
+
268
+ <!-- begin-tts-readme -->
269
+
270
+ Synthesize speech on command line.
271
+
272
+ You can either use your trained model or choose a model from the provided list.
273
+
274
+ If you don't specify any models, then it uses the LJSpeech-based English model.
275
+
276
+ #### Single Speaker Models
277
+
278
+ - List provided models:
279
+
280
+ ```
281
+ $ tts --list_models
282
+ ```
283
+
284
+ - Get model info (for both tts_models and vocoder_models):
285
+
286
+ - Query by type/name:
287
+ The model_info_by_name uses the name as it appears in the --list_models output.
288
+ ```
289
+ $ tts --model_info_by_name "<model_type>/<language>/<dataset>/<model_name>"
290
+ ```
291
+ For example:
292
+ ```
293
+ $ tts --model_info_by_name tts_models/tr/common-voice/glow-tts
294
+ $ tts --model_info_by_name vocoder_models/en/ljspeech/hifigan_v2
295
+ ```
296
+ - Query by type/idx:
297
+ The model_query_idx uses the corresponding idx from --list_models.
298
+
299
+ ```
300
+ $ tts --model_info_by_idx "<model_type>/<model_query_idx>"
301
+ ```
302
+
303
+ For example:
304
+
305
+ ```
306
+ $ tts --model_info_by_idx tts_models/3
307
+ ```
308
+
309
+ - Query model info by full name:
310
+ ```
311
+ $ tts --model_info_by_name "<model_type>/<language>/<dataset>/<model_name>"
312
+ ```
313
+
314
+ - Run TTS with default models:
315
+
316
+ ```
317
+ $ tts --text "Text for TTS" --out_path output/path/speech.wav
318
+ ```
319
+
320
+ - Run TTS and pipe out the generated TTS wav file data:
321
+
322
+ ```
323
+ $ tts --text "Text for TTS" --pipe_out --out_path output/path/speech.wav | aplay
324
+ ```
325
+
326
+ - Run a TTS model with its default vocoder model:
327
+
328
+ ```
329
+ $ tts --text "Text for TTS" --model_name "<model_type>/<language>/<dataset>/<model_name>" --out_path output/path/speech.wav
330
+ ```
331
+
332
+ For example:
333
+
334
+ ```
335
+ $ tts --text "Text for TTS" --model_name "tts_models/en/ljspeech/glow-tts" --out_path output/path/speech.wav
336
+ ```
337
+
338
+ - Run with specific TTS and vocoder models from the list:
339
+
340
+ ```
341
+ $ tts --text "Text for TTS" --model_name "<model_type>/<language>/<dataset>/<model_name>" --vocoder_name "<model_type>/<language>/<dataset>/<model_name>" --out_path output/path/speech.wav
342
+ ```
343
+
344
+ For example:
345
+
346
+ ```
347
+ $ tts --text "Text for TTS" --model_name "tts_models/en/ljspeech/glow-tts" --vocoder_name "vocoder_models/en/ljspeech/univnet" --out_path output/path/speech.wav
348
+ ```
349
+
350
+ - Run your own TTS model (Using Griffin-Lim Vocoder):
351
+
352
+ ```
353
+ $ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav
354
+ ```
355
+
356
+ - Run your own TTS and Vocoder models:
357
+
358
+ ```
359
+ $ tts --text "Text for TTS" --model_path path/to/model.pth --config_path path/to/config.json --out_path output/path/speech.wav
360
+ --vocoder_path path/to/vocoder.pth --vocoder_config_path path/to/vocoder_config.json
361
+ ```
362
+
363
+ #### Multi-speaker Models
364
+
365
+ - List the available speakers and choose a <speaker_id> among them:
366
+
367
+ ```
368
+ $ tts --model_name "<language>/<dataset>/<model_name>" --list_speaker_idxs
369
+ ```
370
+
371
+ - Run the multi-speaker TTS model with the target speaker ID:
372
+
373
+ ```
374
+ $ tts --text "Text for TTS." --out_path output/path/speech.wav --model_name "<language>/<dataset>/<model_name>" --speaker_idx <speaker_id>
375
+ ```
376
+
377
+ - Run your own multi-speaker TTS model:
378
+
379
+ ```
380
+ $ tts --text "Text for TTS" --out_path output/path/speech.wav --model_path path/to/model.pth --config_path path/to/config.json --speakers_file_path path/to/speaker.json --speaker_idx <speaker_id>
381
+ ```
382
+
383
+ ### Voice Conversion Models
384
+
385
+ ```
386
+ $ tts --out_path output/path/speech.wav --model_name "<language>/<dataset>/<model_name>" --source_wav <path/to/speaker/wav> --target_wav <path/to/reference/wav>
387
+ ```
388
+
389
+ <!-- end-tts-readme -->
390
+
391
+ ## Directory Structure
392
+ ```
393
+ |- notebooks/ (Jupyter Notebooks for model evaluation, parameter selection and data analysis.)
394
+ |- utils/ (common utilities.)
395
+ |- TTS
396
+ |- bin/ (folder for all the executables.)
397
+ |- train*.py (train your target model.)
398
+ |- ...
399
+ |- tts/ (text to speech models)
400
+ |- layers/ (model layer definitions)
401
+ |- models/ (model definitions)
402
+ |- utils/ (model specific utilities.)
403
+ |- speaker_encoder/ (Speaker Encoder models.)
404
+ |- (same)
405
+ |- vocoder/ (Vocoder models.)
406
+ |- (same)
407
+ ```
TTS/TTS/.models.json ADDED
@@ -0,0 +1,938 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "tts_models": {
3
+ "multilingual": {
4
+ "multi-dataset": {
5
+ "xtts_v2": {
6
+ "description": "XTTS-v2.0.3 by Coqui with 17 languages.",
7
+ "hf_url": [
8
+ "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/model.pth",
9
+ "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/config.json",
10
+ "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/vocab.json",
11
+ "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/hash.md5",
12
+ "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v2/main/speakers_xtts.pth"
13
+ ],
14
+ "model_hash": "10f92b55c512af7a8d39d650547a15a7",
15
+ "default_vocoder": null,
16
+ "commit": "480a6cdf7",
17
+ "license": "CPML",
18
+ "contact": "info@coqui.ai",
19
+ "tos_required": true
20
+ },
21
+ "xtts_v1.1": {
22
+ "description": "XTTS-v1.1 by Coqui with 14 languages, cross-language voice cloning and reference leak fixed.",
23
+ "hf_url": [
24
+ "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.2/model.pth",
25
+ "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.2/config.json",
26
+ "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.2/vocab.json",
27
+ "https://coqui.gateway.scarf.sh/hf-coqui/XTTS-v1/v1.1.2/hash.md5"
28
+ ],
29
+ "model_hash": "7c62beaf58d39b729de287330dc254e7b515677416839b649a50e7cf74c3df59",
30
+ "default_vocoder": null,
31
+ "commit": "82910a63",
32
+ "license": "CPML",
33
+ "contact": "info@coqui.ai",
34
+ "tos_required": true
35
+ },
36
+ "your_tts": {
37
+ "description": "Your TTS model accompanying the paper https://arxiv.org/abs/2112.02418",
38
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.10.1_models/tts_models--multilingual--multi-dataset--your_tts.zip",
39
+ "default_vocoder": null,
40
+ "commit": "e9a1953e",
41
+ "license": "CC BY-NC-ND 4.0",
42
+ "contact": "egolge@coqui.ai"
43
+ },
44
+ "bark": {
45
+ "description": "🐶 Bark TTS model released by suno-ai. You can find the original implementation in https://github.com/suno-ai/bark.",
46
+ "hf_url": [
47
+ "https://coqui.gateway.scarf.sh/hf/bark/coarse_2.pt",
48
+ "https://coqui.gateway.scarf.sh/hf/bark/fine_2.pt",
49
+ "https://coqui.gateway.scarf.sh/hf/text_2.pt",
50
+ "https://coqui.gateway.scarf.sh/hf/bark/config.json",
51
+ "https://coqui.gateway.scarf.sh/hf/bark/hubert.pt",
52
+ "https://coqui.gateway.scarf.sh/hf/bark/tokenizer.pth"
53
+ ],
54
+ "default_vocoder": null,
55
+ "commit": "e9a1953e",
56
+ "license": "MIT",
57
+ "contact": "https://www.suno.ai/"
58
+ }
59
+ }
60
+ },
61
+ "bg": {
62
+ "cv": {
63
+ "vits": {
64
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--bg--cv--vits.zip",
65
+ "default_vocoder": null,
66
+ "commit": null,
67
+ "author": "@NeonGeckoCom",
68
+ "license": "bsd-3-clause"
69
+ }
70
+ }
71
+ },
72
+ "cs": {
73
+ "cv": {
74
+ "vits": {
75
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--cs--cv--vits.zip",
76
+ "default_vocoder": null,
77
+ "commit": null,
78
+ "author": "@NeonGeckoCom",
79
+ "license": "bsd-3-clause"
80
+ }
81
+ }
82
+ },
83
+ "da": {
84
+ "cv": {
85
+ "vits": {
86
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--da--cv--vits.zip",
87
+ "default_vocoder": null,
88
+ "commit": null,
89
+ "author": "@NeonGeckoCom",
90
+ "license": "bsd-3-clause"
91
+ }
92
+ }
93
+ },
94
+ "et": {
95
+ "cv": {
96
+ "vits": {
97
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--et--cv--vits.zip",
98
+ "default_vocoder": null,
99
+ "commit": null,
100
+ "author": "@NeonGeckoCom",
101
+ "license": "bsd-3-clause"
102
+ }
103
+ }
104
+ },
105
+ "ga": {
106
+ "cv": {
107
+ "vits": {
108
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--ga--cv--vits.zip",
109
+ "default_vocoder": null,
110
+ "commit": null,
111
+ "author": "@NeonGeckoCom",
112
+ "license": "bsd-3-clause"
113
+ }
114
+ }
115
+ },
116
+ "en": {
117
+ "ek1": {
118
+ "tacotron2": {
119
+ "description": "EK1 en-rp tacotron2 by NMStoker",
120
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ek1--tacotron2.zip",
121
+ "default_vocoder": "vocoder_models/en/ek1/wavegrad",
122
+ "commit": "c802255",
123
+ "license": "apache 2.0"
124
+ }
125
+ },
126
+ "ljspeech": {
127
+ "tacotron2-DDC": {
128
+ "description": "Tacotron2 with Double Decoder Consistency.",
129
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--tacotron2-DDC.zip",
130
+ "default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2",
131
+ "commit": "bae2ad0f",
132
+ "author": "Eren Gölge @erogol",
133
+ "license": "apache 2.0",
134
+ "contact": "egolge@coqui.com"
135
+ },
136
+ "tacotron2-DDC_ph": {
137
+ "description": "Tacotron2 with Double Decoder Consistency with phonemes.",
138
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--tacotron2-DDC_ph.zip",
139
+ "default_vocoder": "vocoder_models/en/ljspeech/univnet",
140
+ "commit": "3900448",
141
+ "author": "Eren Gölge @erogol",
142
+ "license": "apache 2.0",
143
+ "contact": "egolge@coqui.com"
144
+ },
145
+ "glow-tts": {
146
+ "description": "",
147
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--glow-tts.zip",
148
+ "stats_file": null,
149
+ "default_vocoder": "vocoder_models/en/ljspeech/multiband-melgan",
150
+ "commit": "",
151
+ "author": "Eren Gölge @erogol",
152
+ "license": "MPL",
153
+ "contact": "egolge@coqui.com"
154
+ },
155
+ "speedy-speech": {
156
+ "description": "Speedy Speech model trained on LJSpeech dataset using the Alignment Network for learning the durations.",
157
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--speedy-speech.zip",
158
+ "stats_file": null,
159
+ "default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2",
160
+ "commit": "4581e3d",
161
+ "author": "Eren Gölge @erogol",
162
+ "license": "apache 2.0",
163
+ "contact": "egolge@coqui.com"
164
+ },
165
+ "tacotron2-DCA": {
166
+ "description": "",
167
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--tacotron2-DCA.zip",
168
+ "default_vocoder": "vocoder_models/en/ljspeech/multiband-melgan",
169
+ "commit": "",
170
+ "author": "Eren Gölge @erogol",
171
+ "license": "MPL",
172
+ "contact": "egolge@coqui.com"
173
+ },
174
+ "vits": {
175
+ "description": "VITS is an End2End TTS model trained on LJSpeech dataset with phonemes.",
176
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--vits.zip",
177
+ "default_vocoder": null,
178
+ "commit": "3900448",
179
+ "author": "Eren Gölge @erogol",
180
+ "license": "apache 2.0",
181
+ "contact": "egolge@coqui.com"
182
+ },
183
+ "vits--neon": {
184
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--en--ljspeech--vits.zip",
185
+ "default_vocoder": null,
186
+ "author": "@NeonGeckoCom",
187
+ "license": "bsd-3-clause",
188
+ "contact": null,
189
+ "commit": null
190
+ },
191
+ "fast_pitch": {
192
+ "description": "FastPitch model trained on LJSpeech using the Aligner Network",
193
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--ljspeech--fast_pitch.zip",
194
+ "default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2",
195
+ "commit": "b27b3ba",
196
+ "author": "Eren Gölge @erogol",
197
+ "license": "apache 2.0",
198
+ "contact": "egolge@coqui.com"
199
+ },
200
+ "overflow": {
201
+ "description": "Overflow model trained on LJSpeech",
202
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.10.0_models/tts_models--en--ljspeech--overflow.zip",
203
+ "default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2",
204
+ "commit": "3b1a28f",
205
+ "author": "Eren Gölge @erogol",
206
+ "license": "apache 2.0",
207
+ "contact": "egolge@coqui.ai"
208
+ },
209
+ "neural_hmm": {
210
+ "description": "Neural HMM model trained on LJSpeech",
211
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.11.0_models/tts_models--en--ljspeech--neural_hmm.zip",
212
+ "default_vocoder": "vocoder_models/en/ljspeech/hifigan_v2",
213
+ "commit": "3b1a28f",
214
+ "author": "Shivam Mehta @shivammehta25",
215
+ "license": "apache 2.0",
216
+ "contact": "d83ee8fe45e3c0d776d4a865aca21d7c2ac324c4"
217
+ }
218
+ },
219
+ "vctk": {
220
+ "vits": {
221
+ "description": "VITS End2End TTS model trained on VCTK dataset with 109 different speakers with EN accent.",
222
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--vctk--vits.zip",
223
+ "default_vocoder": null,
224
+ "commit": "3900448",
225
+ "author": "Eren @erogol",
226
+ "license": "apache 2.0",
227
+ "contact": "egolge@coqui.ai"
228
+ },
229
+ "fast_pitch": {
230
+ "description": "FastPitch model trained on VCTK dataset.",
231
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--vctk--fast_pitch.zip",
232
+ "default_vocoder": null,
233
+ "commit": "bdab788d",
234
+ "author": "Eren @erogol",
235
+ "license": "CC BY-NC-ND 4.0",
236
+ "contact": "egolge@coqui.ai"
237
+ }
238
+ },
239
+ "sam": {
240
+ "tacotron-DDC": {
241
+ "description": "Tacotron2 with Double Decoder Consistency trained with Accenture's Sam dataset.",
242
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--en--sam--tacotron-DDC.zip",
243
+ "default_vocoder": "vocoder_models/en/sam/hifigan_v2",
244
+ "commit": "bae2ad0f",
245
+ "author": "Eren Gölge @erogol",
246
+ "license": "apache 2.0",
247
+ "contact": "egolge@coqui.com"
248
+ }
249
+ },
250
+ "blizzard2013": {
251
+ "capacitron-t2-c50": {
252
+ "description": "Capacitron additions to Tacotron 2 with Capacity at 50 as in https://arxiv.org/pdf/1906.03402.pdf",
253
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.7.0_models/tts_models--en--blizzard2013--capacitron-t2-c50.zip",
254
+ "commit": "d6284e7",
255
+ "default_vocoder": "vocoder_models/en/blizzard2013/hifigan_v2",
256
+ "author": "Adam Froghyar @a-froghyar",
257
+ "license": "apache 2.0",
258
+ "contact": "adamfroghyar@gmail.com"
259
+ },
260
+ "capacitron-t2-c150_v2": {
261
+ "description": "Capacitron additions to Tacotron 2 with Capacity at 150 as in https://arxiv.org/pdf/1906.03402.pdf",
262
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.7.1_models/tts_models--en--blizzard2013--capacitron-t2-c150_v2.zip",
263
+ "commit": "a67039d",
264
+ "default_vocoder": "vocoder_models/en/blizzard2013/hifigan_v2",
265
+ "author": "Adam Froghyar @a-froghyar",
266
+ "license": "apache 2.0",
267
+ "contact": "adamfroghyar@gmail.com"
268
+ }
269
+ },
270
+ "multi-dataset": {
271
+ "tortoise-v2": {
272
+ "description": "Tortoise tts model https://github.com/neonbjb/tortoise-tts",
273
+ "github_rls_url": [
274
+ "https://coqui.gateway.scarf.sh/v0.14.1_models/autoregressive.pth",
275
+ "https://coqui.gateway.scarf.sh/v0.14.1_models/clvp2.pth",
276
+ "https://coqui.gateway.scarf.sh/v0.14.1_models/cvvp.pth",
277
+ "https://coqui.gateway.scarf.sh/v0.14.1_models/diffusion_decoder.pth",
278
+ "https://coqui.gateway.scarf.sh/v0.14.1_models/rlg_auto.pth",
279
+ "https://coqui.gateway.scarf.sh/v0.14.1_models/rlg_diffuser.pth",
280
+ "https://coqui.gateway.scarf.sh/v0.14.1_models/vocoder.pth",
281
+ "https://coqui.gateway.scarf.sh/v0.14.1_models/mel_norms.pth",
282
+ "https://coqui.gateway.scarf.sh/v0.14.1_models/config.json"
283
+ ],
284
+ "commit": "c1875f6",
285
+ "default_vocoder": null,
286
+ "author": "@neonbjb - James Betker, @manmay-nakhashi Manmay Nakhashi",
287
+ "license": "apache 2.0"
288
+ }
289
+ },
290
+ "jenny": {
291
+ "jenny": {
292
+ "description": "VITS model trained with Jenny(Dioco) dataset. Named as Jenny as demanded by the license. Original URL for the model https://www.kaggle.com/datasets/noml4u/tts-models--en--jenny-dioco--vits",
293
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.14.0_models/tts_models--en--jenny--jenny.zip",
294
+ "default_vocoder": null,
295
+ "commit": "ba40a1c",
296
+ "license": "custom - see https://github.com/dioco-group/jenny-tts-dataset#important",
297
+ "author": "@noml4u"
298
+ }
299
+ }
300
+ },
301
+ "es": {
302
+ "mai": {
303
+ "tacotron2-DDC": {
304
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--es--mai--tacotron2-DDC.zip",
305
+ "default_vocoder": "vocoder_models/universal/libri-tts/fullband-melgan",
306
+ "commit": "",
307
+ "author": "Eren Gölge @erogol",
308
+ "license": "MPL",
309
+ "contact": "egolge@coqui.com"
310
+ }
311
+ },
312
+ "css10": {
313
+ "vits": {
314
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--es--css10--vits.zip",
315
+ "default_vocoder": null,
316
+ "commit": null,
317
+ "author": "@NeonGeckoCom",
318
+ "license": "bsd-3-clause"
319
+ }
320
+ }
321
+ },
322
+ "fr": {
323
+ "mai": {
324
+ "tacotron2-DDC": {
325
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--fr--mai--tacotron2-DDC.zip",
326
+ "default_vocoder": "vocoder_models/universal/libri-tts/fullband-melgan",
327
+ "commit": null,
328
+ "author": "Eren Gölge @erogol",
329
+ "license": "MPL",
330
+ "contact": "egolge@coqui.com"
331
+ }
332
+ },
333
+ "css10": {
334
+ "vits": {
335
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--fr--css10--vits.zip",
336
+ "default_vocoder": null,
337
+ "commit": null,
338
+ "author": "@NeonGeckoCom",
339
+ "license": "bsd-3-clause"
340
+ }
341
+ }
342
+ },
343
+ "uk": {
344
+ "mai": {
345
+ "glow-tts": {
346
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--uk--mai--glow-tts.zip",
347
+ "author": "@robinhad",
348
+ "commit": "bdab788d",
349
+ "license": "MIT",
350
+ "contact": "",
351
+ "default_vocoder": "vocoder_models/uk/mai/multiband-melgan"
352
+ },
353
+ "vits": {
354
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--uk--mai--vits.zip",
355
+ "default_vocoder": null,
356
+ "commit": null,
357
+ "author": "@NeonGeckoCom",
358
+ "license": "bsd-3-clause"
359
+ }
360
+ }
361
+ },
362
+ "zh-CN": {
363
+ "baker": {
364
+ "tacotron2-DDC-GST": {
365
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--zh-CN--baker--tacotron2-DDC-GST.zip",
366
+ "commit": "unknown",
367
+ "author": "@kirianguiller",
368
+ "license": "apache 2.0",
369
+ "default_vocoder": null
370
+ }
371
+ }
372
+ },
373
+ "nl": {
374
+ "mai": {
375
+ "tacotron2-DDC": {
376
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--nl--mai--tacotron2-DDC.zip",
377
+ "author": "@r-dh",
378
+ "license": "apache 2.0",
379
+ "default_vocoder": "vocoder_models/nl/mai/parallel-wavegan",
380
+ "stats_file": null,
381
+ "commit": "540d811"
382
+ }
383
+ },
384
+ "css10": {
385
+ "vits": {
386
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--nl--css10--vits.zip",
387
+ "default_vocoder": null,
388
+ "commit": null,
389
+ "author": "@NeonGeckoCom",
390
+ "license": "bsd-3-clause"
391
+ }
392
+ }
393
+ },
394
+ "de": {
395
+ "thorsten": {
396
+ "tacotron2-DCA": {
397
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--de--thorsten--tacotron2-DCA.zip",
398
+ "default_vocoder": "vocoder_models/de/thorsten/fullband-melgan",
399
+ "author": "@thorstenMueller",
400
+ "license": "apache 2.0",
401
+ "commit": "unknown"
402
+ },
403
+ "vits": {
404
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.7.0_models/tts_models--de--thorsten--vits.zip",
405
+ "default_vocoder": null,
406
+ "author": "@thorstenMueller",
407
+ "license": "apache 2.0",
408
+ "commit": "unknown"
409
+ },
410
+ "tacotron2-DDC": {
411
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--de--thorsten--tacotron2-DDC.zip",
412
+ "default_vocoder": "vocoder_models/de/thorsten/hifigan_v1",
413
+ "description": "Thorsten-Dec2021-22k-DDC",
414
+ "author": "@thorstenMueller",
415
+ "license": "apache 2.0",
416
+ "commit": "unknown"
417
+ }
418
+ },
419
+ "css10": {
420
+ "vits-neon": {
421
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--de--css10--vits.zip",
422
+ "default_vocoder": null,
423
+ "author": "@NeonGeckoCom",
424
+ "license": "bsd-3-clause",
425
+ "commit": null
426
+ }
427
+ }
428
+ },
429
+ "ja": {
430
+ "kokoro": {
431
+ "tacotron2-DDC": {
432
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--ja--kokoro--tacotron2-DDC.zip",
433
+ "default_vocoder": "vocoder_models/ja/kokoro/hifigan_v1",
434
+ "description": "Tacotron2 with Double Decoder Consistency trained with Kokoro Speech Dataset.",
435
+ "author": "@kaiidams",
436
+ "license": "apache 2.0",
437
+ "commit": "401fbd89"
438
+ }
439
+ }
440
+ },
441
+ "tr": {
442
+ "common-voice": {
443
+ "glow-tts": {
444
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--tr--common-voice--glow-tts.zip",
445
+ "default_vocoder": "vocoder_models/tr/common-voice/hifigan",
446
+ "license": "MIT",
447
+ "description": "Turkish GlowTTS model using an unknown speaker from the Common-Voice dataset.",
448
+ "author": "Fatih Akademi",
449
+ "commit": null
450
+ }
451
+ }
452
+ },
453
+ "it": {
454
+ "mai_female": {
455
+ "glow-tts": {
456
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--it--mai_female--glow-tts.zip",
457
+ "default_vocoder": null,
458
+ "description": "GlowTTS model as explained on https://github.com/coqui-ai/TTS/issues/1148.",
459
+ "author": "@nicolalandro",
460
+ "license": "apache 2.0",
461
+ "commit": null
462
+ },
463
+ "vits": {
464
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--it--mai_female--vits.zip",
465
+ "default_vocoder": null,
466
+ "description": "GlowTTS model as explained on https://github.com/coqui-ai/TTS/issues/1148.",
467
+ "author": "@nicolalandro",
468
+ "license": "apache 2.0",
469
+ "commit": null
470
+ }
471
+ },
472
+ "mai_male": {
473
+ "glow-tts": {
474
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--it--mai_male--glow-tts.zip",
475
+ "default_vocoder": null,
476
+ "description": "GlowTTS model as explained on https://github.com/coqui-ai/TTS/issues/1148.",
477
+ "author": "@nicolalandro",
478
+ "license": "apache 2.0",
479
+ "commit": null
480
+ },
481
+ "vits": {
482
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/tts_models--it--mai_male--vits.zip",
483
+ "default_vocoder": null,
484
+ "description": "GlowTTS model as explained on https://github.com/coqui-ai/TTS/issues/1148.",
485
+ "author": "@nicolalandro",
486
+ "license": "apache 2.0",
487
+ "commit": null
488
+ }
489
+ }
490
+ },
491
+ "ewe": {
492
+ "openbible": {
493
+ "vits": {
494
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--ewe--openbible--vits.zip",
495
+ "default_vocoder": null,
496
+ "license": "CC-BY-SA 4.0",
497
+ "description": "Original work (audio and text) by Biblica available for free at www.biblica.com and open.bible.",
498
+ "author": "@coqui_ai",
499
+ "commit": "1b22f03"
500
+ }
501
+ }
502
+ },
503
+ "hau": {
504
+ "openbible": {
505
+ "vits": {
506
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--hau--openbible--vits.zip",
507
+ "default_vocoder": null,
508
+ "license": "CC-BY-SA 4.0",
509
+ "description": "Original work (audio and text) by Biblica available for free at www.biblica.com and open.bible.",
510
+ "author": "@coqui_ai",
511
+ "commit": "1b22f03"
512
+ }
513
+ }
514
+ },
515
+ "lin": {
516
+ "openbible": {
517
+ "vits": {
518
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--lin--openbible--vits.zip",
519
+ "default_vocoder": null,
520
+ "license": "CC-BY-SA 4.0",
521
+ "description": "Original work (audio and text) by Biblica available for free at www.biblica.com and open.bible.",
522
+ "author": "@coqui_ai",
523
+ "commit": "1b22f03"
524
+ }
525
+ }
526
+ },
527
+ "tw_akuapem": {
528
+ "openbible": {
529
+ "vits": {
530
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--tw_akuapem--openbible--vits.zip",
531
+ "default_vocoder": null,
532
+ "license": "CC-BY-SA 4.0",
533
+ "description": "Original work (audio and text) by Biblica available for free at www.biblica.com and open.bible.",
534
+ "author": "@coqui_ai",
535
+ "commit": "1b22f03"
536
+ }
537
+ }
538
+ },
539
+ "tw_asante": {
540
+ "openbible": {
541
+ "vits": {
542
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--tw_asante--openbible--vits.zip",
543
+ "default_vocoder": null,
544
+ "license": "CC-BY-SA 4.0",
545
+ "description": "Original work (audio and text) by Biblica available for free at www.biblica.com and open.bible.",
546
+ "author": "@coqui_ai",
547
+ "commit": "1b22f03"
548
+ }
549
+ }
550
+ },
551
+ "yor": {
552
+ "openbible": {
553
+ "vits": {
554
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.2_models/tts_models--yor--openbible--vits.zip",
555
+ "default_vocoder": null,
556
+ "license": "CC-BY-SA 4.0",
557
+ "description": "Original work (audio and text) by Biblica available for free at www.biblica.com and open.bible.",
558
+ "author": "@coqui_ai",
559
+ "commit": "1b22f03"
560
+ }
561
+ }
562
+ },
563
+ "hu": {
564
+ "css10": {
565
+ "vits": {
566
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--hu--css10--vits.zip",
567
+ "default_vocoder": null,
568
+ "commit": null,
569
+ "author": "@NeonGeckoCom",
570
+ "license": "bsd-3-clause"
571
+ }
572
+ }
573
+ },
574
+ "el": {
575
+ "cv": {
576
+ "vits": {
577
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--el--cv--vits.zip",
578
+ "default_vocoder": null,
579
+ "commit": null,
580
+ "author": "@NeonGeckoCom",
581
+ "license": "bsd-3-clause"
582
+ }
583
+ }
584
+ },
585
+ "fi": {
586
+ "css10": {
587
+ "vits": {
588
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--fi--css10--vits.zip",
589
+ "default_vocoder": null,
590
+ "commit": null,
591
+ "author": "@NeonGeckoCom",
592
+ "license": "bsd-3-clause"
593
+ }
594
+ }
595
+ },
596
+ "hr": {
597
+ "cv": {
598
+ "vits": {
599
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--hr--cv--vits.zip",
600
+ "default_vocoder": null,
601
+ "commit": null,
602
+ "author": "@NeonGeckoCom",
603
+ "license": "bsd-3-clause"
604
+ }
605
+ }
606
+ },
607
+ "lt": {
608
+ "cv": {
609
+ "vits": {
610
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--lt--cv--vits.zip",
611
+ "default_vocoder": null,
612
+ "commit": null,
613
+ "author": "@NeonGeckoCom",
614
+ "license": "bsd-3-clause"
615
+ }
616
+ }
617
+ },
618
+ "lv": {
619
+ "cv": {
620
+ "vits": {
621
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--lv--cv--vits.zip",
622
+ "default_vocoder": null,
623
+ "commit": null,
624
+ "author": "@NeonGeckoCom",
625
+ "license": "bsd-3-clause"
626
+ }
627
+ }
628
+ },
629
+ "mt": {
630
+ "cv": {
631
+ "vits": {
632
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--mt--cv--vits.zip",
633
+ "default_vocoder": null,
634
+ "commit": null,
635
+ "author": "@NeonGeckoCom",
636
+ "license": "bsd-3-clause"
637
+ }
638
+ }
639
+ },
640
+ "pl": {
641
+ "mai_female": {
642
+ "vits": {
643
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--pl--mai_female--vits.zip",
644
+ "default_vocoder": null,
645
+ "commit": null,
646
+ "author": "@NeonGeckoCom",
647
+ "license": "bsd-3-clause"
648
+ }
649
+ }
650
+ },
651
+ "pt": {
652
+ "cv": {
653
+ "vits": {
654
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--pt--cv--vits.zip",
655
+ "default_vocoder": null,
656
+ "commit": null,
657
+ "author": "@NeonGeckoCom",
658
+ "license": "bsd-3-clause"
659
+ }
660
+ }
661
+ },
662
+ "ro": {
663
+ "cv": {
664
+ "vits": {
665
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--ro--cv--vits.zip",
666
+ "default_vocoder": null,
667
+ "commit": null,
668
+ "author": "@NeonGeckoCom",
669
+ "license": "bsd-3-clause"
670
+ }
671
+ }
672
+ },
673
+ "sk": {
674
+ "cv": {
675
+ "vits": {
676
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--sk--cv--vits.zip",
677
+ "default_vocoder": null,
678
+ "commit": null,
679
+ "author": "@NeonGeckoCom",
680
+ "license": "bsd-3-clause"
681
+ }
682
+ }
683
+ },
684
+ "sl": {
685
+ "cv": {
686
+ "vits": {
687
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--sl--cv--vits.zip",
688
+ "default_vocoder": null,
689
+ "commit": null,
690
+ "author": "@NeonGeckoCom",
691
+ "license": "bsd-3-clause"
692
+ }
693
+ }
694
+ },
695
+ "sv": {
696
+ "cv": {
697
+ "vits": {
698
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/tts_models--sv--cv--vits.zip",
699
+ "default_vocoder": null,
700
+ "commit": null,
701
+ "author": "@NeonGeckoCom",
702
+ "license": "bsd-3-clause"
703
+ }
704
+ }
705
+ },
706
+ "ca": {
707
+ "custom": {
708
+ "vits": {
709
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.10.1_models/tts_models--ca--custom--vits.zip",
710
+ "default_vocoder": null,
711
+ "commit": null,
712
+ "description": " It is trained from zero with 101460 utterances consisting of 257 speakers, approx 138 hours of speech. We used three datasets;\nFestcat and Google Catalan TTS (both TTS datasets) and also a part of Common Voice 8. It is trained with TTS v0.8.0.\nhttps://github.com/coqui-ai/TTS/discussions/930#discussioncomment-4466345",
713
+ "author": "@gullabi",
714
+ "license": "CC-BY-4.0"
715
+ }
716
+ }
717
+ },
718
+ "fa": {
719
+ "custom": {
720
+ "glow-tts": {
721
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.10.1_models/tts_models--fa--custom--glow-tts.zip",
722
+ "default_vocoder": null,
723
+ "commit": null,
724
+ "description": "persian-tts-female-glow_tts model for text to speech purposes. Single-speaker female voice Trained on persian-tts-dataset-famale. \nThis model has no compatible vocoder thus the output quality is not very good. \nDataset: https://www.kaggle.com/datasets/magnoliasis/persian-tts-dataset-famale.",
725
+ "author": "@karim23657",
726
+ "license": "CC-BY-4.0"
727
+ }
728
+ }
729
+ },
730
+ "bn": {
731
+ "custom": {
732
+ "vits-male": {
733
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.13.3_models/tts_models--bn--custom--vits_male.zip",
734
+ "default_vocoder": null,
735
+ "commit": null,
736
+ "description": "Single speaker Bangla male model. For more information -> https://github.com/mobassir94/comprehensive-bangla-tts",
737
+ "author": "@mobassir94",
738
+ "license": "Apache 2.0"
739
+ },
740
+ "vits-female": {
741
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.13.3_models/tts_models--bn--custom--vits_female.zip",
742
+ "default_vocoder": null,
743
+ "commit": null,
744
+ "description": "Single speaker Bangla female model. For more information -> https://github.com/mobassir94/comprehensive-bangla-tts",
745
+ "author": "@mobassir94",
746
+ "license": "Apache 2.0"
747
+ }
748
+ }
749
+ },
750
+ "be": {
751
+ "common-voice": {
752
+ "glow-tts":{
753
+ "description": "Belarusian GlowTTS model created by @alex73 (Github).",
754
+ "github_rls_url":"https://coqui.gateway.scarf.sh/v0.16.6/tts_models--be--common-voice--glow-tts.zip",
755
+ "default_vocoder": "vocoder_models/be/common-voice/hifigan",
756
+ "commit": "c0aabb85",
757
+ "license": "CC-BY-SA 4.0",
758
+ "contact": "alex73mail@gmail.com"
759
+ }
760
+ }
761
+ }
762
+ },
763
+ "vocoder_models": {
764
+ "universal": {
765
+ "libri-tts": {
766
+ "wavegrad": {
767
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--universal--libri-tts--wavegrad.zip",
768
+ "commit": "ea976b0",
769
+ "author": "Eren Gölge @erogol",
770
+ "license": "MPL",
771
+ "contact": "egolge@coqui.com"
772
+ },
773
+ "fullband-melgan": {
774
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--universal--libri-tts--fullband-melgan.zip",
775
+ "commit": "4132240",
776
+ "author": "Eren Gölge @erogol",
777
+ "license": "MPL",
778
+ "contact": "egolge@coqui.com"
779
+ }
780
+ }
781
+ },
782
+ "en": {
783
+ "ek1": {
784
+ "wavegrad": {
785
+ "description": "EK1 en-rp wavegrad by NMStoker",
786
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--en--ek1--wavegrad.zip",
787
+ "commit": "c802255",
788
+ "license": "apache 2.0"
789
+ }
790
+ },
791
+ "ljspeech": {
792
+ "multiband-melgan": {
793
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--en--ljspeech--multiband-melgan.zip",
794
+ "commit": "ea976b0",
795
+ "author": "Eren Gölge @erogol",
796
+ "license": "MPL",
797
+ "contact": "egolge@coqui.com"
798
+ },
799
+ "hifigan_v2": {
800
+ "description": "HiFiGAN_v2 LJSpeech vocoder from https://arxiv.org/abs/2010.05646.",
801
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--en--ljspeech--hifigan_v2.zip",
802
+ "commit": "bae2ad0f",
803
+ "author": "@erogol",
804
+ "license": "apache 2.0",
805
+ "contact": "egolge@coqui.ai"
806
+ },
807
+ "univnet": {
808
+ "description": "UnivNet model finetuned on TacotronDDC_ph spectrograms for better compatibility.",
809
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--en--ljspeech--univnet_v2.zip",
810
+ "commit": "4581e3d",
811
+ "author": "Eren @erogol",
812
+ "license": "apache 2.0",
813
+ "contact": "egolge@coqui.ai"
814
+ }
815
+ },
816
+ "blizzard2013": {
817
+ "hifigan_v2": {
818
+ "description": "HiFiGAN_v2 LJSpeech vocoder from https://arxiv.org/abs/2010.05646.",
819
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.7.0_models/vocoder_models--en--blizzard2013--hifigan_v2.zip",
820
+ "commit": "d6284e7",
821
+ "author": "Adam Froghyar @a-froghyar",
822
+ "license": "apache 2.0",
823
+ "contact": "adamfroghyar@gmail.com"
824
+ }
825
+ },
826
+ "vctk": {
827
+ "hifigan_v2": {
828
+ "description": "Finetuned and intended to be used with tts_models/en/vctk/sc-glow-tts",
829
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--en--vctk--hifigan_v2.zip",
830
+ "commit": "2f07160",
831
+ "author": "Edresson Casanova",
832
+ "license": "apache 2.0",
833
+ "contact": ""
834
+ }
835
+ },
836
+ "sam": {
837
+ "hifigan_v2": {
838
+ "description": "Finetuned and intended to be used with tts_models/en/sam/tacotron-DDC",
839
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--en--sam--hifigan_v2.zip",
840
+ "commit": "2f07160",
841
+ "author": "Eren Gölge @erogol",
842
+ "license": "apache 2.0",
843
+ "contact": "egolge@coqui.ai"
844
+ }
845
+ }
846
+ },
847
+ "nl": {
848
+ "mai": {
849
+ "parallel-wavegan": {
850
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--nl--mai--parallel-wavegan.zip",
851
+ "author": "@r-dh",
852
+ "license": "apache 2.0",
853
+ "commit": "unknown"
854
+ }
855
+ }
856
+ },
857
+ "de": {
858
+ "thorsten": {
859
+ "wavegrad": {
860
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--de--thorsten--wavegrad.zip",
861
+ "author": "@thorstenMueller",
862
+ "license": "apache 2.0",
863
+ "commit": "unknown"
864
+ },
865
+ "fullband-melgan": {
866
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--de--thorsten--fullband-melgan.zip",
867
+ "author": "@thorstenMueller",
868
+ "license": "apache 2.0",
869
+ "commit": "unknown"
870
+ },
871
+ "hifigan_v1": {
872
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.8.0_models/vocoder_models--de--thorsten--hifigan_v1.zip",
873
+ "description": "HifiGAN vocoder model for Thorsten Neutral Dec2021 22k Samplerate Tacotron2 DDC model",
874
+ "author": "@thorstenMueller",
875
+ "license": "apache 2.0",
876
+ "commit": "unknown"
877
+ }
878
+ }
879
+ },
880
+ "ja": {
881
+ "kokoro": {
882
+ "hifigan_v1": {
883
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--ja--kokoro--hifigan_v1.zip",
884
+ "description": "HifiGAN model trained for kokoro dataset by @kaiidams",
885
+ "author": "@kaiidams",
886
+ "license": "apache 2.0",
887
+ "commit": "3900448"
888
+ }
889
+ }
890
+ },
891
+ "uk": {
892
+ "mai": {
893
+ "multiband-melgan": {
894
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--uk--mai--multiband-melgan.zip",
895
+ "author": "@robinhad",
896
+ "commit": "bdab788d",
897
+ "license": "MIT",
898
+ "contact": ""
899
+ }
900
+ }
901
+ },
902
+ "tr": {
903
+ "common-voice": {
904
+ "hifigan": {
905
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.6.1_models/vocoder_models--tr--common-voice--hifigan.zip",
906
+ "description": "HifiGAN model using an unknown speaker from the Common-Voice dataset.",
907
+ "author": "Fatih Akademi",
908
+ "license": "MIT",
909
+ "commit": null
910
+ }
911
+ }
912
+ },
913
+ "be": {
914
+ "common-voice": {
915
+ "hifigan": {
916
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.16.6/vocoder_models--be--common-voice--hifigan.zip",
917
+ "description": "Belarusian HiFiGAN model created by @alex73 (Github).",
918
+ "author": "@alex73",
919
+ "license": "CC-BY-SA 4.0",
920
+ "commit": "c0aabb85"
921
+ }
922
+ }
923
+ }
924
+ },
925
+ "voice_conversion_models": {
926
+ "multilingual": {
927
+ "vctk": {
928
+ "freevc24": {
929
+ "github_rls_url": "https://coqui.gateway.scarf.sh/v0.13.0_models/voice_conversion_models--multilingual--vctk--freevc24.zip",
930
+ "description": "FreeVC model trained on VCTK dataset from https://github.com/OlaWod/FreeVC",
931
+ "author": "Jing-Yi Li @OlaWod",
932
+ "license": "MIT",
933
+ "commit": null
934
+ }
935
+ }
936
+ }
937
+ }
938
+ }
TTS/TTS/VERSION ADDED
@@ -0,0 +1 @@
 
 
1
+ 0.22.0
TTS/TTS/__init__.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ import os
2
+
3
# Read the package version from the VERSION file located next to this module,
# so the lookup does not depend on the current working directory.
with open(os.path.join(os.path.dirname(__file__), "VERSION"), "r", encoding="utf-8") as f:
    __version__ = version = f.read().strip()
TTS/TTS/api.py ADDED
@@ -0,0 +1,458 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import tempfile
2
+ import warnings
3
+ from pathlib import Path
4
+ from typing import Union
5
+
6
+ import numpy as np
7
+ from torch import nn
8
+
9
+ from TTS.utils.audio.numpy_transforms import save_wav
10
+ from TTS.utils.manage import ModelManager
11
+ from TTS.utils.synthesizer import Synthesizer
12
+ from TTS.config import load_config
13
+
14
+
15
+ class TTS(nn.Module):
16
+ """TODO: Add voice conversion and Capacitron support."""
17
+
18
def __init__(
    self,
    model_name: str = "",
    model_path: str = None,
    config_path: str = None,
    vocoder_path: str = None,
    vocoder_config_path: str = None,
    progress_bar: bool = True,
    gpu=False,
):
    """🐸TTS python interface for loading and using the released models.

    A model can be selected by name (``model_name``) and/or loaded from a
    checkpoint on disk (``model_path``). When ``model_name`` is non-empty, the
    loader is picked from the name: names containing ``"tts_models"`` go to
    ``load_tts_model_by_name``, names containing ``"voice_conversion_models"``
    go to ``load_vc_model_by_name``, and anything else goes to
    ``load_model_by_name``. When ``model_path`` is given, the checkpoint is
    additionally loaded through ``load_tts_model_by_path``.

    Example with a multi-speaker model:
        NOTE(review): ``list_models`` is defined in this class as an instance
        method that returns a ``ModelManager``, so the ``TTS.list_models()[0]``
        call below may need updating - confirm.

        >>> from TTS.api import TTS
        >>> tts = TTS(TTS.list_models()[0])
        >>> wav = tts.tts("This is a test! This is also a test!!", speaker=tts.speakers[0], language=tts.languages[0])
        >>> tts.tts_to_file(text="Hello world!", speaker=tts.speakers[0], language=tts.languages[0], file_path="output.wav")

    Example with a single-speaker model:
        >>> tts = TTS(model_name="tts_models/de/thorsten/tacotron2-DDC", progress_bar=False, gpu=False)
        >>> tts.tts_to_file(text="Ich bin eine Testnachricht.", file_path="output.wav")

    Example loading a model from a path:
        >>> tts = TTS(model_path="/path/to/checkpoint_100000.pth", config_path="/path/to/config.json", progress_bar=False, gpu=False)
        >>> tts.tts_to_file(text="Ich bin eine Testnachricht.", file_path="output.wav")

    Example voice cloning with YourTTS in English, French and Portuguese:
        >>> tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False, gpu=True)
        >>> tts.tts_to_file("This is voice cloning.", speaker_wav="my/cloning/audio.wav", language="en", file_path="thisisit.wav")
        >>> tts.tts_to_file("C'est le clonage de la voix.", speaker_wav="my/cloning/audio.wav", language="fr", file_path="thisisit.wav")
        >>> tts.tts_to_file("Isso é clonagem de voz.", speaker_wav="my/cloning/audio.wav", language="pt", file_path="thisisit.wav")

    Example Fairseq TTS models (uses ISO language codes in https://dl.fbaipublicfiles.com/mms/tts/all-tts-languages.html):
        >>> tts = TTS(model_name="tts_models/eng/fairseq/vits", progress_bar=False, gpu=True)
        >>> tts.tts_to_file("This is a test.", file_path="output.wav")

    Args:
        model_name (str, optional): Model name to load. You can list models by ```tts.models```.
            Defaults to "" (no model is loaded by name).
        model_path (str, optional): Path to the model checkpoint. Defaults to None.
        config_path (str, optional): Path to the model config. When given, it is also parsed with
            ``load_config`` and stored on ``self.config``. Defaults to None.
        vocoder_path (str, optional): Path to the vocoder checkpoint. Defaults to None.
        vocoder_config_path (str, optional): Path to the vocoder config. Defaults to None.
        progress_bar (bool, optional): Whether to print a progress bar while downloading a model. Defaults to True.
        gpu (bool, optional): Enable/disable GPU. Some models might be too slow on CPU. A warning pointing
            to ``tts.to(device)`` is emitted when this is truthy. Defaults to False.
    """
    super().__init__()
    models_file = self.get_models_file_path()
    self.manager = ModelManager(models_file=models_file, progress_bar=progress_bar, verbose=False)
    if config_path:
        self.config = load_config(config_path)
    else:
        self.config = None
    self.synthesizer = None
    self.voice_converter = None
    self.model_name = ""
    if gpu:
        warnings.warn("`gpu` will be deprecated. Please use `tts.to(device)` instead.")

    # Pick the loader from the model name, then invoke it once.
    name_given = model_name is not None and len(model_name) > 0
    if name_given:
        if "tts_models" in model_name:
            load_by_name = self.load_tts_model_by_name
        elif "voice_conversion_models" in model_name:
            load_by_name = self.load_vc_model_by_name
        else:
            load_by_name = self.load_model_by_name
        load_by_name(model_name, gpu)

    # A checkpoint path is loaded in addition to (not instead of) a named model.
    if model_path:
        self.load_tts_model_by_path(
            model_path,
            config_path,
            vocoder_path=vocoder_path,
            vocoder_config=vocoder_config_path,
            gpu=gpu,
        )
84
+
85
+ @property
86
+ def models(self):
87
+ return self.manager.list_tts_models()
88
+
89
+ @property
90
+ def is_multi_speaker(self):
91
+ if hasattr(self.synthesizer.tts_model, "speaker_manager") and self.synthesizer.tts_model.speaker_manager:
92
+ return self.synthesizer.tts_model.speaker_manager.num_speakers > 1
93
+ return False
94
+
95
@property
def is_multi_lingual(self):
    """Whether the loaded model should be treated as multi-lingual.

    The answer is True when any of the following holds:

    - `self.model_name` is a string containing "xtts";
    - `self.config` is set and either its `model` contains "xtts" or it lists more than one language;
    - the TTS model has a truthy `language_manager` reporting more than one language.

    Returns:
        bool: True for multi-lingual models. False otherwise, including when none of the name/config
            checks match and no TTS model has been loaded yet.
    """
    # `model_name` has been observed to be None, hence the isinstance check before the substring test.
    if isinstance(self.model_name, str) and "xtts" in self.model_name:
        return True
    if self.config and ("xtts" in self.config.model or len(self.config.languages) > 1):
        return True
    # `self.synthesizer` stays None until a TTS model is loaded; do not dereference it blindly.
    if self.synthesizer is None:
        return False
    language_manager = getattr(self.synthesizer.tts_model, "language_manager", None)
    if not language_manager:
        return False
    return language_manager.num_languages > 1
108
+
109
@property
def speakers(self):
    """Speaker names of the loaded TTS model, or None when it is not multi-speaker."""
    if self.is_multi_speaker:
        return self.synthesizer.tts_model.speaker_manager.speaker_names
    return None
114
+
115
@property
def languages(self):
    """Language names of the loaded TTS model, or None when it is not multi-lingual."""
    if self.is_multi_lingual:
        return self.synthesizer.tts_model.language_manager.language_names
    return None
120
+
121
@staticmethod
def get_models_file_path():
    """Return the path of the `.models.json` file that sits next to this module."""
    module_dir = Path(__file__).parent
    return module_dir / ".models.json"
124
+
125
def list_models(self):
    """Build a new `ModelManager` over the bundled models file and return it.

    The manager is created with the progress bar and verbose output disabled. Note that the
    manager object itself is returned, not a list of model names.
    """
    models_file = TTS.get_models_file_path()
    return ModelManager(models_file=models_file, progress_bar=False, verbose=False)
127
+
128
def download_model_by_name(self, model_name: str):
    """Download a model, and its default vocoder when one is declared, via the model manager.

    Args:
        model_name (str): Name of the model to download.

    Returns:
        tuple: `(model_path, config_path, vocoder_path, vocoder_config_path, model_dir)`.

            - For names containing "fairseq", or when the model item's `model_url` is a list, only
              `model_dir` is set (to the downloaded path) and the other entries are None.
            - When the manager returns no model item, or the item declares no `default_vocoder`,
              only `model_path` and `config_path` are set.
            - Otherwise the default vocoder is downloaded too and `model_dir` is None.
    """
    model_path, config_path, model_item = self.manager.download_model(model_name)
    has_item = model_item is not None
    if "fairseq" in model_name or (has_item and isinstance(model_item["model_url"], list)):
        # return model directory if there are multiple files
        # we assume that the model knows how to load itself
        return None, None, None, None, model_path
    # The check above already allows for a missing model item; without one there is no
    # default vocoder to look up, so treat it the same as "no default vocoder".
    default_vocoder = model_item.get("default_vocoder") if has_item else None
    if default_vocoder is None:
        return model_path, config_path, None, None, None
    vocoder_path, vocoder_config_path, _ = self.manager.download_model(default_vocoder)
    return model_path, config_path, vocoder_path, vocoder_config_path, None
138
+
139
def load_model_by_name(self, model_name: str, gpu: bool = False):
    """Load a 🐸TTS model by name.

    This simply forwards both arguments to `load_tts_model_by_name`.

    Args:
        model_name (str): Name of the model to load. Available names are listed by ```tts.models```.
        gpu (bool, optional): Whether to use the GPU. Some models might be too slow on CPU.
            Defaults to False.
    """
    loader = self.load_tts_model_by_name
    loader(model_name, gpu)
147
+
148
def load_vc_model_by_name(self, model_name: str, gpu: bool = False):
    """Download a voice conversion model by name and install it as `self.voice_converter`.

    `self.model_name` is overwritten with `model_name` as part of loading.

    Args:
        model_name (str): Name of the model to load. Available names are listed by ```tts.models```.
        gpu (bool, optional): Whether to use the GPU. Some models might be too slow on CPU.
            Defaults to False.
    """
    self.model_name = model_name
    # Only the checkpoint and config entries of the download result are used here.
    checkpoint, vc_config, _vocoder, _vocoder_config, _model_dir = self.download_model_by_name(model_name)
    self.voice_converter = Synthesizer(vc_checkpoint=checkpoint, vc_config=vc_config, use_cuda=gpu)
158
+
159
def load_tts_model_by_name(self, model_name: str, gpu: bool = False):
    """Download a 🐸TTS model by name and install it as `self.synthesizer`.

    The current synthesizer is dropped (set to None) and `self.model_name` is updated before the
    download starts.

    Args:
        model_name (str): Name of the model to load. Available names are listed by ```tts.models```.
        gpu (bool, optional): Whether to use the GPU. Some models might be too slow on CPU.
            Defaults to False.

    TODO: Add tests
    """
    self.synthesizer = None
    self.model_name = model_name

    checkpoint, config, vocoder_checkpoint, vocoder_config, model_dir = self.download_model_by_name(model_name)

    # Build the synthesizer. Per the original note, arguments left as None are fetched from the model.
    synthesizer_args = dict(
        tts_checkpoint=checkpoint,
        tts_config_path=config,
        tts_speakers_file=None,
        tts_languages_file=None,
        vocoder_checkpoint=vocoder_checkpoint,
        vocoder_config=vocoder_config,
        encoder_checkpoint=None,
        encoder_config=None,
        model_dir=model_dir,
        use_cuda=gpu,
    )
    self.synthesizer = Synthesizer(**synthesizer_args)
189
+
190
def load_tts_model_by_path(
    self, model_path: str, config_path: str, vocoder_path: str = None, vocoder_config: str = None, gpu: bool = False
):
    """Build `self.synthesizer` from checkpoint and config files on disk.

    Args:
        model_path (str): Path to the model checkpoint.
        config_path (str): Path to the model config.
        vocoder_path (str, optional): Path to the vocoder checkpoint. Defaults to None.
        vocoder_config (str, optional): Path to the vocoder config. Defaults to None.
        gpu (bool, optional): Whether to use the GPU. Some models might be too slow on CPU.
            Defaults to False.
    """
    # Speaker, language and encoder files are deliberately left unset (None).
    synthesizer_args = dict(
        tts_checkpoint=model_path,
        tts_config_path=config_path,
        tts_speakers_file=None,
        tts_languages_file=None,
        vocoder_checkpoint=vocoder_path,
        vocoder_config=vocoder_config,
        encoder_checkpoint=None,
        encoder_config=None,
        use_cuda=gpu,
    )
    self.synthesizer = Synthesizer(**synthesizer_args)
214
+
215
def _check_arguments(
    self,
    speaker: str = None,
    language: str = None,
    speaker_wav: str = None,
    emotion: str = None,
    speed: float = None,
    **kwargs,
) -> None:
    """Validate synthesis arguments against the capabilities of the loaded model.

    Args:
        speaker (str, optional): Speaker name. Defaults to None.
        language (str, optional): Language code. Defaults to None.
        speaker_wav (str, optional): Reference wav; accepted instead of `speaker` for multi-speaker
            models. Defaults to None.
        emotion (str, optional): Coqui Studio emotion. Defaults to None.
        speed (float, optional): Coqui Studio speed factor. Defaults to None.
        kwargs (dict, optional): Extra model arguments. The presence of a `voice_dir` key permits a
            `speaker` on a model that is not multi-speaker.

    Raises:
        ValueError: If a multi-speaker model gets neither `speaker` nor `speaker_wav`, a
            multi-lingual model gets no `language`, a single-speaker model gets a `speaker`
            (without `voice_dir`), a single-language model gets a `language`, or both `emotion`
            and `speed` are given.
    """
    speaker_given = speaker is not None
    language_given = language is not None

    # check for the coqui tts models
    multi_speaker = self.is_multi_speaker
    if multi_speaker and not speaker_given and speaker_wav is None:
        raise ValueError("Model is multi-speaker but no `speaker` is provided.")
    multi_lingual = self.is_multi_lingual
    if multi_lingual and not language_given:
        raise ValueError("Model is multi-lingual but no `language` is provided.")
    if not multi_speaker and speaker_given and "voice_dir" not in kwargs:
        raise ValueError("Model is not multi-speaker but `speaker` is provided.")
    if not multi_lingual and language_given:
        raise ValueError("Model is not multi-lingual but `language` is provided.")
    # Only the combination of both Studio-only arguments is rejected.
    if emotion is not None and speed is not None:
        raise ValueError("Emotion and speed can only be used with Coqui Studio models. Which is discontinued.")
236
+
237
def tts(
    self,
    text: str,
    speaker: str = None,
    language: str = None,
    speaker_wav: str = None,
    emotion: str = None,
    speed: float = None,
    split_sentences: bool = True,
    **kwargs,
):
    """Synthesize speech for `text` and return the waveform produced by the synthesizer.

    Args:
        text (str):
            Input text to synthesize.
        speaker (str, optional):
            Speaker name for multi-speaker models. Use `tts.is_multi_speaker` to check whether the
            loaded model is multi-speaker and `tts.speakers` to list speakers. Defaults to None.
        language (str, optional):
            Language of the text for multi-lingual models. Defaults to None.
        speaker_wav (str, optional):
            Path to a reference wav file to use for voice cloning with supporting models like YourTTS.
            Defaults to None.
        emotion (str, optional):
            Emotion for 🐸Coqui Studio models. It is only validated here and is not forwarded to the
            synthesizer. Defaults to None.
        speed (float, optional):
            Speed factor for 🐸Coqui Studio models. It is only validated here and is not forwarded to
            the synthesizer. Defaults to None.
        split_sentences (bool, optional):
            Split text into sentences, synthesize them separately and concatenate the file audio.
            Setting it False uses more VRAM and possibly hit model specific text length or VRAM limits.
            Only applicable to the 🐸TTS models. Defaults to True.
        kwargs (dict, optional):
            Additional arguments for the model.
    """
    # Validate first; the same extra keyword arguments are then handed on to the synthesizer.
    self._check_arguments(
        speaker=speaker,
        language=language,
        speaker_wav=speaker_wav,
        emotion=emotion,
        speed=speed,
        **kwargs,
    )
    return self.synthesizer.tts(
        text=text,
        speaker_name=speaker,
        language_name=language,
        speaker_wav=speaker_wav,
        reference_wav=None,
        style_wav=None,
        style_text=None,
        reference_speaker_name=None,
        split_sentences=split_sentences,
        **kwargs,
    )
289
+
290
def tts_to_file(
    self,
    text: str,
    speaker: str = None,
    language: str = None,
    speaker_wav: str = None,
    emotion: str = None,
    speed: float = 1.0,
    pipe_out=None,
    file_path: str = "output.wav",
    split_sentences: bool = True,
    **kwargs,
):
    """Synthesize speech for `text`, save it through the synthesizer and return the output path.

    Args:
        text (str):
            Input text to synthesize.
        speaker (str, optional):
            Speaker name for multi-speaker models. Use `tts.is_multi_speaker` to check whether the
            loaded model is multi-speaker and `tts.speakers` to list speakers. Defaults to None.
        language (str, optional):
            Language code for multi-lingual models. Use `tts.is_multi_lingual` to check whether the
            loaded model is multi-lingual and `tts.languages` to list languages. Defaults to None.
        speaker_wav (str, optional):
            Path to a reference wav file to use for voice cloning with supporting models like YourTTS.
            Defaults to None.
        emotion (str, optional):
            Emotion for 🐸Coqui Studio models. Accepted but not used by this method. Defaults to None.
        speed (float, optional):
            Speed factor for 🐸Coqui Studio models. Accepted but not used by this method.
            Defaults to 1.0.
        pipe_out (BytesIO, optional):
            Passed through to the synthesizer's `save_wav`; intended for piping the generated wav to
            stdout. Defaults to None.
        file_path (str, optional):
            Output file path. Defaults to "output.wav".
        split_sentences (bool, optional):
            Split text into sentences, synthesize them separately and concatenate the file audio.
            Setting it False uses more VRAM and possibly hit model specific text length or VRAM limits.
            Only applicable to the 🐸TTS models. Defaults to True.
        kwargs (dict, optional):
            Additional arguments for the model.

    Returns:
        str: `file_path`, unchanged.
    """
    voice_args = dict(speaker=speaker, language=language, speaker_wav=speaker_wav)
    self._check_arguments(**voice_args, **kwargs)

    synthesized = self.tts(text=text, **voice_args, split_sentences=split_sentences, **kwargs)
    self.synthesizer.save_wav(wav=synthesized, path=file_path, pipe_out=pipe_out)
    return file_path
344
+
345
def voice_conversion(
    self,
    source_wav: str,
    target_wav: str,
):
    """Convert the voice in `source_wav` to the speaker of `target_wav` (voice conversion with FreeVC).

    Args:
        source_wav (str):
            Path to the source wav file.
        target_wav (str):
            Path to the target wav file.

    Returns:
        The waveform returned by the loaded voice converter.
    """
    converter = self.voice_converter
    return converter.voice_conversion(source_wav=source_wav, target_wav=target_wav)
360
+
361
def voice_conversion_to_file(
    self,
    source_wav: str,
    target_wav: str,
    file_path: str = "output.wav",
):
    """Run voice conversion (FreeVC) and write the converted audio to `file_path`.

    The file is written at the voice converter's configured output sample rate.

    Args:
        source_wav (str):
            Path to the source wav file.
        target_wav (str):
            Path to the target wav file.
        file_path (str, optional):
            Output file path. Defaults to "output.wav".

    Returns:
        str: `file_path`, unchanged.
    """
    converted = self.voice_conversion(source_wav=source_wav, target_wav=target_wav)
    output_rate = self.voice_converter.vc_config.audio.output_sample_rate
    save_wav(wav=converted, path=file_path, sample_rate=output_rate)
    return file_path
380
+
381
def tts_with_vc(
    self,
    text: str,
    language: str = None,
    speaker_wav: str = None,
    speaker: str = None,
    split_sentences: bool = True,
):
    """Convert text to speech with voice conversion.

    It combines tts with voice conversion to fake voice cloning.

    - Convert text to speech with tts (written to a temporary wav file).
    - Convert the output wav to target speaker with voice conversion.

    The temporary wav file is removed before returning, whether or not conversion succeeds.

    Args:
        text (str):
            Input text to synthesize.
        language (str, optional):
            Language code for multi-lingual models. Use `tts.is_multi_lingual` to check whether the
            loaded model is multi-lingual and `tts.languages` to list languages. Defaults to None.
        speaker_wav (str, optional):
            Path to the reference wav file of the target speaker. It is used only as the voice
            conversion target and is not passed to the TTS step. Defaults to None.
        speaker (str, optional):
            Speaker name for multi-speaker models. Use `tts.is_multi_speaker` to check whether the
            loaded model is multi-speaker and `tts.speakers` to list speakers. Defaults to None.
        split_sentences (bool, optional):
            Split text into sentences, synthesize them separately and concatenate the file audio.
            Setting it False uses more VRAM and possibly hit model specific text length or VRAM limits.
            Only applicable to the 🐸TTS models. Defaults to True.

    Returns:
        The waveform returned by the voice converter.
    """
    # Reserve a unique temp path and close the handle right away; the TTS output is saved
    # there so the voice converter can resample it while reading it back.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as fp:
        tmp_wav_path = fp.name
    try:
        self.tts_to_file(
            text=text, speaker=speaker, language=language, file_path=tmp_wav_path, split_sentences=split_sentences
        )
        if self.voice_converter is None:
            self.load_vc_model_by_name("voice_conversion_models/multilingual/vctk/freevc24")
        wav = self.voice_converter.voice_conversion(source_wav=tmp_wav_path, target_wav=speaker_wav)
    finally:
        # `delete=False` means nothing else removes the file; clean up so repeated calls do not
        # accumulate temp files. Best effort only: a failed removal must not mask the real outcome.
        try:
            Path(tmp_wav_path).unlink()
        except OSError:
            pass
    return wav
422
+
423
def tts_with_vc_to_file(
    self,
    text: str,
    language: str = None,
    speaker_wav: str = None,
    file_path: str = "output.wav",
    speaker: str = None,
    split_sentences: bool = True,
):
    """Convert text to speech with voice conversion and save to file.

    Check `tts_with_vc` for more details. The file is written at the voice converter's configured
    output sample rate.

    Args:
        text (str):
            Input text to synthesize.
        language (str, optional):
            Language code for multi-lingual models. Use `tts.is_multi_lingual` to check whether the
            loaded model is multi-lingual and `tts.languages` to list languages. Defaults to None.
        speaker_wav (str, optional):
            Path to the reference wav file of the target speaker. Defaults to None.
        file_path (str, optional):
            Output file path. Defaults to "output.wav".
        speaker (str, optional):
            Speaker name for multi-speaker models. Use `tts.is_multi_speaker` to check whether the
            loaded model is multi-speaker and `tts.speakers` to list speakers. Defaults to None.
        split_sentences (bool, optional):
            Split text into sentences, synthesize them separately and concatenate the file audio.
            Setting it False uses more VRAM and possibly hit model specific text length or VRAM limits.
            Only applicable to the 🐸TTS models. Defaults to True.

    Returns:
        str: `file_path`, unchanged, matching the other `*_to_file` methods.
    """
    wav = self.tts_with_vc(
        text=text, language=language, speaker_wav=speaker_wav, speaker=speaker, split_sentences=split_sentences
    )
    save_wav(wav=wav, path=file_path, sample_rate=self.voice_converter.vc_config.audio.output_sample_rate)
    # Return the output path like `tts_to_file` and `voice_conversion_to_file` do.
    return file_path
TTS/TTS/bin/__init__.py ADDED
File without changes