Upload folder using huggingface_hub

#1
.github/workflows/build-docker.yml ADDED
@@ -0,0 +1,196 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ name: Build and Publish Docker Image
2
+
3
+ on:
4
+ workflow_dispatch:
5
+ push:
6
+ branches:
7
+ - 'main'
8
+ release:
9
+ types: [published]
10
+
11
+ jobs:
12
+ build-and-push-image:
13
+ runs-on: ubuntu-latest
14
+
15
+ permissions:
16
+ contents: read
17
+ packages: write
18
+
19
+ env:
20
+ # Set up environment variables for the job
21
+ DOCKER_REGISTRY: ghcr.io
22
+ IMAGE_NAME: ${{ github.repository }}
23
+ TAG: ${{ github.sha }}
24
+
25
+ steps:
26
+ - name: Check out code
27
+ uses: actions/checkout@v4
28
+
29
+ - name: Set up Docker Buildx
30
+ uses: docker/setup-buildx-action@v2
31
+ with:
32
+ install: true
33
+
34
+ # Log in to the GitHub Container Registry (this workflow is never triggered by pull request events)
35
+ - name: Login to Docker Registry
36
+ uses: docker/login-action@v2
37
+ with:
38
+ registry: ${{ env.DOCKER_REGISTRY }}
39
+ username: ${{ github.actor }}
40
+ password: ${{ secrets.GITHUB_TOKEN }}
41
+
42
+ - name: Extract metadata (tags, labels) for Docker
43
+ id: meta
44
+ uses: docker/metadata-action@v4
45
+ with:
46
+ images: ${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}
47
+
48
+ # Build and push the Docker image to GHCR for the main branch or specific tags
49
+ - name: Build and Push Docker Image
50
+ if: github.ref == 'refs/heads/main'
51
+ uses: docker/build-push-action@v4
52
+ with:
53
+ context: .
54
+ file: Dockerfile
55
+ push: true
56
+ tags: ${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}:latest
57
+ labels: version=${{ github.run_id }}
58
+ platforms: linux/amd64,linux/arm64
59
+
60
+ # For tagged releases, build and push the Docker image with the corresponding tag
61
+ - name: Build and Push Docker Image (Tagged)
62
+ if: startsWith(github.ref, 'refs/tags/')
63
+ uses: docker/build-push-action@v4
64
+ with:
65
+ context: .
66
+ file: Dockerfile
67
+ push: true
68
+ tags: ${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.ref_name }}
69
+ labels: version=${{ github.run_id }}
70
+ platforms: linux/amd64,linux/arm64
71
+
72
+ build-and-push-min-image:
73
+ runs-on: ubuntu-latest
74
+
75
+ permissions:
76
+ contents: read
77
+ packages: write
78
+
79
+ env:
80
+ # Set up environment variables for the job
81
+ DOCKER_REGISTRY: ghcr.io
82
+ IMAGE_NAME: ${{ github.repository }}-min
83
+ TAG: ${{ github.sha }}
84
+
85
+ steps:
86
+ - name: Check out code
87
+ uses: actions/checkout@v4
88
+
89
+ - name: Set up Docker Buildx
90
+ uses: docker/setup-buildx-action@v2
91
+ with:
92
+ install: true
93
+
94
+ # Log in to the GitHub Container Registry (this workflow is never triggered by pull request events)
95
+ - name: Login to Docker Registry
96
+ uses: docker/login-action@v2
97
+ with:
98
+ registry: ${{ env.DOCKER_REGISTRY }}
99
+ username: ${{ github.actor }}
100
+ password: ${{ secrets.GITHUB_TOKEN }}
101
+
102
+ - name: Extract metadata (tags, labels) for Docker
103
+ id: meta
104
+ uses: docker/metadata-action@v4
105
+ with:
106
+ images: ${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}
107
+
108
+ # Build and push the Docker image to GHCR for the main branch or specific tags
109
+ - name: Build and Push Docker Image
110
+ if: github.ref == 'refs/heads/main'
111
+ uses: docker/build-push-action@v4
112
+ with:
113
+ context: .
114
+ file: Dockerfile.min
115
+ push: true
116
+ tags: ${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}:latest
117
+ labels: version=${{ github.run_id }}
118
+ platforms: linux/amd64,linux/arm64
119
+
120
+ # For tagged releases, build and push the Docker image with the corresponding tag
121
+ - name: Build and Push Docker Image (Tagged)
122
+ if: startsWith(github.ref, 'refs/tags/')
123
+ uses: docker/build-push-action@v4
124
+ with:
125
+ context: .
126
+ file: Dockerfile.min
127
+ push: true
128
+ tags: ${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.ref_name }}
129
+ labels: version=${{ github.run_id }}
130
+ platforms: linux/amd64,linux/arm64
131
+
132
+ build-and-push-rocm-image:
133
+ runs-on: ubuntu-latest
134
+
135
+ permissions:
136
+ contents: read
137
+ packages: write
138
+
139
+ env:
140
+ # Set up environment variables for the job
141
+ USE_ROCM: 1
142
+ DOCKER_REGISTRY: ghcr.io
143
+ IMAGE_NAME: ${{ github.repository }}-rocm
144
+ TAG: ${{ github.sha }}
145
+
146
+ steps:
147
+ - name: Check out code
148
+ uses: actions/checkout@v4
149
+
150
+ - name: Set up Docker Buildx
151
+ uses: docker/setup-buildx-action@v2
152
+ with:
153
+ install: true
154
+
155
+ # Log in to the GitHub Container Registry (this workflow is never triggered by pull request events)
156
+ - name: Login to Docker Registry
157
+ uses: docker/login-action@v2
158
+ with:
159
+ registry: ${{ env.DOCKER_REGISTRY }}
160
+ username: ${{ github.actor }}
161
+ password: ${{ secrets.GITHUB_TOKEN }}
162
+
163
+ - name: Extract metadata (tags, labels) for Docker
164
+ id: meta
165
+ uses: docker/metadata-action@v4
166
+ with:
167
+ images: ${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}
168
+
169
+ # Build and push the Docker image to GHCR for the main branch or specific tags
170
+ - name: Build and Push Docker Image
171
+ if: github.ref == 'refs/heads/main'
172
+ uses: docker/build-push-action@v4
173
+ with:
174
+ context: .
175
+ file: Dockerfile
176
+ push: true
177
+ tags: ${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}:latest
178
+ labels: version=${{ github.run_id }}
179
+ platforms: linux/amd64,linux/arm64
180
+ build-args: |
181
+ USE_ROCM=1
182
+
183
+ # For tagged releases, build and push the Docker image with the corresponding tag
184
+ - name: Build and Push Docker Image (Tagged)
185
+ if: startsWith(github.ref, 'refs/tags/')
186
+ uses: docker/build-push-action@v4
187
+ with:
188
+ context: .
189
+ file: Dockerfile
190
+ push: true
191
+ tags: ${{ env.DOCKER_REGISTRY }}/${{ env.IMAGE_NAME }}:${{ github.ref_name }}
192
+ labels: version=${{ github.run_id }}
193
+ platforms: linux/amd64,linux/arm64
194
+ build-args: |
195
+ USE_ROCM=1
196
+
.gitignore ADDED
@@ -0,0 +1,166 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ voices/
2
+ .env
3
+ speech.env
4
+ config/pre_process_map.yaml
5
+ config/voice_to_speaker.yaml
6
+
7
+ # Byte-compiled / optimized / DLL files
8
+ __pycache__/
9
+ *.py[cod]
10
+ *$py.class
11
+
12
+ # C extensions
13
+ *.so
14
+
15
+ # Distribution / packaging
16
+ .Python
17
+ build/
18
+ develop-eggs/
19
+ dist/
20
+ downloads/
21
+ eggs/
22
+ .eggs/
23
+ lib/
24
+ lib64/
25
+ parts/
26
+ sdist/
27
+ var/
28
+ wheels/
29
+ share/python-wheels/
30
+ *.egg-info/
31
+ .installed.cfg
32
+ *.egg
33
+ MANIFEST
34
+
35
+ # PyInstaller
36
+ # Usually these files are written by a python script from a template
37
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
38
+ *.manifest
39
+ *.spec
40
+
41
+ # Installer logs
42
+ pip-log.txt
43
+ pip-delete-this-directory.txt
44
+
45
+ # Unit test / coverage reports
46
+ htmlcov/
47
+ .tox/
48
+ .nox/
49
+ .coverage
50
+ .coverage.*
51
+ .cache
52
+ nosetests.xml
53
+ coverage.xml
54
+ *.cover
55
+ *.py,cover
56
+ .hypothesis/
57
+ .pytest_cache/
58
+ cover/
59
+
60
+ # Translations
61
+ *.mo
62
+ *.pot
63
+
64
+ # Django stuff:
65
+ *.log
66
+ local_settings.py
67
+ db.sqlite3
68
+ db.sqlite3-journal
69
+
70
+ # Flask stuff:
71
+ instance/
72
+ .webassets-cache
73
+
74
+ # Scrapy stuff:
75
+ .scrapy
76
+
77
+ # Sphinx documentation
78
+ docs/_build/
79
+
80
+ # PyBuilder
81
+ .pybuilder/
82
+ target/
83
+
84
+ # Jupyter Notebook
85
+ .ipynb_checkpoints
86
+
87
+ # IPython
88
+ profile_default/
89
+ ipython_config.py
90
+
91
+ # pyenv
92
+ # For a library or package, you might want to ignore these files since the code is
93
+ # intended to run in multiple environments; otherwise, check them in:
94
+ # .python-version
95
+
96
+ # pipenv
97
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
98
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
99
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
100
+ # install all needed dependencies.
101
+ #Pipfile.lock
102
+
103
+ # poetry
104
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
105
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
106
+ # commonly ignored for libraries.
107
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
108
+ #poetry.lock
109
+
110
+ # pdm
111
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
112
+ #pdm.lock
113
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
114
+ # in version control.
115
+ # https://pdm.fming.dev/#use-with-ide
116
+ .pdm.toml
117
+
118
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
119
+ __pypackages__/
120
+
121
+ # Celery stuff
122
+ celerybeat-schedule
123
+ celerybeat.pid
124
+
125
+ # SageMath parsed files
126
+ *.sage.py
127
+
128
+ # Environments
129
+ .env
130
+ .venv
131
+ env/
132
+ venv/
133
+ ENV/
134
+ env.bak/
135
+ venv.bak/
136
+
137
+ # Spyder project settings
138
+ .spyderproject
139
+ .spyproject
140
+
141
+ # Rope project settings
142
+ .ropeproject
143
+
144
+ # mkdocs documentation
145
+ /site
146
+
147
+ # mypy
148
+ .mypy_cache/
149
+ .dmypy.json
150
+ dmypy.json
151
+
152
+ # Pyre type checker
153
+ .pyre/
154
+
155
+ # pytype static type analyzer
156
+ .pytype/
157
+
158
+ # Cython debug symbols
159
+ cython_debug/
160
+
161
+ # PyCharm
162
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
163
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
164
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
165
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
166
+ #.idea/
Dockerfile ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ RUN --mount=type=cache,target=/root/.cache/pip pip install -U pip
4
+
5
+ ARG TARGETPLATFORM
6
+ RUN apt-get update && apt-get install --no-install-recommends -y curl ffmpeg
7
+ RUN if [ "$TARGETPLATFORM" != "linux/amd64" ]; then apt-get install --no-install-recommends -y build-essential ; fi
8
+ RUN if [ "$TARGETPLATFORM" != "linux/amd64" ]; then curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y ; fi
9
+ ENV PATH="/root/.cargo/bin:${PATH}"
10
+ # for deepspeed support - doesn't seem worth it, image +7.5GB, over the 10GB ghcr.io limit, and no noticeable gain in speed or VRAM usage?
11
+ #RUN curl -O https://developer.download.nvidia.com/compute/cuda/repos/debian11/x86_64/cuda-keyring_1.1-1_all.deb
12
+ #RUN dpkg -i cuda-keyring_1.1-1_all.deb && rm cuda-keyring_1.1-1_all.deb
13
+ #RUN apt-get update && apt-get install --no-install-recommends -y libaio-dev build-essential cuda-toolkit
14
+ #ENV CUDA_HOME=/usr/local/cuda
15
+ RUN apt-get clean && rm -rf /var/lib/apt/lists/*
16
+
17
+ WORKDIR /app
18
+ RUN mkdir -p voices config
19
+
20
+ ARG USE_ROCM
21
+ ENV USE_ROCM=${USE_ROCM}
22
+
23
+ COPY requirements*.txt /app/
24
+ RUN if [ "${USE_ROCM}" = "1" ]; then mv /app/requirements-rocm.txt /app/requirements.txt; fi
25
+ RUN --mount=type=cache,target=/root/.cache/pip pip install -r requirements.txt
26
+
27
+ COPY *.py *.sh *.default.yaml README.md LICENSE /app/
28
+
29
+ ARG PRELOAD_MODEL
30
+ ENV PRELOAD_MODEL=${PRELOAD_MODEL}
31
+ ENV TTS_HOME=voices
32
+ ENV HF_HOME=voices
33
+ ENV COQUI_TOS_AGREED=1
34
+
35
+ CMD bash startup.sh
Dockerfile.min ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM python:3.11-slim
2
+
3
+ ARG TARGETPLATFORM
4
+ RUN apt-get update && apt-get install --no-install-recommends -y curl ffmpeg
5
+ RUN if [ "$TARGETPLATFORM" != "linux/amd64" ]; then apt-get install --no-install-recommends -y build-essential ; fi
6
+ RUN if [ "$TARGETPLATFORM" != "linux/amd64" ]; then curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y ; fi
7
+ ENV PATH="/root/.cargo/bin:${PATH}"
8
+ RUN apt-get clean && rm -rf /var/lib/apt/lists/*
9
+
10
+ WORKDIR /app
11
+ RUN mkdir -p voices config
12
+
13
+ COPY requirements*.txt /app/
14
+ RUN --mount=type=cache,target=/root/.cache/pip pip install -r requirements-min.txt
15
+ COPY *.py *.sh *.default.yaml README.md LICENSE /app/
16
+
17
+ ENV TTS_HOME=voices
18
+ ENV HF_HOME=voices
19
+
20
+ CMD bash startup.min.sh
LICENSE ADDED
@@ -0,0 +1,661 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ GNU AFFERO GENERAL PUBLIC LICENSE
2
+ Version 3, 19 November 2007
3
+
4
+ Copyright (C) 2007 Free Software Foundation, Inc. <https://fsf.org/>
5
+ Everyone is permitted to copy and distribute verbatim copies
6
+ of this license document, but changing it is not allowed.
7
+
8
+ Preamble
9
+
10
+ The GNU Affero General Public License is a free, copyleft license for
11
+ software and other kinds of works, specifically designed to ensure
12
+ cooperation with the community in the case of network server software.
13
+
14
+ The licenses for most software and other practical works are designed
15
+ to take away your freedom to share and change the works. By contrast,
16
+ our General Public Licenses are intended to guarantee your freedom to
17
+ share and change all versions of a program--to make sure it remains free
18
+ software for all its users.
19
+
20
+ When we speak of free software, we are referring to freedom, not
21
+ price. Our General Public Licenses are designed to make sure that you
22
+ have the freedom to distribute copies of free software (and charge for
23
+ them if you wish), that you receive source code or can get it if you
24
+ want it, that you can change the software or use pieces of it in new
25
+ free programs, and that you know you can do these things.
26
+
27
+ Developers that use our General Public Licenses protect your rights
28
+ with two steps: (1) assert copyright on the software, and (2) offer
29
+ you this License which gives you legal permission to copy, distribute
30
+ and/or modify the software.
31
+
32
+ A secondary benefit of defending all users' freedom is that
33
+ improvements made in alternate versions of the program, if they
34
+ receive widespread use, become available for other developers to
35
+ incorporate. Many developers of free software are heartened and
36
+ encouraged by the resulting cooperation. However, in the case of
37
+ software used on network servers, this result may fail to come about.
38
+ The GNU General Public License permits making a modified version and
39
+ letting the public access it on a server without ever releasing its
40
+ source code to the public.
41
+
42
+ The GNU Affero General Public License is designed specifically to
43
+ ensure that, in such cases, the modified source code becomes available
44
+ to the community. It requires the operator of a network server to
45
+ provide the source code of the modified version running there to the
46
+ users of that server. Therefore, public use of a modified version, on
47
+ a publicly accessible server, gives the public access to the source
48
+ code of the modified version.
49
+
50
+ An older license, called the Affero General Public License and
51
+ published by Affero, was designed to accomplish similar goals. This is
52
+ a different license, not a version of the Affero GPL, but Affero has
53
+ released a new version of the Affero GPL which permits relicensing under
54
+ this license.
55
+
56
+ The precise terms and conditions for copying, distribution and
57
+ modification follow.
58
+
59
+ TERMS AND CONDITIONS
60
+
61
+ 0. Definitions.
62
+
63
+ "This License" refers to version 3 of the GNU Affero General Public License.
64
+
65
+ "Copyright" also means copyright-like laws that apply to other kinds of
66
+ works, such as semiconductor masks.
67
+
68
+ "The Program" refers to any copyrightable work licensed under this
69
+ License. Each licensee is addressed as "you". "Licensees" and
70
+ "recipients" may be individuals or organizations.
71
+
72
+ To "modify" a work means to copy from or adapt all or part of the work
73
+ in a fashion requiring copyright permission, other than the making of an
74
+ exact copy. The resulting work is called a "modified version" of the
75
+ earlier work or a work "based on" the earlier work.
76
+
77
+ A "covered work" means either the unmodified Program or a work based
78
+ on the Program.
79
+
80
+ To "propagate" a work means to do anything with it that, without
81
+ permission, would make you directly or secondarily liable for
82
+ infringement under applicable copyright law, except executing it on a
83
+ computer or modifying a private copy. Propagation includes copying,
84
+ distribution (with or without modification), making available to the
85
+ public, and in some countries other activities as well.
86
+
87
+ To "convey" a work means any kind of propagation that enables other
88
+ parties to make or receive copies. Mere interaction with a user through
89
+ a computer network, with no transfer of a copy, is not conveying.
90
+
91
+ An interactive user interface displays "Appropriate Legal Notices"
92
+ to the extent that it includes a convenient and prominently visible
93
+ feature that (1) displays an appropriate copyright notice, and (2)
94
+ tells the user that there is no warranty for the work (except to the
95
+ extent that warranties are provided), that licensees may convey the
96
+ work under this License, and how to view a copy of this License. If
97
+ the interface presents a list of user commands or options, such as a
98
+ menu, a prominent item in the list meets this criterion.
99
+
100
+ 1. Source Code.
101
+
102
+ The "source code" for a work means the preferred form of the work
103
+ for making modifications to it. "Object code" means any non-source
104
+ form of a work.
105
+
106
+ A "Standard Interface" means an interface that either is an official
107
+ standard defined by a recognized standards body, or, in the case of
108
+ interfaces specified for a particular programming language, one that
109
+ is widely used among developers working in that language.
110
+
111
+ The "System Libraries" of an executable work include anything, other
112
+ than the work as a whole, that (a) is included in the normal form of
113
+ packaging a Major Component, but which is not part of that Major
114
+ Component, and (b) serves only to enable use of the work with that
115
+ Major Component, or to implement a Standard Interface for which an
116
+ implementation is available to the public in source code form. A
117
+ "Major Component", in this context, means a major essential component
118
+ (kernel, window system, and so on) of the specific operating system
119
+ (if any) on which the executable work runs, or a compiler used to
120
+ produce the work, or an object code interpreter used to run it.
121
+
122
+ The "Corresponding Source" for a work in object code form means all
123
+ the source code needed to generate, install, and (for an executable
124
+ work) run the object code and to modify the work, including scripts to
125
+ control those activities. However, it does not include the work's
126
+ System Libraries, or general-purpose tools or generally available free
127
+ programs which are used unmodified in performing those activities but
128
+ which are not part of the work. For example, Corresponding Source
129
+ includes interface definition files associated with source files for
130
+ the work, and the source code for shared libraries and dynamically
131
+ linked subprograms that the work is specifically designed to require,
132
+ such as by intimate data communication or control flow between those
133
+ subprograms and other parts of the work.
134
+
135
+ The Corresponding Source need not include anything that users
136
+ can regenerate automatically from other parts of the Corresponding
137
+ Source.
138
+
139
+ The Corresponding Source for a work in source code form is that
140
+ same work.
141
+
142
+ 2. Basic Permissions.
143
+
144
+ All rights granted under this License are granted for the term of
145
+ copyright on the Program, and are irrevocable provided the stated
146
+ conditions are met. This License explicitly affirms your unlimited
147
+ permission to run the unmodified Program. The output from running a
148
+ covered work is covered by this License only if the output, given its
149
+ content, constitutes a covered work. This License acknowledges your
150
+ rights of fair use or other equivalent, as provided by copyright law.
151
+
152
+ You may make, run and propagate covered works that you do not
153
+ convey, without conditions so long as your license otherwise remains
154
+ in force. You may convey covered works to others for the sole purpose
155
+ of having them make modifications exclusively for you, or provide you
156
+ with facilities for running those works, provided that you comply with
157
+ the terms of this License in conveying all material for which you do
158
+ not control copyright. Those thus making or running the covered works
159
+ for you must do so exclusively on your behalf, under your direction
160
+ and control, on terms that prohibit them from making any copies of
161
+ your copyrighted material outside their relationship with you.
162
+
163
+ Conveying under any other circumstances is permitted solely under
164
+ the conditions stated below. Sublicensing is not allowed; section 10
165
+ makes it unnecessary.
166
+
167
+ 3. Protecting Users' Legal Rights From Anti-Circumvention Law.
168
+
169
+ No covered work shall be deemed part of an effective technological
170
+ measure under any applicable law fulfilling obligations under article
171
+ 11 of the WIPO copyright treaty adopted on 20 December 1996, or
172
+ similar laws prohibiting or restricting circumvention of such
173
+ measures.
174
+
175
+ When you convey a covered work, you waive any legal power to forbid
176
+ circumvention of technological measures to the extent such circumvention
177
+ is effected by exercising rights under this License with respect to
178
+ the covered work, and you disclaim any intention to limit operation or
179
+ modification of the work as a means of enforcing, against the work's
180
+ users, your or third parties' legal rights to forbid circumvention of
181
+ technological measures.
182
+
183
+ 4. Conveying Verbatim Copies.
184
+
185
+ You may convey verbatim copies of the Program's source code as you
186
+ receive it, in any medium, provided that you conspicuously and
187
+ appropriately publish on each copy an appropriate copyright notice;
188
+ keep intact all notices stating that this License and any
189
+ non-permissive terms added in accord with section 7 apply to the code;
190
+ keep intact all notices of the absence of any warranty; and give all
191
+ recipients a copy of this License along with the Program.
192
+
193
+ You may charge any price or no price for each copy that you convey,
194
+ and you may offer support or warranty protection for a fee.
195
+
196
+ 5. Conveying Modified Source Versions.
197
+
198
+ You may convey a work based on the Program, or the modifications to
199
+ produce it from the Program, in the form of source code under the
200
+ terms of section 4, provided that you also meet all of these conditions:
201
+
202
+ a) The work must carry prominent notices stating that you modified
203
+ it, and giving a relevant date.
204
+
205
+ b) The work must carry prominent notices stating that it is
206
+ released under this License and any conditions added under section
207
+ 7. This requirement modifies the requirement in section 4 to
208
+ "keep intact all notices".
209
+
210
+ c) You must license the entire work, as a whole, under this
211
+ License to anyone who comes into possession of a copy. This
212
+ License will therefore apply, along with any applicable section 7
213
+ additional terms, to the whole of the work, and all its parts,
214
+ regardless of how they are packaged. This License gives no
215
+ permission to license the work in any other way, but it does not
216
+ invalidate such permission if you have separately received it.
217
+
218
+ d) If the work has interactive user interfaces, each must display
219
+ Appropriate Legal Notices; however, if the Program has interactive
220
+ interfaces that do not display Appropriate Legal Notices, your
221
+ work need not make them do so.
222
+
223
+ A compilation of a covered work with other separate and independent
224
+ works, which are not by their nature extensions of the covered work,
225
+ and which are not combined with it such as to form a larger program,
226
+ in or on a volume of a storage or distribution medium, is called an
227
+ "aggregate" if the compilation and its resulting copyright are not
228
+ used to limit the access or legal rights of the compilation's users
229
+ beyond what the individual works permit. Inclusion of a covered work
230
+ in an aggregate does not cause this License to apply to the other
231
+ parts of the aggregate.
232
+
233
+ 6. Conveying Non-Source Forms.
234
+
235
+ You may convey a covered work in object code form under the terms
236
+ of sections 4 and 5, provided that you also convey the
237
+ machine-readable Corresponding Source under the terms of this License,
238
+ in one of these ways:
239
+
240
+ a) Convey the object code in, or embodied in, a physical product
241
+ (including a physical distribution medium), accompanied by the
242
+ Corresponding Source fixed on a durable physical medium
243
+ customarily used for software interchange.
244
+
245
+ b) Convey the object code in, or embodied in, a physical product
246
+ (including a physical distribution medium), accompanied by a
247
+ written offer, valid for at least three years and valid for as
248
+ long as you offer spare parts or customer support for that product
249
+ model, to give anyone who possesses the object code either (1) a
250
+ copy of the Corresponding Source for all the software in the
251
+ product that is covered by this License, on a durable physical
252
+ medium customarily used for software interchange, for a price no
253
+ more than your reasonable cost of physically performing this
254
+ conveying of source, or (2) access to copy the
255
+ Corresponding Source from a network server at no charge.
256
+
257
+ c) Convey individual copies of the object code with a copy of the
258
+ written offer to provide the Corresponding Source. This
259
+ alternative is allowed only occasionally and noncommercially, and
260
+ only if you received the object code with such an offer, in accord
261
+ with subsection 6b.
262
+
263
+ d) Convey the object code by offering access from a designated
264
+ place (gratis or for a charge), and offer equivalent access to the
265
+ Corresponding Source in the same way through the same place at no
266
+ further charge. You need not require recipients to copy the
267
+ Corresponding Source along with the object code. If the place to
268
+ copy the object code is a network server, the Corresponding Source
269
+ may be on a different server (operated by you or a third party)
270
+ that supports equivalent copying facilities, provided you maintain
271
+ clear directions next to the object code saying where to find the
272
+ Corresponding Source. Regardless of what server hosts the
273
+ Corresponding Source, you remain obligated to ensure that it is
274
+ available for as long as needed to satisfy these requirements.
275
+
276
+ e) Convey the object code using peer-to-peer transmission, provided
277
+ you inform other peers where the object code and Corresponding
278
+ Source of the work are being offered to the general public at no
279
+ charge under subsection 6d.
280
+
281
+ A separable portion of the object code, whose source code is excluded
282
+ from the Corresponding Source as a System Library, need not be
283
+ included in conveying the object code work.
284
+
285
+ A "User Product" is either (1) a "consumer product", which means any
286
+ tangible personal property which is normally used for personal, family,
287
+ or household purposes, or (2) anything designed or sold for incorporation
288
+ into a dwelling. In determining whether a product is a consumer product,
289
+ doubtful cases shall be resolved in favor of coverage. For a particular
290
+ product received by a particular user, "normally used" refers to a
291
+ typical or common use of that class of product, regardless of the status
292
+ of the particular user or of the way in which the particular user
293
+ actually uses, or expects or is expected to use, the product. A product
294
+ is a consumer product regardless of whether the product has substantial
295
+ commercial, industrial or non-consumer uses, unless such uses represent
296
+ the only significant mode of use of the product.
297
+
298
+ "Installation Information" for a User Product means any methods,
299
+ procedures, authorization keys, or other information required to install
300
+ and execute modified versions of a covered work in that User Product from
301
+ a modified version of its Corresponding Source. The information must
302
+ suffice to ensure that the continued functioning of the modified object
303
+ code is in no case prevented or interfered with solely because
304
+ modification has been made.
305
+
306
+ If you convey an object code work under this section in, or with, or
307
+ specifically for use in, a User Product, and the conveying occurs as
308
+ part of a transaction in which the right of possession and use of the
309
+ User Product is transferred to the recipient in perpetuity or for a
310
+ fixed term (regardless of how the transaction is characterized), the
311
+ Corresponding Source conveyed under this section must be accompanied
312
+ by the Installation Information. But this requirement does not apply
313
+ if neither you nor any third party retains the ability to install
314
+ modified object code on the User Product (for example, the work has
315
+ been installed in ROM).
316
+
317
+ The requirement to provide Installation Information does not include a
318
+ requirement to continue to provide support service, warranty, or updates
319
+ for a work that has been modified or installed by the recipient, or for
320
+ the User Product in which it has been modified or installed. Access to a
321
+ network may be denied when the modification itself materially and
322
+ adversely affects the operation of the network or violates the rules and
323
+ protocols for communication across the network.
324
+
325
+ Corresponding Source conveyed, and Installation Information provided,
326
+ in accord with this section must be in a format that is publicly
327
+ documented (and with an implementation available to the public in
328
+ source code form), and must require no special password or key for
329
+ unpacking, reading or copying.
330
+
331
+ 7. Additional Terms.
332
+
333
+ "Additional permissions" are terms that supplement the terms of this
334
+ License by making exceptions from one or more of its conditions.
335
+ Additional permissions that are applicable to the entire Program shall
336
+ be treated as though they were included in this License, to the extent
337
+ that they are valid under applicable law. If additional permissions
338
+ apply only to part of the Program, that part may be used separately
339
+ under those permissions, but the entire Program remains governed by
340
+ this License without regard to the additional permissions.
341
+
342
+ When you convey a copy of a covered work, you may at your option
343
+ remove any additional permissions from that copy, or from any part of
344
+ it. (Additional permissions may be written to require their own
345
+ removal in certain cases when you modify the work.) You may place
346
+ additional permissions on material, added by you to a covered work,
347
+ for which you have or can give appropriate copyright permission.
348
+
349
+ Notwithstanding any other provision of this License, for material you
350
+ add to a covered work, you may (if authorized by the copyright holders of
351
+ that material) supplement the terms of this License with terms:
352
+
353
+ a) Disclaiming warranty or limiting liability differently from the
354
+ terms of sections 15 and 16 of this License; or
355
+
356
+ b) Requiring preservation of specified reasonable legal notices or
357
+ author attributions in that material or in the Appropriate Legal
358
+ Notices displayed by works containing it; or
359
+
360
+ c) Prohibiting misrepresentation of the origin of that material, or
361
+ requiring that modified versions of such material be marked in
362
+ reasonable ways as different from the original version; or
363
+
364
+ d) Limiting the use for publicity purposes of names of licensors or
365
+ authors of the material; or
366
+
367
+ e) Declining to grant rights under trademark law for use of some
368
+ trade names, trademarks, or service marks; or
369
+
370
+ f) Requiring indemnification of licensors and authors of that
371
+ material by anyone who conveys the material (or modified versions of
372
+ it) with contractual assumptions of liability to the recipient, for
373
+ any liability that these contractual assumptions directly impose on
374
+ those licensors and authors.
375
+
376
+ All other non-permissive additional terms are considered "further
377
+ restrictions" within the meaning of section 10. If the Program as you
378
+ received it, or any part of it, contains a notice stating that it is
379
+ governed by this License along with a term that is a further
380
+ restriction, you may remove that term. If a license document contains
381
+ a further restriction but permits relicensing or conveying under this
382
+ License, you may add to a covered work material governed by the terms
383
+ of that license document, provided that the further restriction does
384
+ not survive such relicensing or conveying.
385
+
386
+ If you add terms to a covered work in accord with this section, you
387
+ must place, in the relevant source files, a statement of the
388
+ additional terms that apply to those files, or a notice indicating
389
+ where to find the applicable terms.
390
+
391
+ Additional terms, permissive or non-permissive, may be stated in the
392
+ form of a separately written license, or stated as exceptions;
393
+ the above requirements apply either way.
394
+
395
+ 8. Termination.
396
+
397
+ You may not propagate or modify a covered work except as expressly
398
+ provided under this License. Any attempt otherwise to propagate or
399
+ modify it is void, and will automatically terminate your rights under
400
+ this License (including any patent licenses granted under the third
401
+ paragraph of section 11).
402
+
403
+ However, if you cease all violation of this License, then your
404
+ license from a particular copyright holder is reinstated (a)
405
+ provisionally, unless and until the copyright holder explicitly and
406
+ finally terminates your license, and (b) permanently, if the copyright
407
+ holder fails to notify you of the violation by some reasonable means
408
+ prior to 60 days after the cessation.
409
+
410
+ Moreover, your license from a particular copyright holder is
411
+ reinstated permanently if the copyright holder notifies you of the
412
+ violation by some reasonable means, this is the first time you have
413
+ received notice of violation of this License (for any work) from that
414
+ copyright holder, and you cure the violation prior to 30 days after
415
+ your receipt of the notice.
416
+
417
+ Termination of your rights under this section does not terminate the
418
+ licenses of parties who have received copies or rights from you under
419
+ this License. If your rights have been terminated and not permanently
420
+ reinstated, you do not qualify to receive new licenses for the same
421
+ material under section 10.
422
+
423
+ 9. Acceptance Not Required for Having Copies.
424
+
425
+ You are not required to accept this License in order to receive or
426
+ run a copy of the Program. Ancillary propagation of a covered work
427
+ occurring solely as a consequence of using peer-to-peer transmission
428
+ to receive a copy likewise does not require acceptance. However,
429
+ nothing other than this License grants you permission to propagate or
430
+ modify any covered work. These actions infringe copyright if you do
431
+ not accept this License. Therefore, by modifying or propagating a
432
+ covered work, you indicate your acceptance of this License to do so.
433
+
434
+ 10. Automatic Licensing of Downstream Recipients.
435
+
436
+ Each time you convey a covered work, the recipient automatically
437
+ receives a license from the original licensors, to run, modify and
438
+ propagate that work, subject to this License. You are not responsible
439
+ for enforcing compliance by third parties with this License.
440
+
441
+ An "entity transaction" is a transaction transferring control of an
442
+ organization, or substantially all assets of one, or subdividing an
443
+ organization, or merging organizations. If propagation of a covered
444
+ work results from an entity transaction, each party to that
445
+ transaction who receives a copy of the work also receives whatever
446
+ licenses to the work the party's predecessor in interest had or could
447
+ give under the previous paragraph, plus a right to possession of the
448
+ Corresponding Source of the work from the predecessor in interest, if
449
+ the predecessor has it or can get it with reasonable efforts.
450
+
451
+ You may not impose any further restrictions on the exercise of the
452
+ rights granted or affirmed under this License. For example, you may
453
+ not impose a license fee, royalty, or other charge for exercise of
454
+ rights granted under this License, and you may not initiate litigation
455
+ (including a cross-claim or counterclaim in a lawsuit) alleging that
456
+ any patent claim is infringed by making, using, selling, offering for
457
+ sale, or importing the Program or any portion of it.
458
+
459
+ 11. Patents.
460
+
461
+ A "contributor" is a copyright holder who authorizes use under this
462
+ License of the Program or a work on which the Program is based. The
463
+ work thus licensed is called the contributor's "contributor version".
464
+
465
+ A contributor's "essential patent claims" are all patent claims
466
+ owned or controlled by the contributor, whether already acquired or
467
+ hereafter acquired, that would be infringed by some manner, permitted
468
+ by this License, of making, using, or selling its contributor version,
469
+ but do not include claims that would be infringed only as a
470
+ consequence of further modification of the contributor version. For
471
+ purposes of this definition, "control" includes the right to grant
472
+ patent sublicenses in a manner consistent with the requirements of
473
+ this License.
474
+
475
+ Each contributor grants you a non-exclusive, worldwide, royalty-free
476
+ patent license under the contributor's essential patent claims, to
477
+ make, use, sell, offer for sale, import and otherwise run, modify and
478
+ propagate the contents of its contributor version.
479
+
480
+ In the following three paragraphs, a "patent license" is any express
481
+ agreement or commitment, however denominated, not to enforce a patent
482
+ (such as an express permission to practice a patent or covenant not to
483
+ sue for patent infringement). To "grant" such a patent license to a
484
+ party means to make such an agreement or commitment not to enforce a
485
+ patent against the party.
486
+
487
+ If you convey a covered work, knowingly relying on a patent license,
488
+ and the Corresponding Source of the work is not available for anyone
489
+ to copy, free of charge and under the terms of this License, through a
490
+ publicly available network server or other readily accessible means,
491
+ then you must either (1) cause the Corresponding Source to be so
492
+ available, or (2) arrange to deprive yourself of the benefit of the
493
+ patent license for this particular work, or (3) arrange, in a manner
494
+ consistent with the requirements of this License, to extend the patent
495
+ license to downstream recipients. "Knowingly relying" means you have
496
+ actual knowledge that, but for the patent license, your conveying the
497
+ covered work in a country, or your recipient's use of the covered work
498
+ in a country, would infringe one or more identifiable patents in that
499
+ country that you have reason to believe are valid.
500
+
501
+ If, pursuant to or in connection with a single transaction or
502
+ arrangement, you convey, or propagate by procuring conveyance of, a
503
+ covered work, and grant a patent license to some of the parties
504
+ receiving the covered work authorizing them to use, propagate, modify
505
+ or convey a specific copy of the covered work, then the patent license
506
+ you grant is automatically extended to all recipients of the covered
507
+ work and works based on it.
508
+
509
+ A patent license is "discriminatory" if it does not include within
510
+ the scope of its coverage, prohibits the exercise of, or is
511
+ conditioned on the non-exercise of one or more of the rights that are
512
+ specifically granted under this License. You may not convey a covered
513
+ work if you are a party to an arrangement with a third party that is
514
+ in the business of distributing software, under which you make payment
515
+ to the third party based on the extent of your activity of conveying
516
+ the work, and under which the third party grants, to any of the
517
+ parties who would receive the covered work from you, a discriminatory
518
+ patent license (a) in connection with copies of the covered work
519
+ conveyed by you (or copies made from those copies), or (b) primarily
520
+ for and in connection with specific products or compilations that
521
+ contain the covered work, unless you entered into that arrangement,
522
+ or that patent license was granted, prior to 28 March 2007.
523
+
524
+ Nothing in this License shall be construed as excluding or limiting
525
+ any implied license or other defenses to infringement that may
526
+ otherwise be available to you under applicable patent law.
527
+
528
+ 12. No Surrender of Others' Freedom.
529
+
530
+ If conditions are imposed on you (whether by court order, agreement or
531
+ otherwise) that contradict the conditions of this License, they do not
532
+ excuse you from the conditions of this License. If you cannot convey a
533
+ covered work so as to satisfy simultaneously your obligations under this
534
+ License and any other pertinent obligations, then as a consequence you may
535
+ not convey it at all. For example, if you agree to terms that obligate you
536
+ to collect a royalty for further conveying from those to whom you convey
537
+ the Program, the only way you could satisfy both those terms and this
538
+ License would be to refrain entirely from conveying the Program.
539
+
540
+ 13. Remote Network Interaction; Use with the GNU General Public License.
541
+
542
+ Notwithstanding any other provision of this License, if you modify the
543
+ Program, your modified version must prominently offer all users
544
+ interacting with it remotely through a computer network (if your version
545
+ supports such interaction) an opportunity to receive the Corresponding
546
+ Source of your version by providing access to the Corresponding Source
547
+ from a network server at no charge, through some standard or customary
548
+ means of facilitating copying of software. This Corresponding Source
549
+ shall include the Corresponding Source for any work covered by version 3
550
+ of the GNU General Public License that is incorporated pursuant to the
551
+ following paragraph.
552
+
553
+ Notwithstanding any other provision of this License, you have
554
+ permission to link or combine any covered work with a work licensed
555
+ under version 3 of the GNU General Public License into a single
556
+ combined work, and to convey the resulting work. The terms of this
557
+ License will continue to apply to the part which is the covered work,
558
+ but the work with which it is combined will remain governed by version
559
+ 3 of the GNU General Public License.
560
+
561
+ 14. Revised Versions of this License.
562
+
563
+ The Free Software Foundation may publish revised and/or new versions of
564
+ the GNU Affero General Public License from time to time. Such new versions
565
+ will be similar in spirit to the present version, but may differ in detail to
566
+ address new problems or concerns.
567
+
568
+ Each version is given a distinguishing version number. If the
569
+ Program specifies that a certain numbered version of the GNU Affero General
570
+ Public License "or any later version" applies to it, you have the
571
+ option of following the terms and conditions either of that numbered
572
+ version or of any later version published by the Free Software
573
+ Foundation. If the Program does not specify a version number of the
574
+ GNU Affero General Public License, you may choose any version ever published
575
+ by the Free Software Foundation.
576
+
577
+ If the Program specifies that a proxy can decide which future
578
+ versions of the GNU Affero General Public License can be used, that proxy's
579
+ public statement of acceptance of a version permanently authorizes you
580
+ to choose that version for the Program.
581
+
582
+ Later license versions may give you additional or different
583
+ permissions. However, no additional obligations are imposed on any
584
+ author or copyright holder as a result of your choosing to follow a
585
+ later version.
586
+
587
+ 15. Disclaimer of Warranty.
588
+
589
+ THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
590
+ APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
591
+ HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
592
+ OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
593
+ THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
594
+ PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
595
+ IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
596
+ ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
597
+
598
+ 16. Limitation of Liability.
599
+
600
+ IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
601
+ WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
602
+ THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
603
+ GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
604
+ USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
605
+ DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
606
+ PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
607
+ EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
608
+ SUCH DAMAGES.
609
+
610
+ 17. Interpretation of Sections 15 and 16.
611
+
612
+ If the disclaimer of warranty and limitation of liability provided
613
+ above cannot be given local legal effect according to their terms,
614
+ reviewing courts shall apply local law that most closely approximates
615
+ an absolute waiver of all civil liability in connection with the
616
+ Program, unless a warranty or assumption of liability accompanies a
617
+ copy of the Program in return for a fee.
618
+
619
+ END OF TERMS AND CONDITIONS
620
+
621
+ How to Apply These Terms to Your New Programs
622
+
623
+ If you develop a new program, and you want it to be of the greatest
624
+ possible use to the public, the best way to achieve this is to make it
625
+ free software which everyone can redistribute and change under these terms.
626
+
627
+ To do so, attach the following notices to the program. It is safest
628
+ to attach them to the start of each source file to most effectively
629
+ state the exclusion of warranty; and each file should have at least
630
+ the "copyright" line and a pointer to where the full notice is found.
631
+
632
+ <one line to give the program's name and a brief idea of what it does.>
633
+ Copyright (C) <year> <name of author>
634
+
635
+ This program is free software: you can redistribute it and/or modify
636
+ it under the terms of the GNU Affero General Public License as published
637
+ by the Free Software Foundation, either version 3 of the License, or
638
+ (at your option) any later version.
639
+
640
+ This program is distributed in the hope that it will be useful,
641
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
642
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
643
+ GNU Affero General Public License for more details.
644
+
645
+ You should have received a copy of the GNU Affero General Public License
646
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
647
+
648
+ Also add information on how to contact you by electronic and paper mail.
649
+
650
+ If your software can interact with users remotely through a computer
651
+ network, you should also make sure that it provides a way for users to
652
+ get its source. For example, if your program is a web application, its
653
+ interface could display a "Source" link that leads users to an archive
654
+ of the code. There are many ways you could offer source, and different
655
+ solutions will be better for different programs; see section 13 for the
656
+ specific requirements.
657
+
658
+ You should also get your employer (if you work as a programmer) or school,
659
+ if any, to sign a "copyright disclaimer" for the program, if necessary.
660
+ For more information on this, and how to apply and follow the GNU AGPL, see
661
+ <https://www.gnu.org/licenses/>.
README.md CHANGED
@@ -1,11 +1,397 @@
1
- ---
2
- title: TTS OPENAI FREE
3
- emoji: 👀
4
- colorFrom: indigo
5
- colorTo: blue
6
- sdk: docker
7
- pinned: false
8
- license: mit
9
- ---
10
-
11
- Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # OpenedAI Speech
2
+
3
+ An OpenAI API compatible text to speech server.
4
+
5
+ * Compatible with the OpenAI audio/speech API
6
+ * Serves the [/v1/audio/speech endpoint](https://platform.openai.com/docs/api-reference/audio/createSpeech)
7
+ * Not affiliated with OpenAI in any way, does not require an OpenAI API Key
8
+ * A free, private, text-to-speech server with custom voice cloning
9
+
10
+ Full Compatibility:
11
+ * `tts-1`: `alloy`, `echo`, `fable`, `onyx`, `nova`, and `shimmer` (configurable)
12
+ * `tts-1-hd`: `alloy`, `echo`, `fable`, `onyx`, `nova`, and `shimmer` (configurable, uses OpenAI samples by default)
13
+ * response_format: `mp3`, `opus`, `aac`, `flac`, `wav` and `pcm`
14
+ * speed 0.25-4.0 (and more)
15
+
16
+ Details:
17
+ * Model `tts-1` via [piper tts](https://github.com/rhasspy/piper) (very fast, runs on cpu)
18
+ * You can map your own [piper voices](https://rhasspy.github.io/piper-samples/) via the `voice_to_speaker.yaml` configuration file
19
+ * Model `tts-1-hd` via [coqui-ai/TTS](https://github.com/coqui-ai/TTS) xtts_v2 voice cloning (fast, but requires around 4GB GPU VRAM)
20
+ * Custom cloned voices can be used for tts-1-hd, See: [Custom Voices Howto](#custom-voices-howto)
21
+ * 🌐 [Multilingual](#multilingual) support with XTTS voices, the language is automatically detected if not set
22
+ * [Custom fine-tuned XTTS model support](#custom-fine-tuned-model-support)
23
+ * Configurable [generation parameters](#generation-parameters)
24
+ * Streamed output while generating
25
+ * Occasionally, certain words or symbols may sound incorrect, you can fix them with regex via `pre_process_map.yaml`
26
+ * Tested with python 3.9-3.11, piper does not install on python 3.12 yet
27
+
28
+
29
+ If you find a better voice match for `tts-1` or `tts-1-hd`, please let me know so I can update the defaults.
30
+
31
+ ## Recent Changes
32
+
33
+ Version 0.17.2, 2024-07-01
34
+
35
+ * fix -min image (re: langdetect)
36
+
37
+ Version 0.17.1, 2024-07-01
38
+
39
+ * fix ROCm (add langdetect to requirements-rocm.txt)
40
+ * Fix zh-cn for xtts
41
+
42
+ Version 0.17.0, 2024-07-01
43
+
44
+ * Automatic language detection, thanks [@RodolfoCastanheira](https://github.com/RodolfoCastanheira)
45
+
46
+ Version 0.16.0, 2024-06-29
47
+
48
+ * Multi-client safe version. Audio generation is synchronized in a single process. The estimated 'realtime' factor of XTTS on a GPU is roughly 1/3, this means that multiple streams simultaneously, or `speed` over 2, may experience audio underrun (delays or pauses in playback). This makes multiple clients possible and safe, but in practice 2 or 3 simultaneous streams is the maximum without audio underrun.
49
+
50
+ Version 0.15.1, 2024-06-27
51
+
52
+ * Remove deepspeed from requirements.txt, it's too complex for typical users. A more detailed deepspeed install document will be required.
53
+
54
+ Version 0.15.0, 2024-06-26
55
+
56
+ * Switch to [coqui-tts](https://github.com/idiap/coqui-ai-TTS) (updated fork), updated simpler dependencies, torch 2.3, etc.
57
+ * Resolve cuda threading issues
58
+
59
+ Version 0.14.1, 2024-06-26
60
+
61
+ * Make deepspeed possible (`--use-deepspeed`), but not enabled in pre-built docker images (too large). Requires the cuda-toolkit installed, see the Dockerfile comment for details
62
+
63
+ Version 0.14.0, 2024-06-26
64
+
65
+ * Added `response_format`: `wav` and `pcm` support
66
+ * Output streaming (while generating) for `tts-1` and `tts-1-hd`
67
+ * Enhanced [generation parameters](#generation-parameters) for xtts models (temperature, top_p, etc.)
68
+ * Idle unload timer (optional) - doesn't work perfectly yet
69
+ * Improved error handling
70
+
71
+ Version 0.13.0, 2024-06-25
72
+
73
+ * Added [Custom fine-tuned XTTS model support](#custom-fine-tuned-model-support)
74
+ * Initial prebuilt arm64 image support (Apple M-series, Raspberry Pi - MPS is not supported in XTTS/torch), thanks [@JakeStevenson](https://github.com/JakeStevenson), [@hchasens](https://github.com/hchasens)
75
+ * Initial attempt at AMD GPU (ROCm 5.7) support
76
+ * Parler-tts support removed
77
+ * Move the *.default.yaml to the root folder
78
+ * Run the docker as a service by default (`restart: unless-stopped`)
79
+ * Added `audio_reader.py` for streaming text input and reading long texts
80
+
81
+ Version 0.12.3, 2024-06-17
82
+
83
+ * Additional logging details for BadRequests (400)
84
+
85
+ Version 0.12.2, 2024-06-16
86
+
87
+ * Fix :min image requirements (numpy<2?)
88
+
89
+ Version 0.12.0, 2024-06-16
90
+
91
+ * Improved error handling and logging
92
+ * Restore the original alloy tts-1-hd voice by default, use alloy-alt for the old voice.
93
+
94
+ Version 0.11.0, 2024-05-29
95
+
96
+ * 🌐 [Multilingual](#multilingual) support (16 languages) with XTTS
97
+ * Remove high Unicode filtering from the default `config/pre_process_map.yaml`
98
+ * Update Docker build & app startup. thanks @justinh-rahb
99
+ * Fix: "Plan failed with a cudnnException"
100
+ * Remove piper cuda support
101
+
102
+ Version: 0.10.1, 2024-05-05
103
+
104
+ * Remove `runtime: nvidia` from docker-compose.yml, this assumes nvidia/cuda compatible runtime is available by default. thanks [@jmtatsch](https://github.com/jmtatsch)
105
+
106
+ Version: 0.10.0, 2024-04-27
107
+
108
+ * Pre-built & tested docker images, smaller docker images (8GB or 860MB)
109
+ * Better upgrades: reorganize config files under `config/`, voice models under `voices/`
110
+ * **Compatibility!** If you customized your `voice_to_speaker.yaml` or `pre_process_map.yaml` you need to move them to the `config/` folder.
111
+ * default listen host to 0.0.0.0
112
+
113
+ Version: 0.9.0, 2024-04-23
114
+
115
+ * Fix bug with yaml and loading UTF-8
116
+ * New sample text-to-speech application `say.py`
117
+ * Smaller docker base image
118
+ * Add beta [parler-tts](https://huggingface.co/parler-tts/parler_tts_mini_v0.1) support (you can describe very basic features of the speaker voice), See: (https://www.text-description-to-speech.com/) for some examples of how to describe voices. Voices can be defined in the `voice_to_speaker.default.yaml`. Two example [parler-tts](https://huggingface.co/parler-tts/parler_tts_mini_v0.1) voices are included in the `voice_to_speaker.default.yaml` file. `parler-tts` is experimental software and is kind of slow. The exact voice will be slightly different each generation but should be similar to the basic description.
119
+
120
+ ...
121
+
122
+ Version: 0.7.3, 2024-03-20
123
+
124
+ * Allow different xtts versions per voice in `voice_to_speaker.yaml`, ex. xtts_v2.0.2
125
+ * Quality: Fix xtts sample rate (24000 vs. 22050 for piper) and pops
126
+
127
+
128
+ ## Installation instructions
129
+
130
+ ### Create a `speech.env` environment file
131
+
132
+ Copy the `sample.env` to `speech.env` (customize if needed)
133
+ ```bash
134
+ cp sample.env speech.env
135
+ ```
136
+
137
+ #### Defaults
138
+ ```bash
139
+ TTS_HOME=voices
140
+ HF_HOME=voices
141
+ #PRELOAD_MODEL=xtts
142
+ #PRELOAD_MODEL=xtts_v2.0.2
143
+ #EXTRA_ARGS=--log-level DEBUG --unload-timer 300
144
+ #USE_ROCM=1
145
+ ```
146
+
147
+ ### Option A: Manual installation
148
+ ```shell
149
+ # install curl and ffmpeg
150
+ sudo apt install curl ffmpeg
151
+ # Create & activate a new virtual environment (optional but recommended)
152
+ python -m venv .venv
153
+ source .venv/bin/activate
154
+ # Install the Python requirements
155
+ # - use requirements-rocm.txt for AMD GPU (ROCm support)
156
+ # - use requirements-min.txt for piper only (CPU only)
157
+ pip install -U -r requirements.txt
158
+ # run the server
159
+ bash startup.sh
160
+ ```
161
+
162
+ > On first run, the voice models will be downloaded automatically. This might take a while depending on your network connection.
163
+
164
+ ### Option B: Docker Image (*recommended*)
165
+
166
+ #### Nvidia GPU (cuda)
167
+
168
+ ```shell
169
+ docker compose up
170
+ ```
171
+
172
+ #### AMD GPU (ROCm support)
173
+
174
+ ```shell
175
+ docker compose -f docker-compose.rocm.yml up
176
+ ```
177
+
178
+ #### ARM64 (Apple M-series, Raspberry Pi)
179
+
180
+ > XTTS only has CPU support here and will be very slow, you can use the Nvidia image for XTTS with CPU (slow), or use the piper only image (recommended)
181
+
182
+ #### CPU only, No GPU (piper only)
183
+
184
+ > For a minimal docker image with only piper support (<1GB vs. 8GB).
185
+
186
+ ```shell
187
+ docker compose -f docker-compose.min.yml up
188
+ ```
189
+
190
+ ## Server Options
191
+
192
+ ```shell
193
+ usage: speech.py [-h] [--xtts_device XTTS_DEVICE] [--preload PRELOAD] [--unload-timer UNLOAD_TIMER] [--use-deepspeed] [--no-cache-speaker] [-P PORT] [-H HOST]
194
+ [-L {DEBUG,INFO,WARNING,ERROR,CRITICAL}]
195
+
196
+ OpenedAI Speech API Server
197
+
198
+ options:
199
+ -h, --help show this help message and exit
200
+ --xtts_device XTTS_DEVICE
201
+ Set the device for the xtts model. The special value of 'none' will use piper for all models. (default: cuda)
202
+ --preload PRELOAD Preload a model (Ex. 'xtts' or 'xtts_v2.0.2'). By default it's loaded on first use. (default: None)
203
+ --unload-timer UNLOAD_TIMER
204
+ Idle unload timer for the XTTS model in seconds, Ex. 900 for 15 minutes (default: None)
205
+ --use-deepspeed Use deepspeed with xtts (this option is unsupported) (default: False)
206
+ --no-cache-speaker Don't use the speaker wav embeddings cache (default: False)
207
+ -P PORT, --port PORT Server tcp port (default: 8000)
208
+ -H HOST, --host HOST Host to listen on, Ex. 0.0.0.0 (default: 0.0.0.0)
209
+ -L {DEBUG,INFO,WARNING,ERROR,CRITICAL}, --log-level {DEBUG,INFO,WARNING,ERROR,CRITICAL}
210
+ Set the log level (default: INFO)
211
+ ```
212
+
213
+
214
+ ## Sample Usage
215
+
216
+ You can use it like this:
217
+
218
+ ```shell
219
+ curl http://localhost:8000/v1/audio/speech -H "Content-Type: application/json" -d '{
220
+ "model": "tts-1",
221
+ "input": "The quick brown fox jumped over the lazy dog.",
222
+ "voice": "alloy",
223
+ "response_format": "mp3",
224
+ "speed": 1.0
225
+ }' > speech.mp3
226
+ ```
227
+
228
+ Or just like this:
229
+
230
+ ```shell
231
+ curl -s http://localhost:8000/v1/audio/speech -H "Content-Type: application/json" -d '{
232
+ "input": "The quick brown fox jumped over the lazy dog."}' > speech.mp3
233
+ ```
234
+
235
+ Or like this example from the [OpenAI Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech):
236
+
237
+ ```python
238
+ import openai
239
+
240
+ client = openai.OpenAI(
241
+ # This part is not needed if you set these environment variables before import openai
242
+ # export OPENAI_API_KEY=sk-11111111111
243
+ # export OPENAI_BASE_URL=http://localhost:8000/v1
244
+ api_key = "sk-111111111",
245
+ base_url = "http://localhost:8000/v1",
246
+ )
247
+
248
+ with client.audio.speech.with_streaming_response.create(
249
+ model="tts-1",
250
+ voice="alloy",
251
+ input="Today is a wonderful day to build something people love!"
252
+ ) as response:
253
+ response.stream_to_file("speech.mp3")
254
+ ```
255
+
256
+ Also see the `say.py` sample application for an example of how to use the openai-python API.
257
+
258
+ ```shell
259
+ # play the audio, requires 'pip install playsound'
260
+ python say.py -t "The quick brown fox jumped over the lazy dog." -p
261
+ # save to a file in flac format
262
+ python say.py -t "The quick brown fox jumped over the lazy dog." -m tts-1-hd -v onyx -f flac -o fox.flac
263
+ ```
264
+
265
+ You can also try the included `audio_reader.py` for listening to longer text and streamed input.
266
+
267
+ Example usage:
268
+ ```bash
269
+ python audio_reader.py -s 2 < LICENSE # read the software license - fast
270
+ ```
271
+
272
+ ## OpenAI API Documentation and Guide
273
+
274
+ * [OpenAI Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech)
275
+ * [OpenAI API Reference](https://platform.openai.com/docs/api-reference/audio/createSpeech)
276
+
277
+
278
+ ## Custom Voices Howto
279
+
280
+ ### Piper
281
+
282
+ 1. Select the piper voice and model from the [piper samples](https://rhasspy.github.io/piper-samples/)
283
+ 2. Update the `config/voice_to_speaker.yaml` with a new section for the voice, for example:
284
+ ```yaml
285
+ ...
286
+ tts-1:
287
+ ryan:
288
+ model: voices/en_US-ryan-high.onnx
289
+ speaker: # default speaker
290
+ ```
291
+ 3. New models will be downloaded as needed, of you can download them in advance with `download_voices_tts-1.sh`. For example:
292
+ ```shell
293
+ bash download_voices_tts-1.sh en_US-ryan-high
294
+ ```
295
+
296
+ ### Coqui XTTS v2
297
+
298
+ Coqui XTTS v2 voice cloning can work with as little as 6 seconds of clear audio. To create a custom voice clone, you must prepare a WAV file sample of the voice.
299
+
300
+ #### Guidelines for preparing good sample files for Coqui XTTS v2
301
+ * Mono (single channel) 22050 Hz WAV file
302
+ * 6-30 seconds long - longer isn't always better (I've had some good results with as little as 4 seconds)
303
+ * low noise (no hiss or hum)
304
+ * No partial words, breathing, laughing, music or backgrounds sounds
305
+ * An even speaking pace with a variety of words is best, like in interviews or audiobooks.
306
+
307
+ You can use FFmpeg to prepare your audio files, here are some examples:
308
+
309
+ ```shell
310
+ # convert a multi-channel audio file to mono, set sample rate to 22050 hz, trim to 6 seconds, and output as WAV file.
311
+ ffmpeg -i input.mp3 -ac 1 -ar 22050 -t 6 -y me.wav
312
+ # use a simple noise filter to clean up audio, and select a start time start for sampling.
313
+ ffmpeg -i input.wav -af "highpass=f=200, lowpass=f=3000" -ac 1 -ar 22050 -ss 00:13:26.2 -t 6 -y me.wav
314
+ # A more complex noise reduction setup, including volume adjustment
315
+ ffmpeg -i input.mkv -af "highpass=f=200, lowpass=f=3000, volume=5, afftdn=nf=25" -ac 1 -ar 22050 -ss 00:13:26.2 -t 6 -y me.wav
316
+ ```
317
+
318
+ Once your WAV file is prepared, save it in the `/voices/` directory and update the `config/voice_to_speaker.yaml` file with the new file name.
319
+
320
+ For example:
321
+
322
+ ```yaml
323
+ ...
324
+ tts-1-hd:
325
+ me:
326
+ model: xtts
327
+ speaker: voices/me.wav # this could be you
328
+ ```
329
+
330
+ ## Multilingual
331
+
332
+ Multilingual cloning support was added in version 0.11.0 and is available only with the XTTS v2 model. To use multilingual voices with piper simply download a language specific voice.
333
+
334
+ Coqui XTTSv2 has support for multiple languages: English (`en`), Spanish (`es`), French (`fr`), German (`de`), Italian (`it`), Portuguese (`pt`), Polish (`pl`), Turkish (`tr`), Russian (`ru`), Dutch (`nl`), Czech (`cs`), Arabic (`ar`), Chinese (`zh-cn`), Hungarian (`hu`), Korean (`ko`), Japanese (`ja`), and Hindi (`hi`). When not set, an attempt will be made to automatically detect the language, falling back to English (`en`).
335
+
336
+ Unfortunately the OpenAI API does not support language, but you can create your own custom speaker voice and set the language for that.
337
+
338
+ 1) Create the WAV file for your speaker, as in [Custom Voices Howto](#custom-voices-howto)
339
+ 2) Add the voice to `config/voice_to_speaker.yaml` and include the correct Coqui `language` code for the speaker. For example:
340
+
341
+ ```yaml
342
+ xunjiang:
343
+ model: xtts
344
+ speaker: voices/xunjiang.wav
345
+ language: zh-cn
346
+ ```
347
+
348
+ 3) Don't remove high unicode characters in your `config/pre_process_map.yaml`! If you have these lines, you will need to remove them. For example:
349
+
350
+ Remove:
351
+ ```yaml
352
+ - - '[\U0001F600-\U0001F64F\U0001F300-\U0001F5FF\U0001F680-\U0001F6FF\U0001F700-\U0001F77F\U0001F780-\U0001F7FF\U0001F800-\U0001F8FF\U0001F900-\U0001F9FF\U0001FA00-\U0001FA6F\U0001FA70-\U0001FAFF\U00002702-\U000027B0\U000024C2-\U0001F251]+'
353
+ - ''
354
+ ```
355
+
356
+ These lines were added to the `config/pre_process_map.yaml` config file by default before version 0.11.0:
357
+
358
+ 4) Your new multi-lingual speaker voice is ready to use!
359
+
360
+
361
+ ## Custom Fine-Tuned Model Support
362
+
363
+ Adding a custom xtts model is simple. Here is an example of how to add a custom fine-tuned 'halo' XTTS model.
364
+
365
+ 1) Save the model folder under `voices/` (all 4 files are required, including the vocab.json from the model)
366
+ ```
367
+ openedai-speech$ ls voices/halo/
368
+ config.json vocab.json model.pth sample.wav
369
+ ```
370
+ 2) Add the custom voice entry under the `tts-1-hd` section of `config/voice_to_speaker.yaml`:
371
+ ```yaml
372
+ tts-1-hd:
373
+ ...
374
+ halo:
375
+ model: halo # This name is required to be unique
376
+ speaker: voices/halo/sample.wav # voice sample is required
377
+ model_path: voices/halo
378
+ ```
379
+ 3) The model will be loaded when you access the voice for the first time (`--preload` doesn't work with custom models yet)
380
+
381
+ ## Generation Parameters
382
+
383
+ The generation of XTTSv2 voices can be fine tuned with the following options (defaults included below):
384
+
385
+ ```yaml
386
+ tts-1-hd:
387
+ alloy:
388
+ model: xtts
389
+ speaker: voices/alloy.wav
390
+ enable_text_splitting: True
391
+ length_penalty: 1.0
392
+ repetition_penalty: 10
393
+ speed: 1.0
394
+ temperature: 0.75
395
+ top_k: 50
396
+ top_p: 0.85
397
+ ```
add_voice.py ADDED
@@ -0,0 +1,63 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ import argparse
4
+ import os
5
+ import shutil
6
+ import yaml
7
+
8
+ print("!! WARNING EXPERIMENTAL !! - THIS TOOL WILL ERASE ALL COMMENTS FROM THE CONFIG FILES .. OR WORSE!!")
9
+
10
+ parser = argparse.ArgumentParser(formatter_class=argparse.ArgumentDefaultsHelpFormatter)
11
+
12
+ parser.add_argument('sample', action='store', help="Set the wav sample file")
13
+ parser.add_argument('-n', '--name', action='store', help="Set the name for the voice (by default will use the WAV file name)")
14
+ parser.add_argument('-l', '--language', action='store', default="auto", help="Set the language for the voice",
15
+ choices=['auto', 'en', 'es', 'fr', 'de', 'it', 'pt', 'pl', 'tr', 'ru', 'nl', 'cs', 'ar', 'zh-cn', 'ja', 'hu', 'ko', 'hi'])
16
+ parser.add_argument('--openai-model', action='store', default="tts-1-hd", help="Set the openai model for the voice")
17
+ parser.add_argument('--xtts-model', action='store', default="xtts", help="Set the xtts model for the voice (if using a custom model, also set model_path)")
18
+ parser.add_argument('--model-path', action='store', default=None, help="Set the path for a custom xtts model")
19
+ parser.add_argument('--config-path', action='store', default="config/voice_to_speaker.yaml", help="Set the config file path")
20
+ parser.add_argument('--voice-path', action='store', default="voices", help="Set the default voices file path")
21
+ parser.add_argument('--default-path', action='store', default="voice_to_speaker.default.yaml", help="Set the default config file path")
22
+
23
+ args = parser.parse_args()
24
+
25
+ basename = os.path.basename(args.sample)
26
+ name_noext, ext = os.path.splitext(basename)
27
+
28
+ if not args.name:
29
+ args.name = name_noext
30
+ else:
31
+ basename = f"{args.name}.wav"
32
+
33
+ dest_file = os.path.join(args.voice_path, basename)
34
+ if args.sample != dest_file:
35
+ shutil.copy2(args.sample, dest_file)
36
+
37
+ if not os.path.exists(args.config_path):
38
+ shutil.copy2(args.default_path, args.config_path)
39
+
40
+ with open(args.config_path, 'r', encoding='utf8') as file:
41
+ voice_map = yaml.safe_load(file)
42
+
43
+ model_conf = voice_map.get(args.openai_model, {})
44
+ model_conf[args.name] = {
45
+ 'model': args.xtts_model,
46
+ 'speaker': os.path.join(args.voice_path, basename),
47
+ 'language': args.language,
48
+ }
49
+ if args.model_path:
50
+ model_conf[args.name]['model_path'] = args.model_path
51
+ voice_map[args.openai_model] = model_conf
52
+
53
+ with open(args.config_path, 'w', encoding='utf8') as ofile:
54
+ yaml.safe_dump(voice_map, ofile, default_flow_style=False, allow_unicode=True)
55
+
56
+ print(f"Updated: {args.config_path}")
57
+ print(f"Added voice: {args.openai_model}/{args.name}")
58
+ print(f"Added section:")
59
+ print(f"{args.openai_model}:")
60
+ print(f" {args.name}:")
61
+ print(f" model: {model_conf[args.name]['model']}")
62
+ print(f" speaker: {model_conf[args.name]['speaker']}")
63
+ print(f" language: {model_conf[args.name]['language']}")
audio_reader.py ADDED
@@ -0,0 +1,127 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ try:
3
+ import dotenv
4
+ dotenv.load_dotenv()
5
+ except ImportError:
6
+ pass
7
+
8
+ import argparse
9
+ import os
10
+ import pysbd
11
+ import queue
12
+ import sys
13
+ import tempfile
14
+ import threading
15
+ import shutil
16
+ import sys
17
+ import tempfile
18
+ import contextlib
19
+
20
+ import openai
21
+
22
+ try:
23
+ from playsound import playsound
24
+ except ImportError:
25
+ print("Error: missing required package 'playsound'. !pip install playsound")
26
+ sys.exit(1)
27
+
28
+ @contextlib.contextmanager
29
+ def tempdir():
30
+ path = tempfile.mkdtemp()
31
+ try:
32
+ yield path
33
+ finally:
34
+ try:
35
+ shutil.rmtree(path)
36
+ except IOError:
37
+ sys.stderr.write('Failed to clean up temp dir {}'.format(path))
38
+
39
+ class SimpleAudioPlayer:
40
+ def __init__(self):
41
+ self._queue = queue.Queue()
42
+ self.running = True
43
+ self._thread = threading.Thread(target=self.__play_audio_loop, daemon=True)
44
+ self._thread.start()
45
+
46
+ def put(self, file):
47
+ self._queue.put(file)
48
+
49
+ def stop(self):
50
+ self.running = False
51
+ self._thread.join()
52
+ try:
53
+ while True:
54
+ file = self._queue.get_nowait()
55
+ if os.path.exists(file):
56
+ os.unlink(file)
57
+ except queue.Empty as e:
58
+ pass
59
+
60
+ def __play_audio_loop(self):
61
+ while self.running:
62
+ try:
63
+ while True:
64
+ file = self._queue.get(block=True, timeout=0.01)
65
+
66
+ try:
67
+ playsound(file)
68
+ finally:
69
+ os.unlink(file)
70
+
71
+ except queue.Empty as e:
72
+ continue
73
+
74
+ class OpenAI_tts:
75
+ def __init__(self, model, voice, speed, base_dir):
76
+ self.base_dir = base_dir
77
+ self.openai_client = openai.OpenAI(
78
+ # export OPENAI_API_KEY=sk-11111111111
79
+ # export OPENAI_BASE_URL=http://localhost:8000/v1
80
+ api_key = os.environ.get("OPENAI_API_KEY", "sk-ip"),
81
+ base_url = os.environ.get("OPENAI_BASE_URL", "http://localhost:8000/v1"),
82
+ )
83
+
84
+ self.params = {
85
+ 'model': model,
86
+ 'voice': voice,
87
+ 'speed': speed
88
+ }
89
+
90
+ def speech_to_file(self, text: str) -> None:
91
+ with self.openai_client.audio.speech.with_streaming_response.create(
92
+ input=text, response_format='opus', **self.params
93
+ ) as response:
94
+ tf, output_filename = tempfile.mkstemp(suffix='.wav', prefix="audio_reader_", dir=self.base_dir)
95
+ response.stream_to_file(output_filename)
96
+ return output_filename
97
+
98
+
99
+ if __name__ == "__main__":
100
+ parser = argparse.ArgumentParser(
101
+ description='Text to speech player',
102
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter)
103
+
104
+ parser.add_argument('-m', '--model', action='store', default="tts-1", help="The OpenAI model")
105
+ parser.add_argument('-v', '--voice', action='store', default="alloy", help="The voice to use")
106
+ parser.add_argument('-s', '--speed', action='store', default=1.0, help="How fast to read the audio")
107
+
108
+ args = parser.parse_args()
109
+
110
+ try:
111
+ with tempdir() as base_dir:
112
+ player = SimpleAudioPlayer()
113
+ reader = OpenAI_tts(voice=args.voice, model=args.model, speed=args.speed, base_dir=base_dir)
114
+ seg = pysbd.Segmenter(language='en', clean=True) # text is dirty, clean it up.
115
+
116
+ for raw_line in sys.stdin:
117
+ for line in seg.segment(raw_line):
118
+ if not line:
119
+ continue
120
+
121
+ print(line)
122
+ player.put(reader.speech_to_file(line))
123
+
124
+ player.stop()
125
+
126
+ except KeyboardInterrupt:
127
+ pass
config/config_files_will_go_here.txt ADDED
File without changes
docker-compose.min.yml ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ services:
2
+ server:
3
+ build:
4
+ dockerfile: Dockerfile.min # piper for all models, no gpu/nvidia required, ~1GB
5
+ image: ghcr.io/matatonic/openedai-speech-min
6
+ env_file: speech.env
7
+ ports:
8
+ - "8000:8000"
9
+ volumes:
10
+ - ./voices:/app/voices
11
+ - ./config:/app/config
12
+ # To install as a service
13
+ restart: unless-stopped
docker-compose.rocm.yml ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ services:
2
+ server:
3
+ build:
4
+ dockerfile: Dockerfile
5
+ args:
6
+ - USE_ROCM=1
7
+ image: ghcr.io/matatonic/openedai-speech-rocm
8
+ env_file: speech.env
9
+ ports:
10
+ - "8000:8000"
11
+ volumes:
12
+ - ./voices:/app/voices
13
+ - ./config:/app/config
14
+ # To install as a service
15
+ restart: unless-stopped
16
+ # For AMD GPU (ROCm) Support
17
+ cap_add:
18
+ - SYS_PTRACE
19
+ devices:
20
+ - /dev/kfd
21
+ - /dev/dri
22
+ security_opt:
23
+ - seccomp=unconfined
24
+ group_add:
25
+ - video
26
+ - audio
27
+ ipc: host
docker-compose.yml ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ services:
2
+ server:
3
+ build:
4
+ dockerfile: Dockerfile
5
+ image: ghcr.io/matatonic/openedai-speech
6
+ env_file: speech.env
7
+ ports:
8
+ - "8000:8000"
9
+ volumes:
10
+ - ./voices:/app/voices
11
+ - ./config:/app/config
12
+ # To install as a service
13
+ restart: unless-stopped
14
+ deploy:
15
+ resources:
16
+ reservations:
17
+ devices:
18
+ - driver: nvidia
19
+ #device_ids: ['0', '1'] # Select a gpu, or
20
+ count: all
21
+ capabilities: [gpu]
download_samples.bat ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ @echo off
2
+ for %%i in (alloy echo fable onyx nova shimmer) do (
3
+ if not exist "voices\%%i.wav" (
4
+ curl -s https://cdn.openai.com/API/docs/audio/%%i.wav | ffmpeg -loglevel error -i - -ar 22050 -ac 1 voices\%%i.wav
5
+ )
6
+ )
download_samples.sh ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ #!/bin/sh
2
+ for i in alloy echo fable onyx nova shimmer; do
3
+ [ ! -e "voices/$i.wav" ] && curl -s https://cdn.openai.com/API/docs/audio/$i.wav | ffmpeg -loglevel error -i - -ar 22050 -ac 1 voices/$i.wav
4
+ done
download_voices_tts-1-hd.bat ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ @echo off
2
+ set COQUI_TOS_AGREED=1
3
+ set TTS_HOME=voices
4
+
5
+ for %%i in (%*) do (
6
+ python -c "from TTS.utils.manage import ModelManager; ModelManager().download_model('%%i')"
7
+ )
8
+ call download_samples.bat
download_voices_tts-1-hd.sh ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ #!/bin/sh
2
+ export COQUI_TOS_AGREED=1
3
+ export TTS_HOME=voices
4
+
5
+ for model in $*; do
6
+ python -c "from TTS.utils.manage import ModelManager; ModelManager().download_model('$model')"
7
+ done
8
+ ./download_samples.sh
download_voices_tts-1.bat ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ @echo off
2
+ set models=%*
3
+ if "%models%" == "" set models=en_GB-northern_english_male-medium en_US-libritts_r-medium
4
+
5
+ piper --update-voices --data-dir voices --download-dir voices --model x 2> nul
6
+ for %%i in (%models%) do (
7
+ if not exist "voices\%%i.onnx" piper --data-dir voices --download-dir voices --model %%i > nul
8
+ )
download_voices_tts-1.sh ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ #!/bin/sh
2
+ models=${*:-"en_GB-northern_english_male-medium en_US-libritts_r-medium"} # en_US-ryan-high
3
+ piper --update-voices --data-dir voices --download-dir voices --model x 2> /dev/null
4
+ for i in $models ; do
5
+ [ ! -e "voices/$i.onnx" ] && piper --data-dir voices --download-dir voices --model $i < /dev/null > /dev/null
6
+ done
openedai.py ADDED
@@ -0,0 +1,181 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import FastAPI, Request
2
+ from fastapi.middleware.cors import CORSMiddleware
3
+ from fastapi.responses import PlainTextResponse, JSONResponse
4
+ from loguru import logger
5
+
6
+ class OpenAIError(Exception):
7
+ pass
8
+
9
+ class APIError(OpenAIError):
10
+ message: str
11
+ code: str = None
12
+ param: str = None
13
+ type: str = None
14
+
15
+ def __init__(self, message: str, code: int = 500, param: str = None, internal_message: str = ''):
16
+ super().__init__(message)
17
+ self.message = message
18
+ self.code = code
19
+ self.param = param
20
+ self.type = self.__class__.__name__,
21
+ self.internal_message = internal_message
22
+
23
+ def __repr__(self):
24
+ return "%s(message=%r, code=%d, param=%s)" % (
25
+ self.__class__.__name__,
26
+ self.message,
27
+ self.code,
28
+ self.param,
29
+ )
30
+
31
+ class InternalServerError(APIError):
32
+ pass
33
+
34
+ class ServiceUnavailableError(APIError):
35
+ def __init__(self, message="Service unavailable, please try again later.", code=503, internal_message=''):
36
+ super().__init__(message, code, internal_message)
37
+
38
+ class APIStatusError(APIError):
39
+ status_code: int = 400
40
+
41
+ def __init__(self, message: str, param: str = None, internal_message: str = ''):
42
+ super().__init__(message, self.status_code, param, internal_message)
43
+
44
+ class BadRequestError(APIStatusError):
45
+ status_code: int = 400
46
+
47
+ class AuthenticationError(APIStatusError):
48
+ status_code: int = 401
49
+
50
+ class PermissionDeniedError(APIStatusError):
51
+ status_code: int = 403
52
+
53
+ class NotFoundError(APIStatusError):
54
+ status_code: int = 404
55
+
56
+ class ConflictError(APIStatusError):
57
+ status_code: int = 409
58
+
59
+ class UnprocessableEntityError(APIStatusError):
60
+ status_code: int = 422
61
+
62
+ class RateLimitError(APIStatusError):
63
+ status_code: int = 429
64
+
65
+ class OpenAIStub(FastAPI):
66
+ def __init__(self, **kwargs) -> None:
67
+ super().__init__(**kwargs)
68
+ self.models = {}
69
+
70
+ self.add_middleware(
71
+ CORSMiddleware,
72
+ allow_origins=["*"],
73
+ allow_credentials=True,
74
+ allow_methods=["*"],
75
+ allow_headers=["*"]
76
+ )
77
+
78
+ @self.exception_handler(Exception)
79
+ def openai_exception_handler(request: Request, exc: Exception) -> JSONResponse:
80
+ # Generic server errors
81
+ #logger.opt(exception=exc).error("Logging exception traceback")
82
+
83
+ return JSONResponse(status_code=500, content={
84
+ 'message': 'InternalServerError',
85
+ 'code': 500,
86
+ })
87
+
88
+ @self.exception_handler(APIError)
89
+ def openai_apierror_handler(request: Request, exc: APIError) -> JSONResponse:
90
+ # Server error
91
+ logger.opt(exception=exc).error("Logging exception traceback")
92
+
93
+ if exc.internal_message:
94
+ logger.info(exc.internal_message)
95
+
96
+ return JSONResponse(status_code = exc.code, content={
97
+ 'message': exc.message,
98
+ 'code': exc.code,
99
+ 'type': exc.__class__.__name__,
100
+ 'param': exc.param,
101
+ })
102
+
103
+ @self.exception_handler(APIStatusError)
104
+ def openai_statuserror_handler(request: Request, exc: APIStatusError) -> JSONResponse:
105
+ # Client side error
106
+ logger.info(repr(exc))
107
+
108
+ if exc.internal_message:
109
+ logger.info(exc.internal_message)
110
+
111
+ return JSONResponse(status_code = exc.code, content={
112
+ 'message': exc.message,
113
+ 'code': exc.code,
114
+ 'type': exc.__class__.__name__,
115
+ 'param': exc.param,
116
+ })
117
+
118
+ @self.middleware("http")
119
+ async def log_requests(request: Request, call_next):
120
+ logger.debug(f"Request path: {request.url.path}")
121
+ logger.debug(f"Request method: {request.method}")
122
+ logger.debug(f"Request headers: {request.headers}")
123
+ logger.debug(f"Request query params: {request.query_params}")
124
+ logger.debug(f"Request body: {await request.body()}")
125
+
126
+ response = await call_next(request)
127
+
128
+ logger.debug(f"Response status code: {response.status_code}")
129
+ logger.debug(f"Response headers: {response.headers}")
130
+
131
+ return response
132
+
133
+ @self.get('/v1/billing/usage')
134
+ @self.get('/v1/dashboard/billing/usage')
135
+ async def handle_billing_usage():
136
+ return { 'total_usage': 0 }
137
+
138
+ @self.get("/", response_class=PlainTextResponse)
139
+ @self.head("/", response_class=PlainTextResponse)
140
+ @self.options("/", response_class=PlainTextResponse)
141
+ async def root():
142
+ return PlainTextResponse(content="", status_code=200 if self.models else 503)
143
+
144
+ @self.get("/health")
145
+ async def health():
146
+ return {"status": "ok" if self.models else "unk" }
147
+
148
+ @self.get("/v1/models")
149
+ async def get_model_list():
150
+ return self.model_list()
151
+
152
+ @self.get("/v1/models/{model}")
153
+ async def get_model_info(model_id: str):
154
+ return self.model_info(model_id)
155
+
156
+ def register_model(self, name: str, model: str = None) -> None:
157
+ self.models[name] = model if model else name
158
+
159
+ def deregister_model(self, name: str) -> None:
160
+ if name in self.models:
161
+ del self.models[name]
162
+
163
+ def model_info(self, model: str) -> dict:
164
+ result = {
165
+ "id": model,
166
+ "object": "model",
167
+ "created": 0,
168
+ "owned_by": "user"
169
+ }
170
+ return result
171
+
172
+ def model_list(self) -> dict:
173
+ if not self.models:
174
+ return {}
175
+
176
+ result = {
177
+ "object": "list",
178
+ "data": [ self.model_info(model) for model in list(set(self.models.keys() | self.models.values())) if model ]
179
+ }
180
+
181
+ return result
pre_process_map.default.yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # regex pairs to clean the text before speaking
2
+ - - ([^.])\.$
3
+ - \1
4
+ - - '&amp;'
5
+ - '&'
6
+ - - '&lt;'
7
+ - <
8
+ - - '&gt;'
9
+ - '>'
10
+ - - '&quot;'
11
+ - '"'
12
+ - - '&#x27;'
13
+ - ''''
14
+ - - '&copy;'
15
+ - '©'
16
+ - - '&reg;'
17
+ - '®'
18
+ - - '&nbsp;'
19
+ - ' '
20
+ - - '"'
21
+ - ''
22
+ - - ' biases '
23
+ - ' bias''s '
24
+ - - ex\.
25
+ - for example
26
+ - - e\.g\.
27
+ - for example
28
+ - - ' ESG '
29
+ - ' E.S.G. '
30
+ - - ' FY '
31
+ - ' F.Y. '
32
+ - - ([0-9]+)-([0-9]+)
33
+ - \1 to \2
34
+ - - '\*\*\*'
35
+ - '*'
36
+ - - '\*\*'
37
+ - '*'
requirements-min.txt ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ loguru
4
+ numpy<2
5
+ piper-tts
requirements-rocm.txt ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ loguru
4
+ piper-tts
5
+ coqui-tts
6
+ langdetect
7
+ # Creating an environment where deepspeed works is complex, for now it will be disabled by default.
8
+ #deepspeed
9
+ torch; --index-url https://download.pytorch.org/whl/rocm5.7; sys_platform == "linux"
10
+ torchaudio; --index-url https://download.pytorch.org/whl/rocm5.7; sys_platform == "linux"
requirements.txt ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ fastapi
2
+ uvicorn
3
+ loguru
4
+ piper-tts
5
+ coqui-tts[languages]
6
+ langdetect
7
+ # Creating an environment where deepspeed works is complex, for now it will be disabled by default.
8
+ #deepspeed
9
+
10
+ torch; sys_platform != "darwin"
11
+ torchaudio; sys_platform != "darwin"
12
+ # for MPS accelerated torch on Mac - doesn't work yet, incomplete support in torch and torchaudio
13
+ torch; --index-url https://download.pytorch.org/whl/cpu; sys_platform == "darwin"
14
+ torchaudio; --index-url https://download.pytorch.org/whl/cpu; sys_platform == "darwin"
15
+
16
+ # ROCM (Linux only) - use requirements.amd.txt
sample.env ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ TTS_HOME=voices
2
+ HF_HOME=voices
3
+ #PRELOAD_MODEL=xtts
4
+ #PRELOAD_MODEL=xtts_v2.0.2
5
+ #EXTRA_ARGS=--log-level DEBUG --unload-timer 300
6
+ #USE_ROCM=1
say.py ADDED
@@ -0,0 +1,96 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python
2
+
3
+ import sys
4
+ import os
5
+ import atexit
6
+ import tempfile
7
+ import argparse
8
+
9
+ try:
10
+ import dotenv
11
+ dotenv.load_dotenv(override=True)
12
+ except ImportError:
13
+ pass
14
+
15
+ try:
16
+ from playsound import playsound
17
+ except ImportError:
18
+ playsound = None
19
+
20
+ import openai
21
+
22
+
23
+ def parse_args(argv):
24
+ parser = argparse.ArgumentParser(
25
+ description='Text to speech using the OpenAI API',
26
+ formatter_class=argparse.ArgumentDefaultsHelpFormatter,
27
+ )
28
+ parser.add_argument("-m", "--model", type=str, default="tts-1", help="The model to use")#, choices=["tts-1", "tts-1-hd"])
29
+ parser.add_argument("-v", "--voice", type=str, default="alloy", help="The voice of the speaker")#, choices=["alloy", "echo", "fable", "onyx", "nova", "shimmer"])
30
+ parser.add_argument("-f", "--format", type=str, default="mp3", choices=["mp3", "aac", "opus", "flac"], help="The output audio format")
31
+ parser.add_argument("-s", "--speed", type=float, default=1.0, help="playback speed, 0.25-4.0")
32
+ parser.add_argument("-t", "--text", type=str, default=None, help="Provide text to read on the command line")
33
+ parser.add_argument("-i", "--input", type=str, default=None, help="Read text from a file (default is to read from stdin)")
34
+
35
+ if playsound is None:
36
+ parser.add_argument("-o", "--output", type=str, help="The filename to save the output to") # required
37
+ parser.add_argument("-p", "--playsound", type=None, default=None, help="python playsound not found. pip install playsound")
38
+ else:
39
+ parser.add_argument("-o", "--output", type=str, default=None, help="The filename to save the output to") # not required
40
+ parser.add_argument("-p", "--playsound", action="store_true", help="Play the audio")
41
+
42
+ args = parser.parse_args(argv)
43
+
44
+ return args
45
+
46
+
47
+ if __name__ == "__main__":
48
+ args = parse_args(sys.argv[1:])
49
+
50
+ if args.playsound and playsound is None:
51
+ print("playsound module not found, audio will not be played, use -o <filename> to save output to a file. pip install playsound")
52
+ sys.exit(1)
53
+
54
+ if not args.playsound and not args.output:
55
+ print("Must select one of playsound (-p) or output file name (-o)")
56
+ sys.exit(1)
57
+
58
+ if args.input is None and args.text is None:
59
+ text = sys.stdin.read()
60
+ elif args.text:
61
+ text = args.text
62
+ elif args.input:
63
+ if os.path.exists(args.input):
64
+ with open(args.input, 'r') as f:
65
+ text = f.read()
66
+ else:
67
+ print(f"Warning! File not found: {args.input}\nFalling back to old behavior for -i")
68
+ text = args.input
69
+
70
+ client = openai.OpenAI(
71
+ # This part is not needed if you set these environment variables before import openai
72
+ # export OPENAI_API_KEY=sk-11111111111
73
+ # export OPENAI_BASE_URL=http://localhost:8000/v1
74
+ api_key = os.environ.get("OPENAI_API_KEY", "sk-ip"),
75
+ base_url = os.environ.get("OPENAI_BASE_URL", "http://localhost:8000/v1"),
76
+ )
77
+
78
+ if args.playsound and args.output is None:
79
+ _, args.output = tempfile.mkstemp(suffix='.wav')
80
+
81
+ def cleanup():
82
+ os.unlink(args.output)
83
+
84
+ atexit.register(cleanup)
85
+
86
+ with client.audio.speech.with_streaming_response.create(
87
+ model=args.model,
88
+ voice=args.voice,
89
+ speed=args.speed,
90
+ response_format=args.format,
91
+ input=text,
92
+ ) as response:
93
+ response.stream_to_file(args.output)
94
+
95
+ if args.playsound:
96
+ playsound(args.output)
speech.py ADDED
@@ -0,0 +1,415 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import argparse
3
+ import contextlib
4
+ import gc
5
+ import os
6
+ import queue
7
+ import re
8
+ import subprocess
9
+ import sys
10
+ import threading
11
+ import time
12
+ import yaml
13
+
14
+ from fastapi.responses import StreamingResponse
15
+ from loguru import logger
16
+ from openedai import OpenAIStub, BadRequestError, ServiceUnavailableError
17
+ from pydantic import BaseModel
18
+ import uvicorn
19
+
20
+ @contextlib.asynccontextmanager
21
+ async def lifespan(app):
22
+ yield
23
+ gc.collect()
24
+ try:
25
+ import torch
26
+ if torch.cuda.is_available():
27
+ torch.cuda.empty_cache()
28
+ torch.cuda.ipc_collect()
29
+ except:
30
+ pass
31
+
32
+ app = OpenAIStub(lifespan=lifespan)
33
+ xtts = None
34
+ args = None
35
+
36
+ def unload_model():
37
+ import torch, gc
38
+ global xtts
39
+ if xtts:
40
+ logger.info("Unloading model")
41
+ xtts.xtts.to('cpu') # this was required to free up GPU memory...
42
+ del xtts
43
+ xtts = None
44
+ gc.collect()
45
+ torch.cuda.empty_cache()
46
+ torch.cuda.ipc_collect()
47
+
48
class xtts_wrapper():
    """Holds a loaded Coqui XTTS model and streams PCM audio from it.

    Optionally starts a self-rescheduling idle watchdog (`unload_timer`)
    that unloads the model after a period of inactivity.
    """
    check_interval: int = 1 # seconds between idle checks; too aggressive?

    def __init__(self, model_name, device, model_path=None, unload_timer=None):
        """Load `model_name` onto `device`.

        model_path: local checkpoint dir; when None the model is downloaded
            (or found in cache) via TTS's ModelManager.
        unload_timer: idle seconds before auto-unload; None disables it.
        """
        self.model_name = model_name
        self.unload_timer = unload_timer
        self.last_used = time.time()
        self.timer = None
        self.lock = threading.Lock()

        logger.info(f"Loading model {self.model_name} to {device}")

        if model_path is None:
            model_path = ModelManager().download_model(model_name)[0]

        config_path = os.path.join(model_path, 'config.json')
        config = XttsConfig()
        config.load_json(config_path)
        self.xtts = Xtts.init_from_config(config)
        self.xtts.load_checkpoint(config, checkpoint_dir=model_path, use_deepspeed=args.use_deepspeed) # XXX there are no prebuilt deepspeed wheels??
        self.xtts = self.xtts.to(device=device)
        self.xtts.eval()

        if self.unload_timer:
            logger.info(f"Setting unload timer to {self.unload_timer} seconds")
            self.last_used = time.time()
            self.check_idle()

    def check_idle(self):
        """Unload the model if idle past `unload_timer`, else re-arm the timer."""
        with self.lock:
            if time.time() - self.last_used >= self.unload_timer:
                # Bug fix: was a bare print(); use the file's loguru logger.
                logger.info("Unloading TTS model due to inactivity")
                unload_model()
            else:
                # Reschedule the check
                self.timer = threading.Timer(self.check_interval, self.check_idle)
                self.timer.daemon = True
                self.timer.start()

    def tts(self, text, language, speaker_wav, **hf_generate_kwargs):
        """Generator yielding raw PCM byte chunks for `text`.

        Acquires self.lock around every model call so the idle watchdog
        cannot unload the model mid-inference. `last_used` is refreshed
        after each chunk to keep the watchdog at bay while streaming.
        """
        with torch.no_grad():
            self.last_used = time.time()
            tokens = 0
            try:
                with self.lock:
                    gpt_cond_latent, speaker_embedding = self.xtts.get_conditioning_latents(audio_path=[speaker_wav]) # not worth caching calls, it's < 0.001s after model is loaded
                    pcm_stream = self.xtts.inference_stream(text, language, gpt_cond_latent, speaker_embedding, **hf_generate_kwargs)
                    self.last_used = time.time()

                while True:
                    with self.lock:
                        yield next(pcm_stream).cpu().numpy().tobytes()
                        self.last_used = time.time()
                        tokens += 1

            except StopIteration:
                pass

            finally:
                # NOTE(review): this measures time since the *last* chunk, not
                # the whole stream — preserved as-is, but guarded so an
                # instantaneous finish can't raise ZeroDivisionError.
                elapsed = max(time.time() - self.last_used, 1e-6)
                logger.debug(f"Generated {tokens} tokens in {elapsed:.2f}s @ {tokens / elapsed:.2f} T/s")
                self.last_used = time.time()
109
+
110
def default_exists(filename: str):
    """Ensure `filename` exists, seeding it from its `<basename>.default<ext>`
    sibling (in the current directory) when missing.

    E.g. 'config/pre_process_map.yaml' is copied from
    'pre_process_map.default.yaml'. Raises FileNotFoundError if the default
    file is also missing.
    """
    if not os.path.exists(filename):
        fpath, ext = os.path.splitext(filename)
        basename = os.path.basename(fpath)
        default = f"{basename}.default{ext}"

        # Bug fix: the message contained a literal "(unknown)" placeholder
        # instead of the actual filename.
        logger.info(f"{filename} does not exist, setting defaults from {default}")

        with open(default, 'r', encoding='utf8') as from_file:
            with open(filename, 'w', encoding='utf8') as to_file:
                to_file.write(from_file.read())
121
+
122
# Read pre process map on demand so it can be changed without restarting the server
def preprocess(raw_input):
    """Run the regex substitutions from config/pre_process_map.yaml over
    `raw_input` and return the stripped result.

    The map file is (re)read on every call, so edits take effect live.
    """
    default_exists('config/pre_process_map.yaml')
    with open('config/pre_process_map.yaml', 'r', encoding='utf8') as map_file:
        substitutions = yaml.safe_load(map_file)

    # Each entry is a (pattern, replacement) pair, applied in order.
    for pattern, replacement in substitutions:
        raw_input = re.sub(pattern, replacement, raw_input)

    return raw_input.strip()
134
+
135
# Read voice map on demand so it can be changed without restarting the server
def map_voice_to_speaker(voice: str, model: str):
    """Return the config mapping for (model, voice) from
    config/voice_to_speaker.yaml, re-read on every call.

    Raises BadRequestError when the model/voice pair is not configured.
    """
    default_exists('config/voice_to_speaker.yaml')
    with open('config/voice_to_speaker.yaml', 'r', encoding='utf8') as map_file:
        voice_map = yaml.safe_load(file=map_file)

    try:
        return voice_map[model][voice]
    except KeyError as e:
        raise BadRequestError(f"Error loading voice: {voice}, KeyError: {e}", param='voice')
145
+
146
class GenerateSpeechRequest(BaseModel):
    """Request body for POST /v1/audio/speech (OpenAI-compatible TTS API)."""
    model: str = "tts-1" # or "tts-1-hd"
    input: str  # text to speak; rejected if empty (also after preprocessing)
    voice: str = "alloy" # alloy, echo, fable, onyx, nova, and shimmer
    response_format: str = "mp3" # mp3, opus, aac, flac (the handler also accepts wav and pcm)
    speed: float = 1.0 # 0.25 - 4.0
152
+
153
def build_ffmpeg_args(response_format, input_format, sample_rate):
    """Build the ffmpeg argv that converts raw TTS output on stdin to
    `response_format`.

    input_format: 'WAV' (self-describing header) or a raw sample format
        such as 's16le'/'f32le', which needs the rate/channels spelled out.
    sample_rate: input sample rate as a string (ignored for 'WAV').
    Unknown response formats yield just the input half of the command.
    """
    if input_format == 'WAV':
        cmd = ["ffmpeg", "-loglevel", "error", "-f", "WAV", "-i", "-"]
    else:
        cmd = ["ffmpeg", "-loglevel", "error", "-f", input_format, "-ar", sample_rate, "-ac", "1", "-i", "-"]

    # Output encoder options per requested format.
    encoders = {
        "mp3": ["-f", "mp3", "-c:a", "libmp3lame", "-ab", "64k"],
        "opus": ["-f", "ogg", "-c:a", "libopus"],
        "aac": ["-f", "adts", "-c:a", "aac", "-ab", "64k"],
        "flac": ["-f", "flac", "-c:a", "flac"],
        "wav": ["-f", "wav", "-c:a", "pcm_s16le"],
        # even though pcm is technically 'raw', we still use ffmpeg to adjust the speed
        "pcm": ["-f", "s16le", "-c:a", "pcm_s16le"],
    }
    cmd.extend(encoders.get(response_format, []))

    return cmd
174
+
175
@app.post("/v1/audio/speech", response_class=StreamingResponse)
async def generate_speech(request: GenerateSpeechRequest):
    """OpenAI-compatible /v1/audio/speech endpoint.

    tts-1 requests (or all requests when --xtts_device none) are rendered by
    the external `piper` binary; tts-1-hd requests use the in-process XTTS
    model. Either way the raw PCM is piped through ffmpeg, whose stdout is
    streamed back to the client.
    """
    global xtts, args
    if len(request.input) < 1:
        raise BadRequestError("Empty Input", param='input')

    input_text = preprocess(request.input)

    if len(input_text) < 1:
        raise BadRequestError("Input text empty after preprocess.", param='input')

    model = request.model
    voice = request.voice
    response_format = request.response_format.lower()
    speed = request.speed

    # Set the Content-Type header based on the requested format
    if response_format == "mp3":
        media_type = "audio/mpeg"
    elif response_format == "opus":
        media_type = "audio/ogg;codec=opus" # codecs?
    elif response_format == "aac":
        media_type = "audio/aac"
    elif response_format == "flac":
        media_type = "audio/x-flac"
    elif response_format == "wav":
        media_type = "audio/wav"
    elif response_format == "pcm":
        # raw PCM rates differ per engine
        if model == 'tts-1': # piper
            media_type = "audio/pcm;rate=22050"
        elif model == 'tts-1-hd': # xtts
            media_type = "audio/pcm;rate=24000"
    else:
        raise BadRequestError(f"Invalid response_format: '{response_format}'", param='response_format')

    ffmpeg_args = None

    # Use piper for tts-1, and if xtts_device == none use for all models.
    if model == 'tts-1' or args.xtts_device == 'none':
        voice_map = map_voice_to_speaker(voice, 'tts-1')
        try:
            piper_model = voice_map['model']
        except KeyError as e:
            raise ServiceUnavailableError(f"Configuration error: tts-1 voice '{voice}' is missing 'model:' setting. KeyError: {e}")

        speaker = voice_map.get('speaker', None)

        tts_args = ["piper", "--model", str(piper_model), "--data-dir", "voices", "--download-dir", "voices", "--output-raw"]
        if speaker:
            tts_args.extend(["--speaker", str(speaker)])
        if speed != 1.0:
            # piper's length-scale is the inverse of playback speed
            tts_args.extend(["--length-scale", f"{1.0/speed}"])

        tts_proc = subprocess.Popen(tts_args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)
        tts_proc.stdin.write(bytearray(input_text.encode('utf-8')))
        tts_proc.stdin.close()

        ffmpeg_args = build_ffmpeg_args(response_format, input_format="s16le", sample_rate="22050")

        # Pipe the output from piper to the input of ffmpeg
        ffmpeg_args.extend(["-"])
        ffmpeg_proc = subprocess.Popen(ffmpeg_args, stdin=tts_proc.stdout, stdout=subprocess.PIPE)

        return StreamingResponse(content=ffmpeg_proc.stdout, media_type=media_type)

    # Use xtts for tts-1-hd
    elif model == 'tts-1-hd':
        voice_map = map_voice_to_speaker(voice, 'tts-1-hd')
        try:
            tts_model = voice_map.pop('model')
            speaker = voice_map.pop('speaker')
        except KeyError as e:
            raise ServiceUnavailableError(f"Configuration error: tts-1-hd voice '{voice}' is missing setting. KeyError: {e}")

        # Switching voices to a different xtts model requires a reload.
        if xtts and xtts.model_name != tts_model:
            unload_model()

        tts_model_path = voice_map.pop('model_path', None) # XXX changing this on the fly is ignored if you keep the same name

        if xtts is None:
            xtts = xtts_wrapper(tts_model, device=args.xtts_device, model_path=tts_model_path, unload_timer=args.unload_timer)

        ffmpeg_args = build_ffmpeg_args(response_format, input_format="f32le", sample_rate="24000")

        # tts speed doesn't seem to work well, so values outside xtts's
        # comfortable range are handled by ffmpeg's atempo filter instead.
        speed = voice_map.pop('speed', speed)
        if speed < 0.5:
            speed = speed / 0.5
            ffmpeg_args.extend(["-af", "atempo=0.5"])
        if speed > 1.0:
            ffmpeg_args.extend(["-af", f"atempo={speed}"])
            speed = 1.0

        # Pipe the output from xtts to the input of ffmpeg
        ffmpeg_args.extend(["-"])

        language = voice_map.pop('language', 'auto')
        if language == 'auto':
            try:
                language = detect(input_text)
                if language not in [
                    'en', 'es', 'fr', 'de', 'it', 'pt', 'pl', 'tr',
                    'ru', 'nl', 'cs', 'ar', 'zh-cn', 'hu', 'ko', 'ja', 'hi'
                ]:
                    logger.debug(f"Detected language {language} not supported, defaulting to en")
                    language = 'en'
                else:
                    logger.debug(f"Detected language: {language}")
            except Exception:
                # was a bare `except:`; langdetect raises on undetectable text
                language = 'en'
                logger.debug(f"Failed to detect language, defaulting to en")

        comment = voice_map.pop('comment', None) # ignored.

        # Remaining voice_map keys are passed straight through to xtts inference.
        hf_generate_kwargs = dict(
            speed=speed,
            **voice_map,
        )

        hf_generate_kwargs['enable_text_splitting'] = hf_generate_kwargs.get('enable_text_splitting', True) # change the default to true

        if hf_generate_kwargs['enable_text_splitting']:
            if language == 'zh-cn':
                split_lang = 'zh'
            else:
                split_lang = language
            all_text = split_sentence(input_text, split_lang, xtts.xtts.tokenizer.char_limits[split_lang])
        else:
            all_text = [input_text]

        ffmpeg_proc = subprocess.Popen(ffmpeg_args, stdin=subprocess.PIPE, stdout=subprocess.PIPE)

        in_q = queue.Queue() # speech pcm
        ex_q = queue.Queue() # exceptions

        def exception_check(exq: queue.Queue):
            # Re-raise any exception posted by out_writer (e.g. BrokenPipeError).
            try:
                e = exq.get_nowait()
            except queue.Empty:
                return

            raise e

        def generator():
            # text -> in_q
            try:
                for text in all_text:
                    for chunk in xtts.tts(text=text, language=language, speaker_wav=speaker, **hf_generate_kwargs):
                        exception_check(ex_q)
                        in_q.put(chunk)

            except BrokenPipeError as e: # client disconnect lands here
                logger.info("Client disconnected - 'Broken pipe'")

            except Exception as e:
                logger.error(f"Exception: {repr(e)}")
                raise e

            finally:
                in_q.put(None) # sentinel

        def out_writer():
            # in_q -> ffmpeg
            try:
                while True:
                    chunk = in_q.get()
                    if chunk is None: # sentinel
                        break
                    ffmpeg_proc.stdin.write(chunk) # BrokenPipeError from here on client disconnect

            except Exception as e: # BrokenPipeError
                ex_q.put(e) # we need to get this exception into the generation loop
                ffmpeg_proc.kill()
                return

            finally:
                ffmpeg_proc.stdin.close()

        generator_worker = threading.Thread(target=generator, daemon=True)
        generator_worker.start()

        out_writer_worker = threading.Thread(target=out_writer, daemon=True)
        out_writer_worker.start()

        def cleanup():
            # Bug fix: the original also did `del generator_worker` /
            # `del out_writer_worker`, which raises UnboundLocalError (del of
            # a closed-over name without `nonlocal`). The daemon threads exit
            # on their own; just make sure ffmpeg is gone.
            ffmpeg_proc.kill()

        return StreamingResponse(content=ffmpeg_proc.stdout, media_type=media_type, background=cleanup)
    else:
        raise BadRequestError("No such model, must be tts-1 or tts-1-hd.", param='model')
368
+
369
+
370
# We return 'mps' but currently XTTS will not work with mps devices as the cuda support is incomplete
def auto_torch_device():
    """Pick the best available torch device name ('cuda' > 'mps' > 'cpu'),
    or 'none' when torch cannot be imported (piper-only mode)."""
    try:
        import torch
        return 'cuda' if torch.cuda.is_available() else 'mps' if ( torch.backends.mps.is_available() and torch.backends.mps.is_built() ) else 'cpu'
    except Exception:
        # was a bare `except:` — would also hide KeyboardInterrupt/SystemExit
        return 'none'
378
+
379
if __name__ == "__main__":
    # CLI entry point: parse arguments, set up logging/config, optionally
    # preload the XTTS model, then serve the FastAPI app with uvicorn.
    parser = argparse.ArgumentParser(
        description='OpenedAI Speech API Server',
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser.add_argument('--xtts_device', action='store', default=auto_torch_device(), help="Set the device for the xtts model. The special value of 'none' will use piper for all models.")
    parser.add_argument('--preload', action='store', default=None, help="Preload a model (Ex. 'xtts' or 'xtts_v2.0.2'). By default it's loaded on first use.")
    parser.add_argument('--unload-timer', action='store', default=None, type=int, help="Idle unload timer for the XTTS model in seconds, Ex. 900 for 15 minutes")
    parser.add_argument('--use-deepspeed', action='store_true', default=False, help="Use deepspeed with xtts (this option is unsupported)")
    parser.add_argument('--no-cache-speaker', action='store_true', default=False, help="Don't use the speaker wav embeddings cache")
    parser.add_argument('-P', '--port', action='store', default=8000, type=int, help="Server tcp port")
    parser.add_argument('-H', '--host', action='store', default='0.0.0.0', help="Host to listen on, Ex. 0.0.0.0")
    parser.add_argument('-L', '--log-level', default="INFO", choices=["DEBUG", "INFO", "WARNING", "ERROR", "CRITICAL"], help="Set the log level")

    args = parser.parse_args()

    # Seed both config files from their *.default.yaml siblings if missing.
    default_exists('config/pre_process_map.yaml')
    default_exists('config/voice_to_speaker.yaml')

    # Replace loguru's default sink so --log-level is honored.
    logger.remove()
    logger.add(sink=sys.stderr, level=args.log_level)

    if args.xtts_device != "none":
        # Heavy TTS imports are deferred so piper-only deployments don't need
        # torch/TTS/langdetect installed at all.
        import torch
        from TTS.tts.configs.xtts_config import XttsConfig
        from TTS.tts.models.xtts import Xtts
        from TTS.utils.manage import ModelManager
        from TTS.tts.layers.xtts.tokenizer import split_sentence
        from langdetect import detect

    if args.preload:
        # Load the XTTS model up front instead of on first tts-1-hd request.
        xtts = xtts_wrapper(args.preload, device=args.xtts_device, unload_timer=args.unload_timer)

    app.register_model('tts-1')
    app.register_model('tts-1-hd')

    uvicorn.run(app, host=args.host, port=args.port)
startup.bat ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
@echo off
rem Windows startup: load env overrides, fetch voices, run the server.

rem Bug fix: `set /p < speech.env` is invalid batch (set /p needs a variable
rem name, and reads a single line). Read KEY=VALUE lines instead; `eol=#`
rem skips comment lines.
if exist speech.env for /f "usebackq eol=# tokens=* delims=" %%i in ("speech.env") do set "%%i"

call download_voices_tts-1.bat
call download_voices_tts-1-hd.bat %PRELOAD_MODEL%

rem Bug fix: `%PRELOAD_MODEL:+--preload %PRELOAD_MODEL%` is bash
rem ${VAR:+...} syntax, which batch does not support; use `if defined`.
if defined PRELOAD_MODEL (
    python speech.py --preload %PRELOAD_MODEL% %EXTRA_ARGS%
) else (
    python speech.py %EXTRA_ARGS%
)
startup.min.sh ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Minimal startup (piper-only, no GPU/xtts).

# Load optional environment overrides (EXTRA_ARGS, ...), if present.
[ -f speech.env ] && . speech.env

bash download_voices_tts-1.sh

# Bug fix: "$@" must be quoted so caller arguments containing spaces are not
# word-split. $EXTRA_ARGS is deliberately unquoted (it holds multiple args).
python speech.py --xtts_device none $EXTRA_ARGS "$@"
startup.sh ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Full startup: load env overrides, fetch voice models, run the server.

# Load optional environment overrides (PRELOAD_MODEL, EXTRA_ARGS, ...).
[ -f speech.env ] && . speech.env

echo "First startup may download 2GB of speech models. Please wait."

bash download_voices_tts-1.sh
bash download_voices_tts-1-hd.sh $PRELOAD_MODEL

# ${PRELOAD_MODEL:+...} expands only when PRELOAD_MODEL is set and non-empty.
# Bug fix: "$@" must be quoted so caller arguments containing spaces are not
# word-split. $EXTRA_ARGS is deliberately unquoted (it holds multiple args).
python speech.py ${PRELOAD_MODEL:+--preload $PRELOAD_MODEL} $EXTRA_ARGS "$@"
test_voices.sh ADDED
@@ -0,0 +1,67 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/bin/bash
# Audition every configured voice against a running server (default
# http://localhost:8000). Requires curl and mpv.

URL=${1:-http://localhost:8000/v1/audio/speech}

# speak <model> <voice> <speed> <text>: request speech and play it.
# ($URL is quoted everywhere — the original left it unquoted.)
speak() {
  curl -s "$URL" -H "Content-Type: application/json" -d "{
  \"model\": \"$1\",
  \"input\": \"$4\",
  \"voice\": \"$2\",
  \"speed\": $3
}" | mpv --really-quiet -
}

speak tts-1 echo 1.0 "I'm going to play you the original voice, followed by the piper voice and finally the X T T S version 2 voice"

for voice in alloy echo fable onyx nova shimmer ; do

  echo $voice

  speak tts-1 echo 1.0 "original"

  # OpenAI's official reference sample for this voice.
  curl -s "https://cdn.openai.com/API/docs/audio/$voice.wav" | mpv --really-quiet -

  speak tts-1 "$voice" 1.0 "The quick brown fox jumped over the lazy dog. This voice is called $voice, how do you like this voice?"

  speak tts-1-hd "$voice" 1.0 "The quick brown fox jumped over the lazy dog. This HD voice is called $voice, how do you like this voice?"

done

# Speed extremes (0.25 = slowest, 4.0 = fastest supported).
speak tts-1 onyx 0.25 "the slowest voice"
speak tts-1-hd onyx 0.25 "the slowest HD voice"
speak tts-1 nova 4.0 "And this is how fast it can go, the fastest voice"
speak tts-1-hd nova 4.0 "And this is how fast it can go, the fastest HD voice"
voice_to_speaker.default.yaml ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Default voice -> engine/speaker mapping; copied to config/ on first run.
# Structure: <model>.<voice>.{model, speaker, ...} (see map_voice_to_speaker).
tts-1:
  some_other_voice_name_you_want:
    model: voices/choose your own model.onnx
    speaker: set your own speaker
  alloy:
    model: voices/en_US-libritts_r-medium.onnx
    speaker: 79 # 64, 79, 80, 101, 130
  echo:
    model: voices/en_US-libritts_r-medium.onnx
    speaker: 134 # 52, 102, 134
  echo-alt:
    model: voices/en_US-ryan-high.onnx
    speaker: # default speaker
  fable:
    model: voices/en_GB-northern_english_male-medium.onnx
    speaker: # default speaker
  onyx:
    model: voices/en_US-libritts_r-medium.onnx
    speaker: 159 # 55, 90, 132, 136, 137, 159
  nova:
    model: voices/en_US-libritts_r-medium.onnx
    speaker: 107 # 57, 61, 107, 150, 162
  shimmer:
    model: voices/en_US-libritts_r-medium.onnx
    speaker: 163
tts-1-hd:
  alloy-alt:
    model: xtts
    speaker: voices/alloy-alt.wav
  alloy:
    model: xtts
    speaker: voices/alloy.wav
  echo:
    model: xtts
    speaker: voices/echo.wav
  fable:
    model: xtts
    speaker: voices/fable.wav
  onyx:
    model: xtts
    speaker: voices/onyx.wav
  nova:
    model: xtts
    speaker: voices/nova.wav
  shimmer:
    model: xtts
    speaker: voices/shimmer.wav
  me:
    model: xtts_v2.0.2 # you can specify an older xtts version
    speaker: voices/me.wav # this could be you
    language: auto
    enable_text_splitting: true # canonical lowercase boolean (was `True`)
    length_penalty: 1.0
    repetition_penalty: 10
    speed: 1.0
    temperature: 0.75
    top_k: 50
    top_p: 0.85
    comment: You can add a comment here also, which will be persistent and otherwise ignored.