imseldrith commited on
Commit
f32a529
1 Parent(s): f2d6326

Upload folder using huggingface_hub

Browse files
.circleci/config.yml ADDED
@@ -0,0 +1,125 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ version: 2.1
2
+
3
+ parameters:
4
+ # This parameter is used to trigger the main workflow
5
+ image_name:
6
+ type: string
7
+ default: text_to_speech_open_api
8
+ package_version:
9
+ type: string
10
+ default: "2.1.31"
11
+ dependency_image_name:
12
+ type: string
13
+ default: text_to_speech_open_api_dependency
14
+ dependency_version:
15
+ type: string
16
+ default: "2.1.7"
17
+ build_dependency:
18
+ type: boolean
19
+ default: true
20
+
21
+ executors:
22
+ node:
23
+ docker:
24
+ - image: cimg/python:3.8.6
25
+ jobs:
26
+ sonar-scan:
27
+ machine:
28
+ image: ubuntu-2004:202201-02
29
+ working_directory: ~/text-to-speech-open-api/
30
+ steps:
31
+ - checkout:
32
+ path: ~/text-to-speech-open-api
33
+ - run:
34
+ name: Branch
35
+ command: echo $CIRCLE_BRANCH
36
+ - run:
37
+ name: Sonar Scan
38
+ command: docker run --rm -e SONAR_HOST_URL="https://sonarcloud.io" -e SONAR_TOKEN=$SONAR_TOKEN -v $PWD:/usr/src sonarsource/sonar-scanner-cli -Dsonar.branch.name=$CIRCLE_BRANCH
39
+
40
+ test-suite:
41
+ executor: node
42
+ working_directory: ~/text-to-speech-open-api/
43
+ steps:
44
+ - checkout:
45
+ path: ~/text-to-speech-open-api/
46
+ - run:
47
+ name: Setup Testing Environment
48
+ command: |
49
+ python --version ;pip --version ; pwd ; ls
50
+ pip install --upgrade pip
51
+ pip install pytest grpcio grpcio-tools pytest-grpc coverage pydantic
52
+ python --version ; pip --version ; pwd ; ls
53
+ - run:
54
+ name: Run Tests
55
+ command: |
56
+ python --version ; pip --version ; pwd ; ls
57
+ pwd ; coverage run -m unittest discover tests
58
+ pytest
59
+ coverage report --fail-under 20 src/*/*.py
60
+ coverage html # open htmlcov/index.html in a browser
61
+ - store_artifacts:
62
+ path: htmlcov
63
+
64
+
65
+ build_and_deploy_api_image:
66
+ machine:
67
+ image: ubuntu-2004:202101-01
68
+ resource_class: large
69
+ working_directory: ~/text-to-speech-open-api/
70
+ steps:
71
+ - checkout:
72
+ path: ~/text-to-speech-open-api
73
+ - run:
74
+ name: Setup Google credentials
75
+ command: |
76
+ echo ${GOOGLE_AUTH} > ${HOME}/gcp-key.json
77
+ pip install google_compute_engine
78
+ gcloud auth activate-service-account --key-file ${HOME}/gcp-key.json
79
+ gcloud --quiet config set project ${GCP_PROJECT}
80
+ - run:
81
+ name: Build dependencies image
82
+ command: |
83
+ echo "Build dependencies..."
84
+ if << pipeline.parameters.build_dependency >> ; then
85
+ docker build --rm=false -t gcr.io/${GCP_PROJECT}/<< pipeline.parameters.dependency_image_name >>:<< pipeline.parameters.dependency_version >> dependencies/
86
+ gcloud docker -- push gcr.io/${GCP_PROJECT}/<< pipeline.parameters.dependency_image_name >>:<< pipeline.parameters.dependency_version >>
87
+ fi
88
+ # - when:
89
+ # condition:
90
+ # equal: [ true, << pipeline.parameters.build_dependency >> ]
91
+ # steps:
92
+ # - run: |
93
+ # docker build --rm=false -t gcr.io/${GCP_PROJECT}/<< pipeline.parameters.dependency_image_name >>:<< pipeline.parameters.dependency_version >> dependencies/
94
+ # gcloud docker -- push gcr.io/${GCP_PROJECT}/<< pipeline.parameters.dependency_image_name >>:<< pipeline.parameters.dependency_version >>
95
+ - run:
96
+ name: Build docker image
97
+ command: |
98
+ echo "Build api image..."
99
+ docker build --rm=false -t gcr.io/${GCP_PROJECT}/<< pipeline.parameters.image_name >>:<< pipeline.parameters.package_version >> .
100
+ - run:
101
+ name: deploy image to registry
102
+ command: |
103
+ echo "Deploy api image to registry..."
104
+ gcloud docker -- push gcr.io/${GCP_PROJECT}/<< pipeline.parameters.image_name >>:<< pipeline.parameters.package_version >>
105
+ - run:
106
+ name: Remove account details
107
+ command: |
108
+ rm ${HOME}/gcp-key.json ; ls
109
+ workflows:
110
+ version: 2
111
+ main_workflow:
112
+ jobs:
113
+ - sonar-scan:
114
+ name: sonar-scan
115
+ - test-suite:
116
+ name: test-suite
117
+ - approve_to_publish:
118
+ name: approve-build-and-deploy
119
+ type: approval
120
+ requires:
121
+ - test-suite
122
+ - build_and_deploy_api_image:
123
+ name: build_and_deploy_api_image
124
+ requires:
125
+ - approve-build-and-deploy
.gitignore ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Byte-compiled / optimized / DLL files
2
+ __pycache__/
3
+ *.py[cod]
4
+ *$py.class
5
+
6
+ # C extensions
7
+ *.so
8
+
9
+ # Distribution / packaging
10
+ .Python
11
+ build/
12
+ develop-eggs/
13
+ dist/
14
+ downloads/
15
+ eggs/
16
+ .eggs/
17
+ lib/
18
+ lib64/
19
+ parts/
20
+ sdist/
21
+ var/
22
+ wheels/
23
+ pip-wheel-metadata/
24
+ share/python-wheels/
25
+ *.egg-info/
26
+ .installed.cfg
27
+ *.egg
28
+ MANIFEST
29
+
30
+ # PyInstaller
31
+ # Usually these files are written by a python script from a template
32
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
33
+ *.manifest
34
+ *.spec
35
+
36
+ # Installer logs
37
+ pip-log.txt
38
+ pip-delete-this-directory.txt
39
+
40
+ # Unit test / coverage reports
41
+ htmlcov/
42
+ .tox/
43
+ .nox/
44
+ .coverage
45
+ .coverage.*
46
+ .cache
47
+ nosetests.xml
48
+ coverage.xml
49
+ *.cover
50
+ *.py,cover
51
+ .hypothesis/
52
+ .pytest_cache/
53
+
54
+ # Translations
55
+ *.mo
56
+ *.pot
57
+
58
+ # Django stuff:
59
+ *.log
60
+ local_settings.py
61
+ db.sqlite3
62
+ db.sqlite3-journal
63
+
64
+ # Flask stuff:
65
+ instance/
66
+ .webassets-cache
67
+
68
+ # Scrapy stuff:
69
+ .scrapy
70
+
71
+ # Sphinx documentation
72
+ docs/_build/
73
+
74
+ # PyBuilder
75
+ target/
76
+
77
+ # Jupyter Notebook
78
+ .ipynb_checkpoints
79
+
80
+ # IPython
81
+ profile_default/
82
+ ipython_config.py
83
+
84
+ # pyenv
85
+ .python-version
86
+
87
+ # pipenv
88
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
89
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
90
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
91
+ # install all needed dependencies.
92
+ #Pipfile.lock
93
+
94
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow
95
+ __pypackages__/
96
+
97
+ # Celery stuff
98
+ celerybeat-schedule
99
+ celerybeat.pid
100
+
101
+ # SageMath parsed files
102
+ *.sage.py
103
+
104
+ # Environments
105
+ .env
106
+ .venv
107
+ env/
108
+ venv/
109
+ ENV/
110
+ env.bak/
111
+ venv.bak/
112
+
113
+ # Spyder project settings
114
+ .spyderproject
115
+ .spyproject
116
+
117
+ # Rope project settings
118
+ .ropeproject
119
+
120
+ # mkdocs documentation
121
+ /site
122
+
123
+ # mypy
124
+ .mypy_cache/
125
+ .dmypy.json
126
+ dmypy.json
127
+
128
+ # Pyre type checker
129
+ .pyre/
130
+
131
+ .idea/
132
+ vakyansh-tts/
Dockerfile ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM gcr.io/ekstepspeechrecognition/text_to_speech_open_api_dependency:2.1.7
2
+
3
+
4
+ ARG DEBIAN_FRONTEND=noninteractive
5
+ EXPOSE 5000
6
+ RUN mkdir /opt/text_to_speech_open_api/
7
+ ENV base_path=/opt/text_to_speech_open_api/
8
+ ENV models_base_path=/opt/text_to_speech_open_api/deployed_models/
9
+ ENV model_logs_base_path=/opt/text_to_speech_open_api/deployed_models/logs/
10
+ ENV translit_model_base_path=/opt/text_to_speech_open_api/vakyansh-tts/src/glow_tts/tts_infer/
11
+ RUN echo "export LD_LIBRARY_PATH=/usr/local/nvidia/lib:/usr/local/nvidia/lib64:/usr/local/lib" >> ~/.bashrc
12
+ RUN cp -R /opt/api_dependencies/vakyansh-tts /opt/text_to_speech_open_api/
13
+ RUN cp -R /opt/api_dependencies/vakyansh-tts/tts_infer /opt/text_to_speech_open_api/vakyansh-tts/src/glow_tts/
14
+ WORKDIR /opt/text_to_speech_open_api/vakyansh-tts/src/glow_tts/
15
+ COPY src /opt/text_to_speech_open_api/vakyansh-tts/src/glow_tts/src
16
+ COPY ./server.py /opt/text_to_speech_open_api/vakyansh-tts/src/glow_tts/
17
+ CMD ["python3","/opt/text_to_speech_open_api/vakyansh-tts/src/glow_tts/server.py"]
18
+
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2022 Open-Speech-EkStep
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
dependencies/Dockerfile ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ FROM nvidia/cuda:11.4.0-cudnn8-devel-ubuntu20.04
2
+ ARG DEBIAN_FRONTEND=noninteractive
3
+
4
+ RUN mkdir /opt/api_dependencies/
5
+ WORKDIR /opt/api_dependencies/
6
+ COPY installer.sh /opt/api_dependencies/
7
+ RUN sh /opt/api_dependencies/installer.sh
8
+
dependencies/installer.sh ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
#!/usr/bin/env bash
#
# Installs the system packages and the vakyansh-tts checkout that the
# dependency image provides to the API image.
echo "Setting up model dependencies...."

apt-get update && apt-get upgrade -y
apt-get install -y sudo wget python3-pip git libsndfile1 libsndfile1-dev

# Use apt-get with -y: this runs inside a non-interactive image build, where
# a confirmation prompt cannot be answered.
apt-get install -y python-is-python3
git clone https://github.com/Open-Speech-EkStep/vakyansh-tts
cd vakyansh-tts
pip3 install --no-cache-dir -r requirements.txt
bash install.sh
python3 setup.py bdist_wheel
pip3 install -e .
pip3 install torch==1.7.1+cu110 -f https://download.pytorch.org/whl/torch_stable.html
cd tts_infer
mkdir -p translit_models
#gsutil -m cp -r gs://vakyansh-open-models/translit_models .
19
+
20
+
server.py ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import uvicorn
2
+ from fastapi import FastAPI
3
+ from fastapi.exceptions import RequestValidationError
4
+ from fastapi.middleware.cors import CORSMiddleware
5
+
6
+ from src import log_setup
7
+ from src.config import settings
8
+ from src.routers import tts_routes
9
+ from src.routers.exception_handler import validation_exception_handler
10
+
11
+ LOGGER = log_setup.get_logger(__name__)
12
+
13
app = FastAPI()
# CORS is open to every origin. The origins are passed as a list: a bare
# string would be treated as a sequence of characters / substring-matched.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=False,
    allow_methods=["*"],
    allow_headers=["*"],
)
21
+
22
+ app.include_router(tts_routes.router)
23
+ app.add_exception_handler(RequestValidationError, validation_exception_handler)
24
+
25
+ if __name__ == "__main__":
26
+ LOGGER.info(f'Loading with settings {settings}')
27
+ uvicorn.run(
28
+ "server:app", host="0.0.0.0", port=settings.server_port, log_level=settings.log_level.lower(), reload=False
29
+ )
sonar-project.properties ADDED
@@ -0,0 +1,15 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # must be unique in a given SonarQube instance
2
+ sonar.projectKey=text-to-speech-open-api
3
+ sonar.host.url=https://sonarcloud.io
4
+ sonar.organization=vakyansh
5
+ sonar.python.version=3
6
+ # --- optional properties ---
7
+ # defaults to project key
8
+ #sonar.projectName=My project
9
+ # defaults to 'not provided'
10
+ #sonar.projectVersion=1.0
11
+ # Path is relative to the sonar-project.properties file. Defaults to .
12
+ #sonar.sources=src
13
+ # Encoding of the source code. Default is default system encoding
14
+ #sonar.sourceEncoding=UTF-8
15
+ #sonar.branch.name=refactor
src/__init__.py ADDED
File without changes
src/application/__init__.py ADDED
File without changes
src/application/tts_preprocess.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import base64
2
+ import io
3
+
4
+ import numpy as np
5
+ import torch
6
+ from fastapi import HTTPException
7
+ from indicnlp.tokenize import sentence_tokenize
8
+ from mosestokenizer import MosesSentenceSplitter
9
+ from scipy.io.wavfile import write
10
+ from tts_infer.num_to_word_on_sent import normalize_nums
11
+
12
+ from src import log_setup
13
+ from src.infer.model_inference import ModelService
14
+ from src.model.language import Language
15
+ from src.model.tts_request import TTSRequest
16
+ from src.model.tts_response import TTSResponse, AudioFile, AudioConfig
17
+
18
+ LOGGER = log_setup.get_logger(__name__)
19
+ model_service = ModelService()
20
+ _INDIC = ["as", "bn", "gu", "hi", "kn", "ml", "mr", "or", "pa", "ta", "te"]
21
+ _PURAM_VIRAM_LANGUAGES = ["hi", "or", "bn", "as"]
22
+ _TRANSLITERATION_NOT_AVAILABLE_IN = ["en","or"]
23
+
24
def infer_tts_request(request: TTSRequest):
    """Synthesize every sentence of *request* and wrap the audio in a TTSResponse.

    Each input sentence is passed to ``infer_tts`` with the language and
    gender taken from the request config. Any exception is logged and then
    re-raised to the caller.
    """
    tts_config = request.config
    source_lang = tts_config.language.sourceLanguage
    voice_gender = tts_config.gender
    audio_files = []
    response_config = AudioConfig(language=Language(sourceLanguage=source_lang))
    try:
        for item in request.input:
            LOGGER.debug(f'infer for gender {voice_gender} and lang {source_lang} text {item.source}')
            audio_file = infer_tts(language=source_lang, gender=voice_gender, text_to_infer=item.source)
            LOGGER.debug(f'infer done for text {item.source}')
            audio_files.append(audio_file)
    except Exception as e:
        LOGGER.exception('Failed to infer %s', e)
        raise
    return TTSResponse(audio=audio_files, config=response_config)
41
+
42
+
43
def infer_tts(language: str, gender: str, text_to_infer: str):
    """Synthesize *text_to_infer* and return it as a base64-encoded WAV AudioFile.

    The model pair is looked up in ``model_service.available_choice`` under
    the key ``"<language>_<gender>"``.

    Raises:
        NotImplementedError: no model is loaded for the language/gender pair.
        HTTPException: status 400 when *text_to_infer* is empty.
    """
    choice = language + "_" + gender
    LOGGER.debug(f'choice for model {choice}')

    try:
        t2s = model_service.available_choice[choice]
    except KeyError:
        raise NotImplementedError('Requested model not found') from None

    if not text_to_infer:
        raise HTTPException(status_code=400, detail={"error": "No text"})

    text_to_infer = normalize_text(text_to_infer, language)

    # Every request is synthesized sentence by sentence (paragraph mode).
    LOGGER.debug("Running in paragraph mode...")
    audio, sr = run_tts_paragraph(text_to_infer, language, t2s)
    torch.cuda.empty_cache()  # TODO: find better approach for this
    LOGGER.debug('Audio generates successfully')

    byte_io = io.BytesIO()
    write(byte_io, sr, audio)
    # getvalue() returns the whole buffer regardless of the stream position,
    # so this does not depend on the writer rewinding the stream.
    encoded_string = base64.b64encode(byte_io.getvalue()).decode()
    # Log only the size: the payload itself would bloat the log files.
    LOGGER.debug('Encoded audio length: %d characters', len(encoded_string))
    return AudioFile(audioContent=encoded_string)
72
+
73
+
74
def split_sentences(paragraph, language):
    """Split *paragraph* into a list of sentences for *language*.

    English uses the Moses sentence splitter and the languages listed in
    ``_INDIC`` use the Indic NLP tokenizer. For any other language code the
    paragraph is returned as a single sentence, so callers always receive an
    iterable rather than ``None``.
    """
    if language == "en":
        with MosesSentenceSplitter(language) as splitter:
            return splitter([paragraph])
    if language in _INDIC:
        return sentence_tokenize.sentence_split(paragraph, lang=language)
    # No splitter for this language: treat the whole text as one sentence.
    return [paragraph]
80
+
81
+
82
def normalize_text(text, lang):
    """Replace ``|`` and ``.`` with the danda (``।``) for the languages in
    ``_PURAM_VIRAM_LANGUAGES``; return any other text unchanged."""
    if lang not in _PURAM_VIRAM_LANGUAGES:
        return text
    return text.replace('|', '।').replace('.', '।')
87
+
88
+
89
def pre_process_text(text, lang):
    """Prepare one sentence for the model of language *lang*.

    For Hindi, dandas (``।``) are converted to full stops. For English, a
    sentence that does not already end in ``.`` gets ``'. '`` appended.
    An empty string is returned unchanged.
    """
    if lang == 'hi':
        text = text.replace('।', '.')  # only for hindi models

    # Guard against the empty string: indexing text[-1] would raise IndexError.
    if lang == 'en' and text and not text.endswith('.'):
        text = text + '. '

    return text
97
+
98
+
99
def run_tts_paragraph(text, lang, t2s):
    """Synthesize *text* sentence by sentence and join the audio.

    Returns the concatenated audio array together with the sample rate
    reported for the last sentence.
    """
    results = [
        run_tts(pre_process_text(sentence, lang), lang, t2s)
        for sentence in split_sentences(text, language=lang)
    ]
    joined_audio = np.concatenate([chunk for chunk, _ in results])
    sample_rate = results[-1][1]
    return joined_audio, sample_rate
110
+
111
+
112
def run_tts(text, lang, t2s):
    """Run the two-stage model pair *t2s* on a single sentence.

    Numbers are first converted to words; the result is then transliterated
    unless *lang* is in ``_TRANSLITERATION_NOT_AVAILABLE_IN``. ``t2s[0]``
    produces the mel spectrogram and ``t2s[1]`` turns it into audio.
    Returns ``(audio, sample_rate)``.
    """
    # converting numbers to words in lang
    spelled_out = normalize_nums(text, lang)
    if lang in _TRANSLITERATION_NOT_AVAILABLE_IN:
        model_input = spelled_out
    else:
        # transliterating english words to lang
        model_input = model_service.transliterate_obj.translit_sentence(spelled_out, lang)
    mel = t2s[0].generate_mel(' ' + model_input)
    waveform, sample_rate = t2s[1].generate_wav(mel)
    return waveform, sample_rate
src/config.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseSettings
2
+
3
+
4
class Settings(BaseSettings):
    # Application settings; each field below declares its default value.
    app_name: str = "TTS API"
    # Port the server listens on.
    server_port: int = 5000
    # Logging level name used when configuring loggers.
    log_level: str = 'DEBUG'
    # Whether a GPU should be used when one is available.
    gpu: bool = True
    tts_max_text_limit: int = 450
    # Prefix that is prepended to the model config path and model directories.
    models_base_path: str = ''
    model_config_file_path: str = 'model_dict.json'
12
+
13
+
14
+ settings = Settings()
src/infer/__init__.py ADDED
File without changes
src/infer/model_inference.py ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import ast
2
+ import json
3
+ import os
4
+
5
+ import torch
6
+ from mosestokenizer import *
7
+ from tts_infer.transliterate import XlitEngine
8
+ from tts_infer.tts import TextToMel, MelToWav
9
+
10
+ from src import log_setup, utilities
11
+ from src.config import settings
12
+
13
+ LOGGER = log_setup.get_logger(__name__)
14
+
15
+
16
def get_gpu_info(gpu):
    """Log whether GPU use is enabled and, if so, details of the visible devices."""
    LOGGER.info(f"*** GPU is enabled: {gpu} ***")
    if not gpu:
        return

    device_count = torch.cuda.device_count()
    LOGGER.info(f"*** Total number of gpus allocated are {device_count} ***")
    LOGGER.info(f"*** Cuda Version {torch.version.cuda} ***")
    LOGGER.info(f"*** Python process id {os.getpid()} ***")
    LOGGER.info("*** The gpu device info : ***")
    for index in range(device_count):
        LOGGER.info(f"GPU {str(index)} - {str(torch.cuda.get_device_name(index))}")
26
+
27
+
28
class ModelService:
    """Loads the configured TTS models and exposes them for inference.

    Attributes set by ``__init__``:
        device: ``"cuda"`` when CUDA is available and enabled in settings, else ``"cpu"``.
        supported_languages: language codes found in the model config file.
        available_choice: maps ``"<language>_<gender>"`` to a
            ``[TextToMel, MelToWav]`` pair.
        transliterate_obj: the ``XlitEngine`` instance used for transliteration.
    """

    def __init__(self):
        """Read the model config and load every requested language.

        The requested languages come from the ``languages`` environment
        variable, a Python list literal that defaults to ``["all"]``.

        Raises:
            Exception: the config file is missing, or no model was loaded.
        """
        LOGGER.info(f'Loading with settings {settings}')
        cuda_available = torch.cuda.is_available()
        LOGGER.info("Gpu present : %s", cuda_available)
        get_gpu_info(settings.gpu)

        self.device = "cuda" if cuda_available and settings.gpu else "cpu"
        LOGGER.info("Using device : %s", self.device)

        config_path = settings.models_base_path + settings.model_config_file_path
        if not os.path.exists(config_path):
            raise Exception(f'Model configuration file is missing at {config_path}')
        with open(config_path, 'r') as config_file:
            model_config = json.load(config_file)

        languages = ast.literal_eval(utilities.get_env_var('languages', '["all"]'))
        self.supported_languages = list(model_config.keys())
        LOGGER.info(f'supported languages {self.supported_languages}')
        self.available_choice = {}
        LOGGER.info(f'requested languages are {languages}')

        base_path = settings.models_base_path
        for language_code, lang_config in model_config.items():
            if not (language_code in languages or 'all' in languages):
                continue
            # A voice is loaded only when its "<gender>_glow" entry is present.
            for gender in ("male", "female"):
                glow_dir = lang_config.get(f"{gender}_glow")
                if glow_dir is None:
                    continue
                self.available_choice[f"{language_code}_{gender}"] = [
                    TextToMel(glow_model_dir=base_path + glow_dir,
                              device=self.device),
                    MelToWav(hifi_model_dir=base_path + lang_config.get(f"{gender}_hifi"),
                             device=self.device)]
            LOGGER.info(f'{language_code} Models initialized successfully')

        LOGGER.info(f'Model service available_choices are {self.available_choice}')
        if len(self.available_choice) == 0:
            LOGGER.error(
                f'Invalid languages requested in {languages} ,only supported languages are {self.supported_languages}')
            raise Exception('Invalid languages requested')
        self.transliterate_obj = XlitEngine()
src/log_setup.py ADDED
@@ -0,0 +1,49 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import logging
2
+ import os
3
+ import socket
4
+ import sys
5
+ from logging.handlers import TimedRotatingFileHandler
6
+ from pathlib import Path
7
+
8
+ from src.config import settings
9
+
10
+ _FORMATTER = logging.Formatter(
11
+ "%(asctime)s — [%(threadName)s] - %(name)s - %(filename)s.%(funcName)s(%(lineno)d) - %(levelname)s - %(message)s")
12
+ LOGS_MODEL_BASE_PATH = Path(os.environ.get('model_logs_base_path', os.getcwd()))
13
+ LOG_FILE = f"tts_inference_" + socket.gethostname() + ".log"
14
+
15
+ _FILE_LOGGER_HANDLER = None
16
+ _CONSOLE_LOGGER_HANDLER = None
17
+ _LOGGING_DEBUG = settings.log_level.upper()
18
+
19
+
20
def get_console_handler():
    """Return the shared stdout handler, creating it on first use."""
    global _CONSOLE_LOGGER_HANDLER
    if _CONSOLE_LOGGER_HANDLER is not None:
        return _CONSOLE_LOGGER_HANDLER

    stdout_handler = logging.StreamHandler(sys.stdout)
    stdout_handler.setFormatter(_FORMATTER)
    _CONSOLE_LOGGER_HANDLER = stdout_handler
    return _CONSOLE_LOGGER_HANDLER
27
+
28
+
29
def get_file_handler():
    """Return the shared rotating file handler, creating it on first use.

    The log file lives in ``LOGS_MODEL_BASE_PATH`` (created if missing),
    rotates at midnight and keeps 30 backups.
    """
    global _FILE_LOGGER_HANDLER
    if _FILE_LOGGER_HANDLER is None:
        # exist_ok avoids the check-then-create race when the directory
        # appears between the test and the call.
        os.makedirs(LOGS_MODEL_BASE_PATH, exist_ok=True)
        _FILE_LOGGER_HANDLER = TimedRotatingFileHandler(LOGS_MODEL_BASE_PATH / LOG_FILE,
                                                        when='midnight',
                                                        backupCount=30)
        _FILE_LOGGER_HANDLER.setFormatter(_FORMATTER)

    return _FILE_LOGGER_HANDLER
40
+
41
+
42
def get_logger(logger_name):
    """Return the logger named *logger_name*, wired to the console and file handlers.

    The level is taken from ``_LOGGING_DEBUG`` and propagation to parent
    loggers is turned off.
    """
    named_logger = logging.getLogger(logger_name)
    named_logger.setLevel(_LOGGING_DEBUG)
    for make_handler in (get_console_handler, get_file_handler):
        named_logger.addHandler(make_handler())
    # Both handlers are attached directly, so records are not also passed
    # up to the parent loggers.
    named_logger.propagate = False
    return named_logger
src/model/__init__.py ADDED
File without changes
src/model/language.py ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from pydantic import BaseModel, validator
2
+
3
+
4
class Language(BaseModel):
    # Holds the source language code of a request or response.
    sourceLanguage: str

    @validator('sourceLanguage', pre=True)
    def blank_string_in_language(cls, value, field):
        # Runs before pydantic's own validation (pre=True); only the empty
        # string is rejected here.
        if value != "":
            return value
        raise ValueError('sourceLanguage cannot be empty')
src/model/tts_request.py ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+
3
+ from pydantic import BaseModel, validator
4
+
5
+ from src.model.language import Language
6
+
7
+ SUPPORTED_GENDERS = {'male', 'female'}
8
+
9
+
10
class Sentence(BaseModel):
    # One piece of input text to synthesize.
    source: str

    @validator('source', pre=True)
    def blank_string_in_source(cls, value, field):
        # Runs before pydantic's own validation (pre=True); only the empty
        # string is rejected here.
        if value != "":
            return value
        raise ValueError('source cannot be empty')
18
+
19
+
20
class TTSConfig(BaseModel):
    # Request options: the language and the voice gender.
    language: Language
    gender: str

    @validator('gender', pre=True)
    def blank_string_in_gender(cls, value, field):
        # The empty string gets its own message; any other value must be one
        # of SUPPORTED_GENDERS (the comparison is case-sensitive).
        if value == "":
            raise ValueError('gender cannot be empty')
        if value in SUPPORTED_GENDERS:
            return value
        raise ValueError('Unsupported gender value')
31
+
32
+
33
class TTSRequest(BaseModel):
    # Body of a TTS request: the sentences to synthesize and their config.
    input: List[Sentence]
    config: TTSConfig

    @validator('input', pre=True)
    def input_cannot_be_empty(cls, value, field):
        # Runs on the raw value (pre=True) and rejects an empty collection.
        if len(value) >= 1:
            return value
        raise ValueError('input cannot be empty')
src/model/tts_response.py ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List
2
+
3
+ from pydantic import BaseModel
4
+
5
+ from src.model.language import Language
6
+
7
+
8
class AudioFile(BaseModel):
    # Audio payload carried as a string.
    audioContent: str
10
+
11
+
12
class AudioConfig(BaseModel):
    # Describes the audio in a response; every field except the language
    # has a declared default.
    language: Language
    audioFormat: str = 'wav'
    encoding: str = 'base64'
    samplingRate: int = 22050
17
+
18
+
19
class TTSResponse(BaseModel):
    # Successful response: a list of audio files plus the audio config.
    audio: List[AudioFile]
    config: AudioConfig
22
+
23
+
24
class TTSFailureResponse(BaseModel):
    # Error response: a fixed 'ERROR' status plus a description.
    status: str = 'ERROR'
    status_text: str
src/routers/__init__.py ADDED
File without changes
src/routers/exception_handler.py ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import Request, status
2
+ from fastapi.encoders import jsonable_encoder
3
+ from fastapi.responses import JSONResponse
4
+
5
+
6
async def validation_exception_handler(request: Request, exc: ValueError):
    """Turn a validation failure into a 400 response listing ``exc.errors()``.

    NOTE(review): ``exc`` is annotated as ``ValueError`` but must provide an
    ``errors()`` method; confirm the intended exception type.
    """
    body = jsonable_encoder({"detail": exc.errors()})
    return JSONResponse(status_code=status.HTTP_400_BAD_REQUEST, content=body)
src/routers/tts_routes.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from fastapi import APIRouter, HTTPException, Response, status
2
+
3
+ from src import log_setup
4
+ from src.application.tts_preprocess import infer_tts_request
5
+ from src.model.tts_request import TTSRequest
6
+ from src.model.tts_response import TTSFailureResponse
7
+
8
+ LOGGER = log_setup.get_logger(__name__)
9
+ router = APIRouter()
10
+
11
+
12
@router.post("/")
async def tts(request: TTSRequest, response: Response):
    # Runs inference for the request and returns the TTSResponse.
    # A missing model (NotImplementedError) is reported as 404 and any other
    # failure as 500, both as a TTSFailureResponse body.
    LOGGER.info(f'TTS request {request}')
    try:
        return infer_tts_request(request)
    except HTTPException:
        # Inference raises HTTPException deliberately (e.g. 400 for empty
        # text); re-raise so it keeps its own status instead of becoming 500.
        raise
    except NotImplementedError as e:
        LOGGER.exception('Failed to infer http exception %s', e)
        response.status_code = status.HTTP_404_NOT_FOUND
        return TTSFailureResponse(status_text=str(e))
    except Exception as e:
        LOGGER.exception('Failed to infer %s', e)
        response.status_code = status.HTTP_500_INTERNAL_SERVER_ERROR
        return TTSFailureResponse(status_text=f'Failed to process request {str(e)}')
src/utilities.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ import os
2
+
3
def get_env_var(var_name: str, default: str = '') -> str:
    """Return the environment variable *var_name*, or *default* when it is unset."""
    return os.environ.get(var_name, default)
5
+
tests/__init__.py ADDED
File without changes
tests/test_config.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import unittest
2
+
3
+ from src.config import Settings
4
+
5
+
6
class ConfigTest(unittest.TestCase):
    """Checks the default values of a freshly constructed ``Settings``."""

    def testDefaultValues(self):
        settings = Settings()
        # (attribute, expected default, failure message), checked in order.
        expected_defaults = (
            ('app_name', 'TTS API', 'app_name default value does not match'),
            ('server_port', 5000, 'server_port default value does not match'),
            ('gpu', True, 'gpu default value does not match'),
            ('tts_max_text_limit', 450, 'tts_max_text_limit default value does not match'),
            ('models_base_path', '', 'models_base_path default value does not match'),
            ('model_config_file_path', 'model_dict.json',
             'model_dict.json default value does not match'),
        )
        for attribute, expected, message in expected_defaults:
            self.assertEqual(getattr(settings, attribute), expected, message)
17
+
18
+
19
+ if __name__ == '__main__':
20
+ unittest.main()
tests/test_language.py ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import unittest
2
+
3
+ from pydantic import ValidationError
4
+
5
+ from src.model.language import Language
6
+
7
+
8
class LanguageTest(unittest.TestCase):
    """Tests for the ``Language`` model."""

    def testLanguage(self):
        language = Language(sourceLanguage='hi')
        self.assertEqual(language.sourceLanguage, 'hi', 'language does not match')

    def testLanguageRequiredValues(self):
        # Constructing without sourceLanguage must fail validation.
        with self.assertRaises(ValidationError):
            Language()
22
+
23
+
24
+ if __name__ == '__main__':
25
+ unittest.main()
tests/test_tts_request.py ADDED
@@ -0,0 +1,128 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import unittest
2
+
3
+ from pydantic import ValidationError
4
+
5
+ from src.model.tts_request import TTSRequest, Sentence, TTSConfig, Language
6
+
7
+
8
class TTSRequestTest(unittest.TestCase):
    """Validation tests for ``TTSRequest``, ``TTSConfig`` and ``Sentence``.

    Every negative test uses ``assertRaises`` so that it fails when the
    expected ``ValidationError`` is not raised.
    """

    def test_request(self):
        request = TTSRequest(input=[Sentence(source='ABC')],
                             config=TTSConfig(gender='female', language=Language(sourceLanguage='en')))
        self.assertEqual(request.input[0].source, 'ABC', 'Text does not match')
        self.assertEqual(request.config.language.sourceLanguage, 'en', 'Language does not match')
        self.assertEqual(request.config.gender, 'female', 'Gender does not match')

    def test_required_values(self):
        with self.assertRaises(ValidationError):
            TTSRequest()

    def test_config_is_required(self):
        with self.assertRaises(ValidationError):
            TTSRequest(input=[Sentence(source='ABC')])

    def test_input_is_required(self):
        # Supply a valid config and omit only the input.
        with self.assertRaises(ValidationError) as ctx:
            TTSRequest(config=TTSConfig(gender='female', language=Language(sourceLanguage='en')))
        self.assertEqual(ctx.exception.errors()[0]['loc'][0], 'input')

    def test_TTSConfig_without_gender(self):
        with self.assertRaises(ValidationError) as ctx:
            TTSConfig(language=Language(sourceLanguage='en'))
        errors = ctx.exception.errors()
        self.assertEqual(errors[0]['type'], 'value_error.missing')
        self.assertEqual(errors[0]['loc'][0], 'gender')

    def test_TTSConfig_empty_gender(self):
        with self.assertRaises(ValidationError) as ctx:
            TTSConfig(language=Language(sourceLanguage='en'), gender='')
        errors = ctx.exception.errors()
        self.assertEqual(errors[0]['loc'][0], 'gender')
        self.assertEqual(errors[0]['msg'], 'gender cannot be empty')

    def test_TTSConfig_empty_sourceLanguage(self):
        with self.assertRaises(ValidationError) as ctx:
            TTSConfig(language=Language(sourceLanguage=''), gender='male')
        errors = ctx.exception.errors()
        self.assertEqual(errors[0]['loc'][0], 'sourceLanguage')
        self.assertEqual(errors[0]['msg'], 'sourceLanguage cannot be empty')

    def test_TTSConfig(self):
        with self.assertRaises(ValidationError) as ctx:
            TTSConfig(gender='male')
        errors = ctx.exception.errors()
        self.assertEqual(len(errors), 1)
        self.assertEqual(errors[0]['type'], 'value_error.missing')
        self.assertEqual(errors[0]['loc'][0], 'language')

    def test_TTSConfig_unsupported_gender(self):
        # Gender matching is case-sensitive, so 'Male' is rejected.
        with self.assertRaises(ValidationError) as ctx:
            TTSConfig(language=Language(sourceLanguage='en'), gender='Male')
        errors = ctx.exception.errors()
        self.assertEqual(len(errors), 1)
        self.assertEqual(errors[0]['type'], 'value_error')
        self.assertEqual(errors[0]['loc'][0], 'gender')
        self.assertEqual(errors[0]['msg'], 'Unsupported gender value')

    def test_sentence_empty_source(self):
        with self.assertRaises(ValidationError) as ctx:
            Sentence(source='')
        errors = ctx.exception.errors()
        self.assertEqual(len(errors), 1)
        self.assertEqual(errors[0]['type'], 'value_error')
        self.assertEqual(errors[0]['loc'][0], 'source')
        self.assertEqual(errors[0]['msg'], 'source cannot be empty')

    def test_sentence(self):
        sentence = Sentence(source='Text')
        self.assertEqual(sentence.source, 'Text')

    def test_empty_input(self):
        with self.assertRaises(ValidationError) as ctx:
            TTSRequest(input=[], config=TTSConfig(gender='female', language=Language(sourceLanguage='en')))
        errors = ctx.exception.errors()
        self.assertEqual(len(errors), 1)
        self.assertEqual(errors[0]['type'], 'value_error')
        self.assertEqual(errors[0]['loc'][0], 'input')
        self.assertEqual(errors[0]['msg'], 'input cannot be empty')
125
+
126
+
127
+ if __name__ == '__main__':
128
+ unittest.main()
tests/test_tts_response.py ADDED
@@ -0,0 +1,62 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import unittest
2
+
3
+ from pydantic import ValidationError
4
+
5
+ from src.model.language import Language
6
+ from src.model.tts_response import TTSResponse, AudioConfig, AudioFile
7
+
8
+
9
class TTSResponseTest(unittest.TestCase):
    """Construction and validation tests for TTSResponse, AudioConfig and AudioFile."""

    def testRequest(self):
        """A fully specified response keeps its inputs and exposes AudioConfig defaults."""
        response = TTSResponse(
            audio=[AudioFile(audioContent='data')],
            config=AudioConfig(language=Language(sourceLanguage='hi')),
        )
        self.assertEqual(response.audio[0].audioContent, 'data', 'content does not match')
        self.assertEqual(response.config.language.sourceLanguage, 'hi', 'language does not match')
        self.assertEqual(response.config.samplingRate, 22050, 'default value for sample rate does not match')
        self.assertEqual(response.config.encoding, 'base64', 'default value for encoding is not set')
        self.assertEqual(response.config.audioFormat, 'wav', 'default value for audio format is not set')

    def testTextIsRequiredValues(self):
        """TTSResponse built with no arguments must raise a ValidationError."""
        # assertRaises is used instead of try / self.fail / except Exception:
        # the AssertionError raised by self.fail is itself an Exception and
        # would be caught and re-reported with a misleading message.
        with self.assertRaises(ValidationError, msg='Expected validation error got nothing'):
            TTSResponse()

    def testResponseRequiredConfig(self):
        """TTSResponse built with audio but no config must raise a ValidationError."""
        with self.assertRaises(ValidationError, msg='Expected validation error got nothing'):
            TTSResponse(audio=[AudioFile(audioContent='data')])

    def testResponseRequiredAudio(self):
        """TTSResponse built with a valid config but no audio must raise a ValidationError."""
        # Build the config first, outside the assertRaises block, and pass a
        # Language object as the other tests in this class do. With a bare
        # string the ValidationError could come from AudioConfig itself and
        # the missing-audio check would never be exercised.
        config = AudioConfig(language=Language(sourceLanguage='hi'))

        with self.assertRaises(ValidationError, msg='Expected validation error got nothing') as ctx:
            TTSResponse(config=config)

        # The failure must be attributed to the missing 'audio' field.
        error_fields = [error['loc'][0] for error in ctx.exception.errors()]
        self.assertIn('audio', error_fields)

    def testAudioConfigRequiredValues(self):
        """AudioConfig built with no arguments must raise a ValidationError."""
        with self.assertRaises(ValidationError, msg='Expected validation error got nothing'):
            AudioConfig()

    def testAudioConfigWithFiles(self):
        """AudioConfig built with only a language keeps that language."""
        config = AudioConfig(language=Language(sourceLanguage='hi'))
        self.assertEqual(config.language.sourceLanguage, 'hi', 'language does not match')
59
+
60
+
61
if __name__ == "__main__":
    # Run the test cases in this module when it is executed as a script.
    unittest.main()