stefantaubert committed on
Commit
f7244fb
·
1 Parent(s): e3bcf30
app.py CHANGED
@@ -1,3 +1,24 @@
1
- from en_tts_gr import run
 
2
 
3
- run()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import sys
2
+ from functools import partial
3
 
4
+ from en_tts_app import initialize_app, run_main
5
+ from en_tts_gr import build_interface
6
+
7
+ exit_code = initialize_app()
8
+ if exit_code > 0:
9
+ sys.exit(exit_code)
10
+
11
+ interface = build_interface(cache_examples=False)
12
+ interface.queue()
13
+
14
+ launch_method = partial(
15
+ interface.launch,
16
+ share=False,
17
+ debug=True,
18
+ inbrowser=True,
19
+ quiet=False,
20
+ show_api=False,
21
+ )
22
+
23
+ exit_code = run_main(launch_method)
24
+ sys.exit(exit_code)
en_tts_app/__init__.py CHANGED
@@ -1,3 +1,5 @@
1
  from en_tts_app.app import initialize_app, run_main
2
- from en_tts_app.globals import get_conf_dir, get_log_path, get_work_dir, reset_log
3
- from en_tts_app.main import load_models_to_cache, synthesize_english
 
 
 
1
  from en_tts_app.app import initialize_app, run_main
2
+ from en_tts_app.globals import APP_NAME, APP_VERSION, get_conf_dir, get_log_path, get_work_dir
3
+ from en_tts_app.logging_configuration import get_app_logger, get_file_logger
4
+ from en_tts_app.main import (load_models_to_cache, reset_log, reset_work_dir, synthesize_english,
5
+ synthesize_ipa)
en_tts_app/globals.py CHANGED
@@ -1,8 +1,10 @@
1
- import shutil
2
  from pathlib import Path
3
  from tempfile import gettempdir
4
 
5
- from en_tts_cli.logging_configuration import get_cli_logger
 
 
6
 
7
 
8
  def get_conf_dir() -> Path:
@@ -17,18 +19,3 @@ def get_work_dir() -> Path:
17
 
18
  def get_log_path() -> Path:
19
  return Path(gettempdir()) / "en-tts.log"
20
-
21
-
22
- def reset_log() -> None:
23
- get_log_path().write_text("", "utf-8")
24
-
25
-
26
- def reset_work_dir():
27
- root_logger = get_cli_logger()
28
- work_dir = get_work_dir()
29
-
30
- if work_dir.is_dir():
31
- root_logger.debug("Deleting working directory ...")
32
- shutil.rmtree(work_dir)
33
- root_logger.debug("Creating working directory ...")
34
- work_dir.mkdir(parents=False, exist_ok=False)
 
1
+ from importlib.metadata import version
2
  from pathlib import Path
3
  from tempfile import gettempdir
4
 
5
+ APP_NAME = "en-tts"
6
+
7
+ APP_VERSION = version(APP_NAME)
8
 
9
 
10
  def get_conf_dir() -> Path:
 
19
 
20
  def get_log_path() -> Path:
21
  return Path(gettempdir()) / "en-tts.log"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
en_tts_app/logging_configuration.py CHANGED
@@ -2,14 +2,11 @@ import logging
2
  import os
3
  import platform
4
  import sys
5
- from importlib.metadata import version
6
  from logging import Formatter, Handler, Logger, StreamHandler, getLogger
7
  from pathlib import Path
8
  from pkgutil import iter_modules
9
 
10
- __APP_NAME = "en-tts"
11
-
12
- __version__ = version(__APP_NAME)
13
 
14
 
15
  class ConsoleFormatter(logging.Formatter):
@@ -129,7 +126,7 @@ def log_sysinfo():
129
  flogger = get_file_logger()
130
 
131
  sys_version = sys.version.replace('\n', '')
132
- flogger.debug(f"CLI version: {__version__}")
133
  flogger.debug(f"Python version: {sys_version}")
134
  flogger.debug("Modules: %s", ', '.join(sorted(p.name for p in iter_modules())))
135
 
 
2
  import os
3
  import platform
4
  import sys
 
5
  from logging import Formatter, Handler, Logger, StreamHandler, getLogger
6
  from pathlib import Path
7
  from pkgutil import iter_modules
8
 
9
+ from en_tts_app.globals import APP_VERSION
 
 
10
 
11
 
12
  class ConsoleFormatter(logging.Formatter):
 
126
  flogger = get_file_logger()
127
 
128
  sys_version = sys.version.replace('\n', '')
129
+ flogger.debug(f"CLI version: {APP_VERSION}")
130
  flogger.debug(f"Python version: {sys_version}")
131
  flogger.debug("Modules: %s", ', '.join(sorted(p.name for p in iter_modules())))
132
 
en_tts_app/main.py CHANGED
@@ -3,6 +3,7 @@ import shutil
3
  from pathlib import Path
4
  from typing import Dict, Optional
5
 
 
6
  from ordered_set import OrderedSet
7
  from pronunciation_dictionary import PronunciationDict, SerializationOptions, save_dict
8
 
@@ -10,28 +11,61 @@ from en_tts.helper import get_default_device, normalize_audio
10
  from en_tts.io import save_audio
11
  from en_tts.synthesizer import Synthesizer
12
  from en_tts.transcriber import Transcriber
13
- from en_tts_app.globals import get_conf_dir, get_work_dir, reset_log, reset_work_dir
14
  from en_tts_app.logging_configuration import get_app_logger, get_file_logger, log_sysinfo
15
 
16
  CACHE_TRANSCRIBER = "transcriber"
17
  CACHE_SYNTHESIZER = "synthesizer"
18
 
19
 
20
- def load_models_to_cache() -> Dict:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
21
  cli_logger = get_app_logger()
22
  cache = {}
23
 
24
  conf_dir = get_conf_dir()
25
- device = get_default_device()
26
 
27
  cli_logger.info("Initializing Transcriber...")
28
  cache[CACHE_TRANSCRIBER] = Transcriber(conf_dir)
29
 
 
 
 
30
  cli_logger.info("Initializing Synthesizer...")
31
- cache[CACHE_SYNTHESIZER] = Synthesizer(conf_dir, device)
32
  return cache
33
 
34
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35
  def synthesize_english(text: str, cache: Dict, *, max_decoder_steps: int = 5000, sigma: float = 1.0, denoiser_strength: float = 0.0005, seed: int = 0, silence_sentences: float = 0.4, silence_paragraphs: float = 1.0, loglevel: int = 2, skip_normalization: bool = False, skip_sentence_separation: bool = False, custom_output: Optional[Path] = None) -> Path:
36
  cli_logger = get_app_logger()
37
  reset_log()
 
3
  from pathlib import Path
4
  from typing import Dict, Optional
5
 
6
+ import torch
7
  from ordered_set import OrderedSet
8
  from pronunciation_dictionary import PronunciationDict, SerializationOptions, save_dict
9
 
 
11
  from en_tts.io import save_audio
12
  from en_tts.synthesizer import Synthesizer
13
  from en_tts.transcriber import Transcriber
14
+ from en_tts_app.globals import get_conf_dir, get_log_path, get_work_dir
15
  from en_tts_app.logging_configuration import get_app_logger, get_file_logger, log_sysinfo
16
 
17
  CACHE_TRANSCRIBER = "transcriber"
18
  CACHE_SYNTHESIZER = "synthesizer"
19
 
20
 
21
+ def reset_work_dir():
22
+ root_logger = get_app_logger()
23
+ work_dir = get_work_dir()
24
+
25
+ if work_dir.is_dir():
26
+ root_logger.debug("Deleting working directory ...")
27
+ shutil.rmtree(work_dir)
28
+ root_logger.debug("Creating working directory ...")
29
+ work_dir.mkdir(parents=False, exist_ok=False)
30
+
31
+
32
+ def reset_log() -> None:
33
+ get_log_path().write_text("", "utf-8")
34
+
35
+
36
+ def load_models_to_cache(custom_device: torch.device = None) -> Dict:
37
  cli_logger = get_app_logger()
38
  cache = {}
39
 
40
  conf_dir = get_conf_dir()
 
41
 
42
  cli_logger.info("Initializing Transcriber...")
43
  cache[CACHE_TRANSCRIBER] = Transcriber(conf_dir)
44
 
45
+ if custom_device is None:
46
+ custom_device = get_default_device()
47
+
48
  cli_logger.info("Initializing Synthesizer...")
49
+ cache[CACHE_SYNTHESIZER] = Synthesizer(conf_dir, custom_device)
50
  return cache
51
 
52
 
53
+ def synthesize_ipa(text_ipa: str, cache: Dict, *, max_decoder_steps: int = 5000, sigma: float = 1.0, denoiser_strength: float = 0.0005, seed: int = 0, silence_sentences: float = 0.4, silence_paragraphs: float = 1.0, loglevel: int = 2, custom_output: Optional[Path] = None):
54
+ if loglevel >= 1:
55
+ try_log_text(text_ipa, "text")
56
+
57
+ if custom_output is None:
58
+ custom_output = get_work_dir() / "output.wav"
59
+
60
+
61
+ output_path = synthesize_ipa_core(
62
+ text_ipa, cache[CACHE_SYNTHESIZER], custom_output,
63
+ max_decoder_steps=max_decoder_steps, sigma=sigma, denoiser_strength=denoiser_strength, seed=seed, silence_sentences=silence_sentences, silence_paragraphs=silence_paragraphs, loglevel=loglevel,
64
+ )
65
+
66
+ return output_path
67
+
68
+
69
  def synthesize_english(text: str, cache: Dict, *, max_decoder_steps: int = 5000, sigma: float = 1.0, denoiser_strength: float = 0.0005, seed: int = 0, silence_sentences: float = 0.4, silence_paragraphs: float = 1.0, loglevel: int = 2, skip_normalization: bool = False, skip_sentence_separation: bool = False, custom_output: Optional[Path] = None) -> Path:
70
  cli_logger = get_app_logger()
71
  reset_log()
en_tts_gr/app.py CHANGED
@@ -10,7 +10,7 @@ from typing import Dict
10
  import gradio as gr
11
  from scipy.io.wavfile import read
12
 
13
- from en_tts_app import (get_log_path, get_work_dir, initialize_app,
14
  load_models_to_cache, run_main, synthesize_english)
15
 
16
 
@@ -19,7 +19,7 @@ def run():
19
  if exit_code > 0:
20
  sys.exit(exit_code)
21
 
22
- interface = build_interface()
23
  interface.queue()
24
 
25
  launch_method = partial(
@@ -35,7 +35,7 @@ def run():
35
  sys.exit(exit_code)
36
 
37
 
38
- def build_interface():
39
  cache = load_models_to_cache()
40
 
41
  fn = partial(synt, cache=cache)
@@ -172,10 +172,10 @@ def build_interface():
172
  "When the sunlight strikes raindrops in the air, they act as a prism and form a rainbow.",
173
  5000, 1.0, 0.0005, 0, 0.4, 1.0, False, False
174
  ],
175
- [
176
- "Please call Stella. Ask her to bring these things with her from the store: six spoons of fresh snow peas, five thick slabs of blue cheese, and maybe a snack for her brother Bob.\n\nWe also need a small plastic snake and a big toy frog for the kids. She can scoop these things into three red bags, and we will go meet her Wednesday at the train station.",
177
- 5000, 1.0, 0.0005, 0, 0.4, 1.0, False, False
178
- ],
179
  ],
180
  fn=fn,
181
  inputs=[
@@ -195,13 +195,13 @@ def build_interface():
195
  dl_btn,
196
  ],
197
  label="Examples",
198
- cache_examples=True,
199
  )
200
 
201
  with gr.Tab("Info"):
202
  with gr.Column():
203
  gr.Markdown(
204
- """
205
  ### General information
206
 
207
  - Speaker: Linda Johnson
@@ -228,7 +228,7 @@ def build_interface():
228
 
229
  ### Citation
230
 
231
- Taubert, S. (2024). en-tts (Version 0.0.1) [Computer software]. https://doi.org/10.5281/zenodo.10479347
232
 
233
  ### Acknowledgments
234
 
@@ -240,12 +240,13 @@ def build_interface():
240
 
241
  ### App information
242
 
243
- - Version: 0.0.1
244
  - License: [MIT](https://github.com/stefantaubert/en-tts?tab=MIT-1-ov-file#readme)
245
  - GitHub: [stefantaubert/en-tts](https://github.com/stefantaubert/en-tts)
246
  """
247
  )
248
 
 
249
  synt_btn.click(
250
  fn=fn,
251
  inputs=[
 
10
  import gradio as gr
11
  from scipy.io.wavfile import read
12
 
13
+ from en_tts_app import (APP_VERSION, get_log_path, get_work_dir, initialize_app,
14
  load_models_to_cache, run_main, synthesize_english)
15
 
16
 
 
19
  if exit_code > 0:
20
  sys.exit(exit_code)
21
 
22
+ interface = build_interface(cache_examples=False)
23
  interface.queue()
24
 
25
  launch_method = partial(
 
35
  sys.exit(exit_code)
36
 
37
 
38
+ def build_interface(cache_examples: bool = False):
39
  cache = load_models_to_cache()
40
 
41
  fn = partial(synt, cache=cache)
 
172
  "When the sunlight strikes raindrops in the air, they act as a prism and form a rainbow.",
173
  5000, 1.0, 0.0005, 0, 0.4, 1.0, False, False
174
  ],
175
+ # [
176
+ # "Please call Stella. Ask her to bring these things with her from the store: six spoons of fresh snow peas, five thick slabs of blue cheese, and maybe a snack for her brother Bob.\n\nWe also need a small plastic snake and a big toy frog for the kids. She can scoop these things into three red bags, and we will go meet her Wednesday at the train station.",
177
+ # 5000, 1.0, 0.0005, 0, 0.4, 1.0, False, False
178
+ # ],
179
  ],
180
  fn=fn,
181
  inputs=[
 
195
  dl_btn,
196
  ],
197
  label="Examples",
198
+ cache_examples=cache_examples,
199
  )
200
 
201
  with gr.Tab("Info"):
202
  with gr.Column():
203
  gr.Markdown(
204
+ f"""
205
  ### General information
206
 
207
  - Speaker: Linda Johnson
 
228
 
229
  ### Citation
230
 
231
+ Taubert, S. (2024). en-tts (Version {APP_VERSION}) [Computer software]. https://doi.org/10.5281/zenodo.11032264
232
 
233
  ### Acknowledgments
234
 
 
240
 
241
  ### App information
242
 
243
+ - Version: {APP_VERSION}
244
  - License: [MIT](https://github.com/stefantaubert/en-tts?tab=MIT-1-ov-file#readme)
245
  - GitHub: [stefantaubert/en-tts](https://github.com/stefantaubert/en-tts)
246
  """
247
  )
248
 
249
+ # pylint: disable=E1101:no-member
250
  synt_btn.click(
251
  fn=fn,
252
  inputs=[