mrfakename committed on
Commit
4b8ade9
1 Parent(s): 6c306f4

Revert "Allow acronym expansion"

Browse files

This reverts commit 6621613498a6b6109c28ff85e028b1a8ab6824a5.

Files changed (2) hide show
  1. app.py +13 -31
  2. requirements.txt +1 -3
app.py CHANGED
@@ -6,20 +6,6 @@ import os
6
  # from tortoise.utils.text import split_and_recombine_text
7
  import numpy as np
8
  import pickle
9
- import spacy
10
-
11
- from scispacy.abbreviation import AbbreviationDetector
12
- nlp = spacy.load("en_core_sci_sm")
13
-
14
- # Add the abbreviation pipe to the spacy pipeline.
15
- nlp.add_pipe("abbreviation_detector")
16
- def replace_acronyms(text):
17
- doc = nlp(text)
18
- altered_tok = [tok.text for tok in doc]
19
- for abrv in doc._.abbreviations:
20
- altered_tok[abrv.start] = str(abrv._.long_form)
21
-
22
- return(" ".join(altered_tok))
23
  theme = gr.themes.Base(
24
  font=[gr.themes.GoogleFont('Libre Franklin'), gr.themes.GoogleFont('Public Sans'), 'system-ui', 'sans-serif'],
25
  )
@@ -34,14 +20,12 @@ global_phonemizer = phonemizer.backend.EspeakBackend(language='en-us', preserve_
34
  # else:
35
  for v in voicelist:
36
  voices[v] = styletts2importable.compute_style(f'voices/{v}.wav')
37
- def synthesize(text, voice, multispeakersteps, msexpand):
38
  if text.strip() == "":
39
  raise gr.Error("You must enter some text")
40
  # if len(global_phonemizer.phonemize([text])) > 300:
41
  if len(text) > 300:
42
  raise gr.Error("Text must be under 300 characters")
43
- if msexpand:
44
- text = replace_acronyms(text)
45
  v = voice.lower()
46
  # return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=7, embedding_scale=1))
47
  return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=multispeakersteps, embedding_scale=1))
@@ -69,14 +53,14 @@ def clsynthesize(text, voice, vcsteps):
69
  raise gr.Error("Text must be under 400 characters")
70
  # return (24000, styletts2importable.inference(text, styletts2importable.compute_style(voice), alpha=0.3, beta=0.7, diffusion_steps=20, embedding_scale=1))
71
  return (24000, styletts2importable.inference(text, styletts2importable.compute_style(voice), alpha=0.3, beta=0.7, diffusion_steps=vcsteps, embedding_scale=1))
72
- def ljsynthesize(text, steps):
73
  if text.strip() == "":
74
  raise gr.Error("You must enter some text")
75
  # if global_phonemizer.phonemize([text]) > 300:
76
  if len(text) > 400:
77
  raise gr.Error("Text must be under 400 characters")
78
  noise = torch.randn(1,1,256).to('cuda' if torch.cuda.is_available() else 'cpu')
79
- return (24000, ljspeechimportable.inference(text, noise, diffusion_steps=steps, embedding_scale=1))
80
 
81
 
82
  with gr.Blocks() as vctk: # just realized it isn't vctk but libritts but i'm too lazy to change it rn
@@ -85,12 +69,11 @@ with gr.Blocks() as vctk: # just realized it isn't vctk but libritts but i'm too
85
  inp = gr.Textbox(label="Text", info="What would you like StyleTTS 2 to read? It works better on full sentences.", interactive=True)
86
  voice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", value='m-us-2', interactive=True)
87
  multispeakersteps = gr.Slider(minimum=5, maximum=15, value=7, step=1, label="Diffusion Steps", info="Higher = better quality, but slower", interactive=True)
88
- msexpand = gr.Checkbox(label="Expand acronyms", info="Expand acronyms using SciSpacy algorithm")
89
  # use_gruut = gr.Checkbox(label="Use alternate phonemizer (Gruut) - Experimental")
90
  with gr.Column(scale=1):
91
  btn = gr.Button("Synthesize", variant="primary")
92
  audio = gr.Audio(interactive=False, label="Synthesized Audio")
93
- btn.click(synthesize, inputs=[inp, voice, multispeakersteps, msexpand], outputs=[audio], concurrency_limit=4)
94
  with gr.Blocks() as clone:
95
  with gr.Row():
96
  with gr.Column(scale=1):
@@ -100,16 +83,7 @@ with gr.Blocks() as clone:
100
  with gr.Column(scale=1):
101
  clbtn = gr.Button("Synthesize", variant="primary")
102
  claudio = gr.Audio(interactive=False, label="Synthesized Audio")
103
- clbtn.click(clsynthesize, inputs=[clinp, clvoice, vcsteps], outputs=[claudio], concurrency_limit=2)
104
- with gr.Blocks() as lj:
105
- with gr.Row():
106
- with gr.Column(scale=1):
107
- ljinp = gr.Textbox(label="Text", info="What would you like StyleTTS 2 to read? It works better on full sentences.", interactive=True)
108
- with gr.Column(scale=1):
109
- ljbtn = gr.Button("Synthesize", variant="primary")
110
- ljaudio = gr.Audio(interactive=False, label="Synthesized Audio")
111
- ljsteps = gr.Slider(minimum=5, maximum=15, value=7, step=1, label="Diffusion Steps", info="Higher = better quality, but slower", interactive=True)
112
- ljbtn.click(ljsynthesize, inputs=[ljinp, ljsteps], outputs=[ljaudio], concurrency_limit=4)
113
  # with gr.Blocks() as longText:
114
  # with gr.Row():
115
  # with gr.Column(scale=1):
@@ -121,6 +95,14 @@ with gr.Blocks() as lj:
121
  # lngbtn = gr.Button("Synthesize", variant="primary")
122
  # lngaudio = gr.Audio(interactive=False, label="Synthesized Audio")
123
  # lngbtn.click(longsynthesize, inputs=[lnginp, lngvoice, lngsteps, lngpwd], outputs=[lngaudio], concurrency_limit=4)
 
 
 
 
 
 
 
 
124
  with gr.Blocks(title="StyleTTS 2", css="footer{display:none !important}", theme=theme) as demo:
125
  gr.Markdown("""# StyleTTS 2
126
 
 
6
  # from tortoise.utils.text import split_and_recombine_text
7
  import numpy as np
8
  import pickle
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  theme = gr.themes.Base(
10
  font=[gr.themes.GoogleFont('Libre Franklin'), gr.themes.GoogleFont('Public Sans'), 'system-ui', 'sans-serif'],
11
  )
 
20
  # else:
21
  for v in voicelist:
22
  voices[v] = styletts2importable.compute_style(f'voices/{v}.wav')
23
+ def synthesize(text, voice, multispeakersteps):
24
  if text.strip() == "":
25
  raise gr.Error("You must enter some text")
26
  # if len(global_phonemizer.phonemize([text])) > 300:
27
  if len(text) > 300:
28
  raise gr.Error("Text must be under 300 characters")
 
 
29
  v = voice.lower()
30
  # return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=7, embedding_scale=1))
31
  return (24000, styletts2importable.inference(text, voices[v], alpha=0.3, beta=0.7, diffusion_steps=multispeakersteps, embedding_scale=1))
 
53
  raise gr.Error("Text must be under 400 characters")
54
  # return (24000, styletts2importable.inference(text, styletts2importable.compute_style(voice), alpha=0.3, beta=0.7, diffusion_steps=20, embedding_scale=1))
55
  return (24000, styletts2importable.inference(text, styletts2importable.compute_style(voice), alpha=0.3, beta=0.7, diffusion_steps=vcsteps, embedding_scale=1))
56
+ def ljsynthesize(text):
57
  if text.strip() == "":
58
  raise gr.Error("You must enter some text")
59
  # if global_phonemizer.phonemize([text]) > 300:
60
  if len(text) > 400:
61
  raise gr.Error("Text must be under 400 characters")
62
  noise = torch.randn(1,1,256).to('cuda' if torch.cuda.is_available() else 'cpu')
63
+ return (24000, ljspeechimportable.inference(text, noise, diffusion_steps=7, embedding_scale=1))
64
 
65
 
66
  with gr.Blocks() as vctk: # just realized it isn't vctk but libritts but i'm too lazy to change it rn
 
69
  inp = gr.Textbox(label="Text", info="What would you like StyleTTS 2 to read? It works better on full sentences.", interactive=True)
70
  voice = gr.Dropdown(voicelist, label="Voice", info="Select a default voice.", value='m-us-2', interactive=True)
71
  multispeakersteps = gr.Slider(minimum=5, maximum=15, value=7, step=1, label="Diffusion Steps", info="Higher = better quality, but slower", interactive=True)
 
72
  # use_gruut = gr.Checkbox(label="Use alternate phonemizer (Gruut) - Experimental")
73
  with gr.Column(scale=1):
74
  btn = gr.Button("Synthesize", variant="primary")
75
  audio = gr.Audio(interactive=False, label="Synthesized Audio")
76
+ btn.click(synthesize, inputs=[inp, voice, multispeakersteps], outputs=[audio], concurrency_limit=4)
77
  with gr.Blocks() as clone:
78
  with gr.Row():
79
  with gr.Column(scale=1):
 
83
  with gr.Column(scale=1):
84
  clbtn = gr.Button("Synthesize", variant="primary")
85
  claudio = gr.Audio(interactive=False, label="Synthesized Audio")
86
+ clbtn.click(clsynthesize, inputs=[clinp, clvoice, vcsteps], outputs=[claudio], concurrency_limit=4)
 
 
 
 
 
 
 
 
 
87
  # with gr.Blocks() as longText:
88
  # with gr.Row():
89
  # with gr.Column(scale=1):
 
95
  # lngbtn = gr.Button("Synthesize", variant="primary")
96
  # lngaudio = gr.Audio(interactive=False, label="Synthesized Audio")
97
  # lngbtn.click(longsynthesize, inputs=[lnginp, lngvoice, lngsteps, lngpwd], outputs=[lngaudio], concurrency_limit=4)
98
+ with gr.Blocks() as lj:
99
+ with gr.Row():
100
+ with gr.Column(scale=1):
101
+ ljinp = gr.Textbox(label="Text", info="What would you like StyleTTS 2 to read? It works better on full sentences.", interactive=True)
102
+ with gr.Column(scale=1):
103
+ ljbtn = gr.Button("Synthesize", variant="primary")
104
+ ljaudio = gr.Audio(interactive=False, label="Synthesized Audio")
105
+ ljbtn.click(ljsynthesize, inputs=[ljinp], outputs=[ljaudio], concurrency_limit=4)
106
  with gr.Blocks(title="StyleTTS 2", css="footer{display:none !important}", theme=theme) as demo:
107
  gr.Markdown("""# StyleTTS 2
108
 
requirements.txt CHANGED
@@ -20,6 +20,4 @@ phonemizer
20
  cached-path
21
  gradio
22
  gruut
23
- # tortoise-tts
24
- spacy
25
- scispacy
 
20
  cached-path
21
  gradio
22
  gruut
23
+ # tortoise-tts