mrfakename
committed on
Commit
·
675a486
1
Parent(s):
d430de8
Switch phonemizer
Browse files
- ljspeechimportable.py +5 -6
- styletts2importable.py +8 -8
ljspeechimportable.py
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
from cached_path import cached_path
|
2 |
-
from dp.phonemizer import Phonemizer
|
3 |
|
4 |
|
5 |
import torch
|
@@ -68,10 +67,10 @@ def compute_style(ref_dicts):
|
|
68 |
return reference_embeddings
|
69 |
|
70 |
# load phonemizer
|
71 |
-
|
72 |
-
|
73 |
|
74 |
-
phonemizer = Phonemizer.from_checkpoint(str(cached_path('https://public-asai-dl-models.s3.eu-central-1.amazonaws.com/DeepPhonemizer/en_us_cmudict_ipa_forward.pt')))
|
75 |
|
76 |
|
77 |
config = yaml.safe_load(open(str(cached_path('hf://yl4579/StyleTTS2-LJSpeech/Models/LJSpeech/config.yml'))))
|
@@ -128,7 +127,7 @@ sampler = DiffusionSampler(
|
|
128 |
def inference(text, noise, diffusion_steps=5, embedding_scale=1):
|
129 |
text = text.strip()
|
130 |
text = text.replace('"', '')
|
131 |
-
ps =
|
132 |
ps = word_tokenize(ps[0])
|
133 |
ps = ' '.join(ps)
|
134 |
|
@@ -177,7 +176,7 @@ def inference(text, noise, diffusion_steps=5, embedding_scale=1):
|
|
177 |
def LFinference(text, s_prev, noise, alpha=0.7, diffusion_steps=5, embedding_scale=1):
|
178 |
text = text.strip()
|
179 |
text = text.replace('"', '')
|
180 |
-
ps =
|
181 |
ps = word_tokenize(ps[0])
|
182 |
ps = ' '.join(ps)
|
183 |
|
|
|
1 |
from cached_path import cached_path
|
|
|
2 |
|
3 |
|
4 |
import torch
|
|
|
67 |
return reference_embeddings
|
68 |
|
69 |
# load phonemizer
|
70 |
+
import phonemizer
|
71 |
+
global_phonemizer = phonemizer.backend.EspeakBackend(language='en-us', preserve_punctuation=True, with_stress=True, words_mismatch='ignore')
|
72 |
|
73 |
+
# phonemizer = Phonemizer.from_checkpoint(str(cached_path('https://public-asai-dl-models.s3.eu-central-1.amazonaws.com/DeepPhonemizer/en_us_cmudict_ipa_forward.pt')))
|
74 |
|
75 |
|
76 |
config = yaml.safe_load(open(str(cached_path('hf://yl4579/StyleTTS2-LJSpeech/Models/LJSpeech/config.yml'))))
|
|
|
127 |
def inference(text, noise, diffusion_steps=5, embedding_scale=1):
|
128 |
text = text.strip()
|
129 |
text = text.replace('"', '')
|
130 |
+
ps = global_phonemizer.phonemize([text])
|
131 |
ps = word_tokenize(ps[0])
|
132 |
ps = ' '.join(ps)
|
133 |
|
|
|
176 |
def LFinference(text, s_prev, noise, alpha=0.7, diffusion_steps=5, embedding_scale=1):
|
177 |
text = text.strip()
|
178 |
text = text.replace('"', '')
|
179 |
+
ps = global_phonemizer.phonemize([text])
|
180 |
ps = word_tokenize(ps[0])
|
181 |
ps = ' '.join(ps)
|
182 |
|
styletts2importable.py
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
from cached_path import cached_path
|
2 |
|
3 |
-
from dp.phonemizer import Phonemizer
|
4 |
print("NLTK")
|
5 |
import nltk
|
6 |
nltk.download('punkt')
|
@@ -73,9 +73,9 @@ elif torch.backends.mps.is_available():
|
|
73 |
print("MPS would be available but cannot be used rn")
|
74 |
# device = 'mps'
|
75 |
|
76 |
-
|
77 |
-
|
78 |
-
phonemizer = Phonemizer.from_checkpoint(str(cached_path('https://public-asai-dl-models.s3.eu-central-1.amazonaws.com/DeepPhonemizer/en_us_cmudict_ipa_forward.pt')))
|
79 |
|
80 |
|
81 |
# config = yaml.safe_load(open("Models/LibriTTS/config.yml"))
|
@@ -133,7 +133,7 @@ sampler = DiffusionSampler(
|
|
133 |
|
134 |
def inference(text, ref_s, alpha = 0.3, beta = 0.7, diffusion_steps=5, embedding_scale=1):
|
135 |
text = text.strip()
|
136 |
-
ps =
|
137 |
ps = word_tokenize(ps[0])
|
138 |
ps = ' '.join(ps)
|
139 |
tokens = textclenaer(ps)
|
@@ -202,7 +202,7 @@ def inference(text, ref_s, alpha = 0.3, beta = 0.7, diffusion_steps=5, embedding
|
|
202 |
|
203 |
def LFinference(text, s_prev, ref_s, alpha = 0.3, beta = 0.7, t = 0.7, diffusion_steps=5, embedding_scale=1):
|
204 |
text = text.strip()
|
205 |
-
ps =
|
206 |
ps = word_tokenize(ps[0])
|
207 |
ps = ' '.join(ps)
|
208 |
ps = ps.replace('``', '"')
|
@@ -279,7 +279,7 @@ def LFinference(text, s_prev, ref_s, alpha = 0.3, beta = 0.7, t = 0.7, diffusion
|
|
279 |
|
280 |
def STinference(text, ref_s, ref_text, alpha = 0.3, beta = 0.7, diffusion_steps=5, embedding_scale=1):
|
281 |
text = text.strip()
|
282 |
-
ps =
|
283 |
ps = word_tokenize(ps[0])
|
284 |
ps = ' '.join(ps)
|
285 |
|
@@ -288,7 +288,7 @@ def STinference(text, ref_s, ref_text, alpha = 0.3, beta = 0.7, diffusion_steps=
|
|
288 |
tokens = torch.LongTensor(tokens).to(device).unsqueeze(0)
|
289 |
|
290 |
ref_text = ref_text.strip()
|
291 |
-
ps =
|
292 |
ps = word_tokenize(ps[0])
|
293 |
ps = ' '.join(ps)
|
294 |
|
|
|
1 |
from cached_path import cached_path
|
2 |
|
3 |
+
# from dp.phonemizer import Phonemizer
|
4 |
print("NLTK")
|
5 |
import nltk
|
6 |
nltk.download('punkt')
|
|
|
73 |
print("MPS would be available but cannot be used rn")
|
74 |
# device = 'mps'
|
75 |
|
76 |
+
import phonemizer
|
77 |
+
global_phonemizer = phonemizer.backend.EspeakBackend(language='en-us', preserve_punctuation=True, with_stress=True)
|
78 |
+
# phonemizer = Phonemizer.from_checkpoint(str(cached_path('https://public-asai-dl-models.s3.eu-central-1.amazonaws.com/DeepPhonemizer/en_us_cmudict_ipa_forward.pt')))
|
79 |
|
80 |
|
81 |
# config = yaml.safe_load(open("Models/LibriTTS/config.yml"))
|
|
|
133 |
|
134 |
def inference(text, ref_s, alpha = 0.3, beta = 0.7, diffusion_steps=5, embedding_scale=1):
|
135 |
text = text.strip()
|
136 |
+
ps = global_phonemizer.phonemize([text])
|
137 |
ps = word_tokenize(ps[0])
|
138 |
ps = ' '.join(ps)
|
139 |
tokens = textclenaer(ps)
|
|
|
202 |
|
203 |
def LFinference(text, s_prev, ref_s, alpha = 0.3, beta = 0.7, t = 0.7, diffusion_steps=5, embedding_scale=1):
|
204 |
text = text.strip()
|
205 |
+
ps = global_phonemizer.phonemize([text])
|
206 |
ps = word_tokenize(ps[0])
|
207 |
ps = ' '.join(ps)
|
208 |
ps = ps.replace('``', '"')
|
|
|
279 |
|
280 |
def STinference(text, ref_s, ref_text, alpha = 0.3, beta = 0.7, diffusion_steps=5, embedding_scale=1):
|
281 |
text = text.strip()
|
282 |
+
ps = global_phonemizer.phonemize([text])
|
283 |
ps = word_tokenize(ps[0])
|
284 |
ps = ' '.join(ps)
|
285 |
|
|
|
288 |
tokens = torch.LongTensor(tokens).to(device).unsqueeze(0)
|
289 |
|
290 |
ref_text = ref_text.strip()
|
291 |
+
ps = global_phonemizer.phonemize([ref_text])
|
292 |
ps = word_tokenize(ps[0])
|
293 |
ps = ' '.join(ps)
|
294 |
|