Spaces:
Running
Running
publish API
Browse files- app.py +48 -13
- assets/lojban/lojban.py +0 -352
- dev.sh +2 -1
- pretrained/{nix-ljspeech-sdp-v0.1 → nix-tts/nix-ljspeech-sdp-v0.1}/decoder.onnx +0 -0
- pretrained/{nix-ljspeech-sdp-v0.1 → nix-tts/nix-ljspeech-sdp-v0.1}/encoder.onnx +0 -0
- pretrained/{nix-ljspeech-sdp-v0.1 → nix-tts/nix-ljspeech-sdp-v0.1}/tokenizer_state.pkl +0 -0
- pretrained/{nix-ljspeech-v0.1 → nix-tts/nix-ljspeech-v0.1}/decoder.onnx +0 -0
- pretrained/{nix-ljspeech-v0.1 → nix-tts/nix-ljspeech-v0.1}/encoder.onnx +0 -0
- pretrained/{nix-ljspeech-v0.1 → nix-tts/nix-ljspeech-v0.1}/tokenizer_state.pkl +0 -0
- pretrained/{pretrained_ljs.pth → vits/pretrained_ljs.pth} +0 -0
- pretrained/{pretrained_vctk.pth → vits/pretrained_vctk.pth} +0 -0
- vits/utils.py +0 -2
app.py
CHANGED
@@ -118,7 +118,7 @@ def load_checkpoints():
|
|
118 |
**hps.model)
|
119 |
_ = model.eval()
|
120 |
|
121 |
-
_ = utils.load_checkpoint(current + "/pretrained/pretrained_ljs.pth", model, None)
|
122 |
|
123 |
hps_vctk = utils.get_hparams_from_file(current + "/vits/configs/vctk_base.json")
|
124 |
net_g_vctk = SynthesizerTrn(
|
@@ -129,11 +129,47 @@ def load_checkpoints():
|
|
129 |
**hps_vctk.model)
|
130 |
_ = model.eval()
|
131 |
|
132 |
-
_ = utils.load_checkpoint(current + "/pretrained/pretrained_vctk.pth", net_g_vctk, None)
|
133 |
|
134 |
return model, hps, net_g_vctk, hps_vctk
|
135 |
|
136 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
137 |
def inference(text, language, noise_scale, noise_scale_w, length_scale, voice):
|
138 |
if len(text.strip())==0:
|
139 |
return []
|
@@ -141,9 +177,9 @@ def inference(text, language, noise_scale, noise_scale_w, length_scale, voice):
|
|
141 |
language = language_id_lookup[language] if bool(
|
142 |
language_id_lookup[language]) else "jbo"
|
143 |
if voice == 'Nix-Deterministic' and language == 'jbo':
|
144 |
-
return generate_voice(lojban2ipa(text,'nix'), current+"/pretrained/nix-ljspeech-v0.1")
|
145 |
elif voice == 'Nix-Stochastic' and language == 'jbo':
|
146 |
-
return generate_voice(lojban2ipa(text,'nix'), current+"/pretrained/nix-ljspeech-sdp-v0.1")
|
147 |
elif voice == 'LJS':
|
148 |
ipa_text, stn_tst = get_text(text, language, hps, mode="VITS")
|
149 |
with torch.no_grad():
|
@@ -151,7 +187,7 @@ def inference(text, language, noise_scale, noise_scale_w, length_scale, voice):
|
|
151 |
x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
|
152 |
audio = model.infer(x_tst, x_tst_lengths, noise_scale=noise_scale,
|
153 |
noise_scale_w=noise_scale_w, length_scale=length_scale)[0][0, 0].data.float().numpy()
|
154 |
-
return [ipa_text, (hps.data.sampling_rate, audio)]
|
155 |
else:
|
156 |
ipa_text, stn_tst = get_text(text, language, hps_vctk, mode="VITS")
|
157 |
with torch.no_grad():
|
@@ -160,14 +196,15 @@ def inference(text, language, noise_scale, noise_scale_w, length_scale, voice):
|
|
160 |
sid = torch.LongTensor([voice])
|
161 |
audio = model_vctk.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=noise_scale,
|
162 |
noise_scale_w=noise_scale_w, length_scale=length_scale)[0][0, 0].data.cpu().float().numpy()
|
163 |
-
return [ipa_text, (hps_vctk.data.sampling_rate, audio)]
|
164 |
|
165 |
|
166 |
# download_pretrained()
|
167 |
model, hps, model_vctk, hps_vctk = load_checkpoints()
|
168 |
|
169 |
defaults = {
|
170 |
-
"text": "
|
|
|
171 |
"noise_scale": .667,
|
172 |
"noise_scale_w": .8,
|
173 |
"speed": 1.8,
|
@@ -199,12 +236,12 @@ with gr.Blocks(css=css) as demo:
|
|
199 |
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2106.06103'>Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech</a> | <a href='https://github.com/jaywalnut310/vits'>Github Repo</a></p>"
|
200 |
with gr.Row():
|
201 |
with gr.Column():
|
202 |
-
input_text = gr.Textbox(lines=4, label="Input text", placeholder="add your text, or click one of the examples to load them")
|
203 |
langs = gr.Radio([
|
204 |
'Lojban',
|
205 |
'English',
|
206 |
'Transcription',
|
207 |
-
], value=defaults["
|
208 |
voices = gr.Radio(["LJS", 0, 1, 2, 3, 4, "Nix-Deterministic", "Nix-Stochastic"], value=defaults["voice"], label="Voice")
|
209 |
noise_scale = gr.Slider(label="Noise scale", minimum=0, maximum=2,
|
210 |
step=0.1, value=defaults["noise_scale"])
|
@@ -225,18 +262,16 @@ with gr.Blocks(css=css) as demo:
|
|
225 |
audio = gr.Audio(type="numpy", label="Output audio")
|
226 |
outputs = [ ipa_block, audio ]
|
227 |
btn = gr.Button("Vocalize")
|
228 |
-
btn.click(fn=inference, inputs=inputs, outputs=outputs)
|
229 |
|
230 |
examples = list(map(lambda el: el[0:len(el)] + defaults["example"][len(el):], [
|
231 |
["coi ro do ma nuzba", "Lojban"],
|
232 |
["mi djica lo nu do zvati ti", "Lojban", 0.667, 0.8, 1.8,4],
|
233 |
["mu xagji sofybakni cu zvati le purdi", "Lojban", 0.667, 0.8, 1.8, "Nix-Deterministic"],
|
234 |
["ni'o le pa tirxu be me'e zo .teris. pu ki kansa le za'u pendo be le nei le ka xabju le foldi be loi spati", "Lojban"],
|
235 |
-
[",
|
236 |
["We propose VITS, Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech.", "English"],
|
237 |
]))
|
238 |
gr.Examples(examples, inputs, fn=inference, outputs=outputs, cache_examples=True, run_on_click=True)
|
239 |
|
240 |
demo.launch(server_name="0.0.0.0")
|
241 |
-
# gr.Interface(fn=inference, inputs=inputs, outputs=outputs, title=title,
|
242 |
-
# description=description, article=article, examples=examples).launch(server_name="0.0.0.0")
|
|
|
118 |
**hps.model)
|
119 |
_ = model.eval()
|
120 |
|
121 |
+
_ = utils.load_checkpoint(current + "/pretrained/vits/pretrained_ljs.pth", model, None)
|
122 |
|
123 |
hps_vctk = utils.get_hparams_from_file(current + "/vits/configs/vctk_base.json")
|
124 |
net_g_vctk = SynthesizerTrn(
|
|
|
129 |
**hps_vctk.model)
|
130 |
_ = model.eval()
|
131 |
|
132 |
+
_ = utils.load_checkpoint(current + "/pretrained/vits/pretrained_vctk.pth", net_g_vctk, None)
|
133 |
|
134 |
return model, hps, net_g_vctk, hps_vctk
|
135 |
|
136 |
|
137 |
+
def float2pcm(sig, dtype='int16'):
|
138 |
+
"""Convert floating point signal with a range from -1 to 1 to PCM.
|
139 |
+
Any signal values outside the interval [-1.0, 1.0) are clipped.
|
140 |
+
No dithering is used.
|
141 |
+
Note that there are different possibilities for scaling floating
|
142 |
+
point numbers to PCM numbers, this function implements just one of
|
143 |
+
them. For an overview of alternatives see
|
144 |
+
http://blog.bjornroche.com/2009/12/int-float-int-its-jungle-out-there.html
|
145 |
+
Parameters
|
146 |
+
----------
|
147 |
+
sig : array_like
|
148 |
+
Input array, must have floating point type.
|
149 |
+
dtype : data type, optional
|
150 |
+
Desired (integer) data type.
|
151 |
+
Returns
|
152 |
+
-------
|
153 |
+
numpy.ndarray
|
154 |
+
Integer data, scaled and clipped to the range of the given
|
155 |
+
*dtype*.
|
156 |
+
See Also
|
157 |
+
--------
|
158 |
+
pcm2float, dtype
|
159 |
+
"""
|
160 |
+
sig = np.asarray(sig)
|
161 |
+
if sig.dtype.kind != 'f':
|
162 |
+
raise TypeError("'sig' must be a float array")
|
163 |
+
dtype = np.dtype(dtype)
|
164 |
+
if dtype.kind not in 'iu':
|
165 |
+
raise TypeError("'dtype' must be an integer type")
|
166 |
+
|
167 |
+
i = np.iinfo(dtype)
|
168 |
+
abs_max = 2 ** (i.bits - 1)
|
169 |
+
offset = i.min + abs_max
|
170 |
+
return (sig * abs_max + offset).clip(i.min, i.max).astype(dtype)
|
171 |
+
|
172 |
+
|
173 |
def inference(text, language, noise_scale, noise_scale_w, length_scale, voice):
|
174 |
if len(text.strip())==0:
|
175 |
return []
|
|
|
177 |
language = language_id_lookup[language] if bool(
|
178 |
language_id_lookup[language]) else "jbo"
|
179 |
if voice == 'Nix-Deterministic' and language == 'jbo':
|
180 |
+
return generate_voice(lojban2ipa(text,'nix'), current+"/pretrained/nix-tts/nix-ljspeech-v0.1")
|
181 |
elif voice == 'Nix-Stochastic' and language == 'jbo':
|
182 |
+
return generate_voice(lojban2ipa(text,'nix'), current+"/pretrained/nix-tts/nix-ljspeech-sdp-v0.1")
|
183 |
elif voice == 'LJS':
|
184 |
ipa_text, stn_tst = get_text(text, language, hps, mode="VITS")
|
185 |
with torch.no_grad():
|
|
|
187 |
x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
|
188 |
audio = model.infer(x_tst, x_tst_lengths, noise_scale=noise_scale,
|
189 |
noise_scale_w=noise_scale_w, length_scale=length_scale)[0][0, 0].data.float().numpy()
|
190 |
+
return [ipa_text, (hps.data.sampling_rate, float2pcm(audio))]
|
191 |
else:
|
192 |
ipa_text, stn_tst = get_text(text, language, hps_vctk, mode="VITS")
|
193 |
with torch.no_grad():
|
|
|
196 |
sid = torch.LongTensor([voice])
|
197 |
audio = model_vctk.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=noise_scale,
|
198 |
noise_scale_w=noise_scale_w, length_scale=length_scale)[0][0, 0].data.cpu().float().numpy()
|
199 |
+
return [ipa_text, (hps_vctk.data.sampling_rate, float2pcm(audio))]
|
200 |
|
201 |
|
202 |
# download_pretrained()
|
203 |
model, hps, model_vctk, hps_vctk = load_checkpoints()
|
204 |
|
205 |
defaults = {
|
206 |
+
"text": "coi munje",
|
207 |
+
"language": "Lojban",
|
208 |
"noise_scale": .667,
|
209 |
"noise_scale_w": .8,
|
210 |
"speed": 1.8,
|
|
|
236 |
article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2106.06103'>Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech</a> | <a href='https://github.com/jaywalnut310/vits'>Github Repo</a></p>"
|
237 |
with gr.Row():
|
238 |
with gr.Column():
|
239 |
+
input_text = gr.Textbox(lines=4, value=defaults["text"], label="Input text", placeholder="add your text, or click one of the examples to load them")
|
240 |
langs = gr.Radio([
|
241 |
'Lojban',
|
242 |
'English',
|
243 |
'Transcription',
|
244 |
+
], value=defaults["language"], label="Language")
|
245 |
voices = gr.Radio(["LJS", 0, 1, 2, 3, 4, "Nix-Deterministic", "Nix-Stochastic"], value=defaults["voice"], label="Voice")
|
246 |
noise_scale = gr.Slider(label="Noise scale", minimum=0, maximum=2,
|
247 |
step=0.1, value=defaults["noise_scale"])
|
|
|
262 |
audio = gr.Audio(type="numpy", label="Output audio")
|
263 |
outputs = [ ipa_block, audio ]
|
264 |
btn = gr.Button("Vocalize")
|
265 |
+
btn.click(fn=inference, inputs=inputs, outputs=outputs, api_name="cupra")
|
266 |
|
267 |
examples = list(map(lambda el: el[0:len(el)] + defaults["example"][len(el):], [
|
268 |
["coi ro do ma nuzba", "Lojban"],
|
269 |
["mi djica lo nu do zvati ti", "Lojban", 0.667, 0.8, 1.8,4],
|
270 |
["mu xagji sofybakni cu zvati le purdi", "Lojban", 0.667, 0.8, 1.8, "Nix-Deterministic"],
|
271 |
["ni'o le pa tirxu be me'e zo .teris. pu ki kansa le za'u pendo be le nei le ka xabju le foldi be loi spati", "Lojban"],
|
272 |
+
[", miː dʒˈiːʃaː loːnʊuː doː zvˈaːtiː tiː.", "Transcription"],
|
273 |
["We propose VITS, Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech.", "English"],
|
274 |
]))
|
275 |
gr.Examples(examples, inputs, fn=inference, outputs=outputs, cache_examples=True, run_on_click=True)
|
276 |
|
277 |
demo.launch(server_name="0.0.0.0")
|
|
|
|
assets/lojban/lojban.py
DELETED
@@ -1,352 +0,0 @@
|
|
1 |
-
from __future__ import annotations
|
2 |
-
import sys
|
3 |
-
import os
|
4 |
-
|
5 |
-
from re import sub, compile
|
6 |
-
from itertools import islice
|
7 |
-
|
8 |
-
def krulermorna(text: str) -> str:
|
9 |
-
text = sub(r"\.", "", text)
|
10 |
-
text = sub(r"^", ".", text)
|
11 |
-
text = sub(r"u([aeiouy])", r"w\1", text)
|
12 |
-
text = sub(r"i([aeiouy])", r"Ι©\1", text)
|
13 |
-
text = sub(r"au", "αΈ", text)
|
14 |
-
text = sub(r"ai", "Δ
", text)
|
15 |
-
text = sub(r"ei", "Δ", text)
|
16 |
-
text = sub(r"oi", "Η«", text)
|
17 |
-
text = sub(r"\.", "", text)
|
18 |
-
return text
|
19 |
-
|
20 |
-
def krulermornaize(words: list[str]) -> list[str]:
|
21 |
-
return [krulermorna(word) for word in words]
|
22 |
-
|
23 |
-
ipa_vits = {
|
24 |
-
"a$": 'aΛ',
|
25 |
-
"a": 'aΛ',
|
26 |
-
# "e(?=v)": 'ΙΛΚ',
|
27 |
-
# "e$": 'ΙΛΚ',
|
28 |
-
"e": 'ΙΛ',
|
29 |
-
"i": 'iΛ',
|
30 |
-
"o": 'oΛ',
|
31 |
-
"u": 'Κu',
|
32 |
-
# "u": 'ΚΛ',
|
33 |
-
"y": 'ΙΛ',
|
34 |
-
"Δ
": 'aΙͺ',
|
35 |
-
"Δ": 'ΙΙͺ',
|
36 |
-
# "Δ(?=\b)(?!')": 'ΙΙͺΚ',
|
37 |
-
"Η«": 'ΙΙͺ',
|
38 |
-
"αΈ": 'aΚ',
|
39 |
-
"Ι©a": 'jaΛ',
|
40 |
-
"Ι©e": 'jΙΛ',
|
41 |
-
"Ι©i": 'jiΛ',
|
42 |
-
"Ι©o": 'jΙΛ',
|
43 |
-
"Ι©u": 'juΛ',
|
44 |
-
"Ι©y": 'jΙΛ',
|
45 |
-
"Ι©": 'j',
|
46 |
-
"wa": 'waΛ',
|
47 |
-
"we": 'wΙΛ',
|
48 |
-
"wi": 'wiΛ',
|
49 |
-
"wo": 'wΙΛ',
|
50 |
-
"wu": 'wuΛ',
|
51 |
-
"wy": 'wΙΛ',
|
52 |
-
"w": 'w',
|
53 |
-
"c": 'Κ',
|
54 |
-
# "bj": 'bΚ',
|
55 |
-
"j": 'Κ',
|
56 |
-
"s": 's',
|
57 |
-
"z": 'z',
|
58 |
-
"f": 'f',
|
59 |
-
"v": 'v',
|
60 |
-
"x": 'hhh',
|
61 |
-
"'": 'h',
|
62 |
-
# "dj":'dΚ',
|
63 |
-
# "tc":'tΚ',
|
64 |
-
# "dz":'Κ£',
|
65 |
-
# "ts":'Κ¦',
|
66 |
-
'r': 'ΙΉ',
|
67 |
-
'r(?![ΛaeiouyαΈΔ
ΔΗ«])': 'ΙΉΙΉ',
|
68 |
-
# 'r(?=[ΛaeiouyαΈΔ
ΔΗ«])': 'ΙΉ',
|
69 |
-
"nΛu": 'nΛΚuΛ',
|
70 |
-
"nu": 'nΚuΛ',
|
71 |
-
"ng": 'ng',
|
72 |
-
"n": 'n',
|
73 |
-
"m": 'm',
|
74 |
-
"l": 'l',
|
75 |
-
"b": 'b',
|
76 |
-
"d": 'd',
|
77 |
-
"g": 'Ι‘',
|
78 |
-
"k": 'k',
|
79 |
-
"p": 'p',
|
80 |
-
"t": 't',
|
81 |
-
"h": 'h'
|
82 |
-
}
|
83 |
-
|
84 |
-
ipa_nix = {
|
85 |
-
"a$": 'aΛ',
|
86 |
-
"a": 'aΛ',
|
87 |
-
# "e(?=v)": 'ΙΛΚ',
|
88 |
-
# "e$": 'ΙΛΚ',
|
89 |
-
"e": 'ΙΛ',
|
90 |
-
"i": 'iΛ',
|
91 |
-
"o": 'oΛ',
|
92 |
-
"u": 'Κu',
|
93 |
-
# "u": 'ΚΛ',
|
94 |
-
"y": 'ΙΛ',
|
95 |
-
"Δ
": 'aΙͺ',
|
96 |
-
"Δ": 'ΙΙͺ',
|
97 |
-
# "Δ(?=\b)(?!')": 'ΙΙͺΚ',
|
98 |
-
"Η«": 'ΙΙͺ',
|
99 |
-
"αΈ": 'aΚ',
|
100 |
-
"Ι©a": 'jaΛ',
|
101 |
-
"Ι©e": 'jΙΛ',
|
102 |
-
"Ι©i": 'jiΛ',
|
103 |
-
"Ι©o": 'jΙΛ',
|
104 |
-
"Ι©u": 'juΛ',
|
105 |
-
"Ι©y": 'jΙΛ',
|
106 |
-
"Ι©": 'j',
|
107 |
-
"wa": 'waΛ',
|
108 |
-
"we": 'wΙΛ',
|
109 |
-
"wi": 'wiΛ',
|
110 |
-
"wo": 'wΙΛ',
|
111 |
-
"wu": 'wuΛ',
|
112 |
-
"wy": 'wΙΛ',
|
113 |
-
"w": 'w',
|
114 |
-
"c": 'Κ',
|
115 |
-
"bj": 'bΙͺΚ',
|
116 |
-
"j": 'Κ',
|
117 |
-
"s": 's',
|
118 |
-
"z": 'z',
|
119 |
-
"f": 'f',
|
120 |
-
"v": 'v',
|
121 |
-
"x": 'hh',
|
122 |
-
"'": 'h',
|
123 |
-
# "dj":'dΚ',
|
124 |
-
# "tc":'tΚ',
|
125 |
-
# "dz":'Κ£',
|
126 |
-
# "ts":'Κ¦',
|
127 |
-
'r': 'ΙΉ',
|
128 |
-
'r(?![ΛaeiouyαΈΔ
ΔΗ«])': 'ΙΉΙΉΙΉΙͺ',
|
129 |
-
# 'r(?=[ΛaeiouyαΈΔ
ΔΗ«])': 'ΙΉ',
|
130 |
-
"nΛu": 'nΛΚuΛ',
|
131 |
-
"nu": 'nΚuΛ',
|
132 |
-
"ng": 'ng',
|
133 |
-
"n": 'n',
|
134 |
-
"m": 'm',
|
135 |
-
"l": 'l',
|
136 |
-
"b": 'b',
|
137 |
-
"d": 'd',
|
138 |
-
"g": 'Ι‘',
|
139 |
-
"k": 'k',
|
140 |
-
"p": 'p',
|
141 |
-
"t": 't',
|
142 |
-
"h": 'h'
|
143 |
-
}
|
144 |
-
|
145 |
-
vowel_pattern = compile("[aeiouyΔ
ΔΗ«αΈ]")
|
146 |
-
vowel_coming_pattern = compile("(?=[aeiouyΔ
ΔΗ«αΈ])")
|
147 |
-
diphthong_coming_pattern = compile("(?=[Δ
ΔΗ«αΈ])")
|
148 |
-
|
149 |
-
question_words = krulermornaize(["ma", "mo", "xu"])
|
150 |
-
starter_words = krulermornaize(["le", "lo", "lei", "loi"])
|
151 |
-
terminator_words = krulermornaize(["kei", "ku'o", "vau", "li'u"])
|
152 |
-
|
153 |
-
def lojban2ipa(text: str, mode: str) -> str:
|
154 |
-
if mode == 'vits':
|
155 |
-
return lojban2ipa_vits(text)
|
156 |
-
if mode == 'nix':
|
157 |
-
return lojban2ipa_nix(text)
|
158 |
-
return lojban2ipa_vits(text)
|
159 |
-
|
160 |
-
def lojban2ipa_vits(text: str) -> str:
|
161 |
-
text = krulermorna(text.strip())
|
162 |
-
words = text.split(' ')
|
163 |
-
rebuilt_words = []
|
164 |
-
question_sentence = False
|
165 |
-
for index, word in enumerate([*words]):
|
166 |
-
modified_word = word
|
167 |
-
prefix, postfix = "", ""
|
168 |
-
|
169 |
-
if word in question_words:
|
170 |
-
postfix = "?"
|
171 |
-
prefix=" " + prefix
|
172 |
-
# question_sentence = True
|
173 |
-
|
174 |
-
if word in starter_words:
|
175 |
-
prefix=" " + prefix
|
176 |
-
# question_sentence = True
|
177 |
-
|
178 |
-
if word in terminator_words:
|
179 |
-
postfix = ", "
|
180 |
-
# if not vowel_pattern.match(word[-1:][0]):
|
181 |
-
# postfix += "Κ"
|
182 |
-
# # cmevla
|
183 |
-
# if not vowel_pattern.match(word[0]):
|
184 |
-
# prefix += "Κ"
|
185 |
-
|
186 |
-
# if vowel_pattern.match(word[0]):
|
187 |
-
# prefix = "Κ" + prefix
|
188 |
-
|
189 |
-
if index == 0 or word in ["ni'o", "i"]:
|
190 |
-
prefix = ", " + prefix
|
191 |
-
|
192 |
-
split_word = vowel_coming_pattern.split(word)
|
193 |
-
tail_word = split_word[-2:]
|
194 |
-
# add stress to {klama}, {ni'o}
|
195 |
-
if len(tail_word) == 2 and len(tail_word[0]) > 0 and bool(vowel_pattern.match(tail_word[0][0])) and bool(vowel_pattern.match(tail_word[1][0])):
|
196 |
-
head_word = split_word[:-2]
|
197 |
-
modified_word = "".join(head_word) + "Λ" + "".join(tail_word)
|
198 |
-
# prefix=" " + prefix
|
199 |
-
# add a pause after two-syllable words
|
200 |
-
postfix = postfix + " "
|
201 |
-
# add stress to {lau}, {coi}
|
202 |
-
elif len(tail_word) == 2 and len(tail_word[0]) > 0 and bool(diphthong_coming_pattern.match(tail_word[1][0])):
|
203 |
-
head_word = split_word[:-2]
|
204 |
-
modified_word = "".join(head_word) + tail_word[0] + "Λ" + tail_word[1]
|
205 |
-
# prefix=" " + prefix
|
206 |
-
postfix = postfix + " "
|
207 |
-
# add stress to {le}
|
208 |
-
# elif len(tail_word) == 2 and len(tail_word[0]) > 0 and bool(vowel_pattern.match(tail_word[1][0])):
|
209 |
-
# head_word = split_word[:-2]
|
210 |
-
# modified_word = "".join(head_word) + tail_word[0] + "Λ" + tail_word[1]+" "
|
211 |
-
# postfix =postfix +" "
|
212 |
-
|
213 |
-
# add a pause even after a cmavo
|
214 |
-
if not (index - 1 >= 0 and words[index-1] in starter_words):
|
215 |
-
prefix = " " + prefix
|
216 |
-
|
217 |
-
# # add a pause before {.alis}
|
218 |
-
# if bool(vowel_pattern.match(word[0])):
|
219 |
-
# word = ", " + word
|
220 |
-
|
221 |
-
"""
|
222 |
-
for each letter: if the slice matches then convert the letter
|
223 |
-
"""
|
224 |
-
rebuilt_word = ""
|
225 |
-
lit = enumerate([*modified_word])
|
226 |
-
for idx, x in lit:
|
227 |
-
tail = modified_word[idx:]
|
228 |
-
matched = False
|
229 |
-
consumed = 1
|
230 |
-
for attr, val in sorted(ipa_vits.items(), key=lambda x: len(str(x[0])), reverse=True):
|
231 |
-
pattern = compile("^"+attr)
|
232 |
-
matches = pattern.findall(tail)
|
233 |
-
if len(matches)>0:
|
234 |
-
match = matches[0]
|
235 |
-
consumed = len(match)
|
236 |
-
rebuilt_word += val
|
237 |
-
matched = True
|
238 |
-
break
|
239 |
-
if not matched:
|
240 |
-
rebuilt_word += x
|
241 |
-
[next(lit, None) for _ in range(consumed - 1)]
|
242 |
-
|
243 |
-
rebuilt_words.append(prefix+rebuilt_word+postfix)
|
244 |
-
|
245 |
-
output = "".join(rebuilt_words).strip()
|
246 |
-
output = sub(r" {2,}", " ", output)
|
247 |
-
output = sub(r", ?(?=,)", "", output)
|
248 |
-
|
249 |
-
if question_sentence == True:
|
250 |
-
output += "?"
|
251 |
-
elif bool(vowel_pattern.match(text[-1:][0])):
|
252 |
-
output += "."
|
253 |
-
|
254 |
-
return output
|
255 |
-
|
256 |
-
def lojban2ipa_nix(text: str) -> str:
|
257 |
-
text = krulermorna(text.strip())
|
258 |
-
words = text.split(' ')
|
259 |
-
rebuilt_words = []
|
260 |
-
question_sentence = False
|
261 |
-
for index, word in enumerate([*words]):
|
262 |
-
modified_word = word
|
263 |
-
prefix, postfix = "", ""
|
264 |
-
|
265 |
-
if word in question_words:
|
266 |
-
# postfix = "?"
|
267 |
-
prefix=" " + prefix
|
268 |
-
# question_sentence = True
|
269 |
-
|
270 |
-
if word in starter_words:
|
271 |
-
prefix=" " + prefix
|
272 |
-
# question_sentence = True
|
273 |
-
|
274 |
-
if word in terminator_words:
|
275 |
-
postfix = ", "
|
276 |
-
# if not vowel_pattern.match(word[-1:][0]):
|
277 |
-
# postfix += "Κ"
|
278 |
-
# # cmevla
|
279 |
-
# if not vowel_pattern.match(word[0]):
|
280 |
-
# prefix += "Κ"
|
281 |
-
|
282 |
-
# if vowel_pattern.match(word[0]):
|
283 |
-
# prefix = "Κ" + prefix
|
284 |
-
|
285 |
-
if index == 0 or word in ["ni'o", "i"]:
|
286 |
-
prefix = ", " + prefix
|
287 |
-
|
288 |
-
split_word = vowel_coming_pattern.split(word)
|
289 |
-
tail_word = split_word[-2:]
|
290 |
-
# add stress to {klama}, {ni'o}
|
291 |
-
if len(tail_word) == 2 and len(tail_word[0]) > 0 and bool(vowel_pattern.match(tail_word[0][0])) and bool(vowel_pattern.match(tail_word[1][0])):
|
292 |
-
head_word = split_word[:-2]
|
293 |
-
modified_word = "".join(head_word) + "Λ" + "".join(tail_word)
|
294 |
-
# prefix=" " + prefix
|
295 |
-
# add a pause after two-syllable words
|
296 |
-
postfix = postfix + " "
|
297 |
-
# add stress to {lau}, {coi}
|
298 |
-
elif len(tail_word) == 2 and len(tail_word[0]) > 0 and bool(diphthong_coming_pattern.match(tail_word[1][0])):
|
299 |
-
head_word = split_word[:-2]
|
300 |
-
modified_word = "".join(head_word) + tail_word[0] + "Λ" + tail_word[1]
|
301 |
-
# prefix=" " + prefix
|
302 |
-
postfix = postfix + " "
|
303 |
-
# add stress to {le}
|
304 |
-
# elif len(tail_word) == 2 and len(tail_word[0]) > 0 and bool(vowel_pattern.match(tail_word[1][0])):
|
305 |
-
# head_word = split_word[:-2]
|
306 |
-
# modified_word = "".join(head_word) + tail_word[0] + "Λ" + tail_word[1]+" "
|
307 |
-
# postfix =postfix +" "
|
308 |
-
|
309 |
-
# add a pause even after a cmavo
|
310 |
-
if not (index - 1 >= 0 and words[index-1] in starter_words):
|
311 |
-
prefix = " " + prefix
|
312 |
-
|
313 |
-
# # add a pause before {.alis}
|
314 |
-
# if bool(vowel_pattern.match(word[0])):
|
315 |
-
# word = ", " + word
|
316 |
-
|
317 |
-
"""
|
318 |
-
for each letter: if the slice matches then convert the letter
|
319 |
-
"""
|
320 |
-
rebuilt_word = ""
|
321 |
-
lit = enumerate([*modified_word])
|
322 |
-
for idx, x in lit:
|
323 |
-
tail = modified_word[idx:]
|
324 |
-
matched = False
|
325 |
-
consumed = 1
|
326 |
-
for attr, val in sorted(ipa_nix.items(), key=lambda x: len(str(x[0])), reverse=True):
|
327 |
-
pattern = compile("^"+attr)
|
328 |
-
matches = pattern.findall(tail)
|
329 |
-
if len(matches)>0:
|
330 |
-
match = matches[0]
|
331 |
-
consumed = len(match)
|
332 |
-
rebuilt_word += val
|
333 |
-
matched = True
|
334 |
-
break
|
335 |
-
if not matched:
|
336 |
-
rebuilt_word += x
|
337 |
-
[next(lit, None) for _ in range(consumed - 1)]
|
338 |
-
|
339 |
-
rebuilt_words.append(prefix+rebuilt_word+postfix)
|
340 |
-
|
341 |
-
output = "".join(rebuilt_words).strip()
|
342 |
-
output = sub(r" {2,}", " ", output)
|
343 |
-
output = sub(r", ?(?=,)", "", output)
|
344 |
-
|
345 |
-
if question_sentence == True:
|
346 |
-
output += "?"
|
347 |
-
elif bool(vowel_pattern.match(text[-1:][0])):
|
348 |
-
output += "."
|
349 |
-
|
350 |
-
return output
|
351 |
-
|
352 |
-
# print(lojban2ipa("ni'o le pa tirxu be me'e zo .teris. pu ki kansa le za'u pendo be le nei le ka xabju le foldi be loi spati"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
dev.sh
CHANGED
@@ -9,7 +9,8 @@ docker rm -f jboselvoha 2> /dev/null
|
|
9 |
# jboselvoha
|
10 |
docker run -d -it --name jboselvoha \
|
11 |
-v $(pwd)/assets:/home/user/app/assets:Z \
|
12 |
-
-v $(pwd)/
|
|
|
13 |
-v $(pwd)/app.py:/home/user/app/app.py:Z \
|
14 |
-v $(pwd)/lojban:/home/user/app/lojban/:Z \
|
15 |
-v $(pwd)/vits:/home/user/app/vits:Z \
|
|
|
9 |
# jboselvoha
|
10 |
docker run -d -it --name jboselvoha \
|
11 |
-v $(pwd)/assets:/home/user/app/assets:Z \
|
12 |
+
-v $(pwd)/pretrained/nix-tts:/home/user/app/pretrained/nix-tts/:Z \
|
13 |
+
-v $(pwd)/lfs/vits:/home/user/app/pretrained/vits/:Z \
|
14 |
-v $(pwd)/app.py:/home/user/app/app.py:Z \
|
15 |
-v $(pwd)/lojban:/home/user/app/lojban/:Z \
|
16 |
-v $(pwd)/vits:/home/user/app/vits:Z \
|
pretrained/{nix-ljspeech-sdp-v0.1 → nix-tts/nix-ljspeech-sdp-v0.1}/decoder.onnx
RENAMED
File without changes
|
pretrained/{nix-ljspeech-sdp-v0.1 → nix-tts/nix-ljspeech-sdp-v0.1}/encoder.onnx
RENAMED
File without changes
|
pretrained/{nix-ljspeech-sdp-v0.1 → nix-tts/nix-ljspeech-sdp-v0.1}/tokenizer_state.pkl
RENAMED
File without changes
|
pretrained/{nix-ljspeech-v0.1 → nix-tts/nix-ljspeech-v0.1}/decoder.onnx
RENAMED
File without changes
|
pretrained/{nix-ljspeech-v0.1 → nix-tts/nix-ljspeech-v0.1}/encoder.onnx
RENAMED
File without changes
|
pretrained/{nix-ljspeech-v0.1 → nix-tts/nix-ljspeech-v0.1}/tokenizer_state.pkl
RENAMED
File without changes
|
pretrained/{pretrained_ljs.pth → vits/pretrained_ljs.pth}
RENAMED
File without changes
|
pretrained/{pretrained_vctk.pth → vits/pretrained_vctk.pth}
RENAMED
File without changes
|
vits/utils.py
CHANGED
@@ -16,7 +16,6 @@ logger = logging
|
|
16 |
|
17 |
|
18 |
def load_checkpoint(checkpoint_path, model, optimizer=None):
|
19 |
-
print(checkpoint_path)
|
20 |
assert os.path.isfile(checkpoint_path)
|
21 |
checkpoint_dict = torch.load(checkpoint_path, map_location='cpu')
|
22 |
iteration = checkpoint_dict['iteration']
|
@@ -72,7 +71,6 @@ def latest_checkpoint_path(dir_path, regex="G_*.pth"):
|
|
72 |
f_list = glob.glob(os.path.join(dir_path, regex))
|
73 |
f_list.sort(key=lambda f: int("".join(filter(str.isdigit, f))))
|
74 |
x = f_list[-1]
|
75 |
-
print(x)
|
76 |
return x
|
77 |
|
78 |
|
|
|
16 |
|
17 |
|
18 |
def load_checkpoint(checkpoint_path, model, optimizer=None):
|
|
|
19 |
assert os.path.isfile(checkpoint_path)
|
20 |
checkpoint_dict = torch.load(checkpoint_path, map_location='cpu')
|
21 |
iteration = checkpoint_dict['iteration']
|
|
|
71 |
f_list = glob.glob(os.path.join(dir_path, regex))
|
72 |
f_list.sort(key=lambda f: int("".join(filter(str.isdigit, f))))
|
73 |
x = f_list[-1]
|
|
|
74 |
return x
|
75 |
|
76 |
|