Spaces:

lojban
/

text-to-speech

Running

App Files Community

lojban committed on Dec 23, 2022

Commit

f9d0d4d

•

1 Parent(s): 1fcc013

publish API

Browse files

Files changed (12) hide show

app.py +48 -13
assets/lojban/lojban.py +0 -352
dev.sh +2 -1
pretrained/{nix-ljspeech-sdp-v0.1 → nix-tts/nix-ljspeech-sdp-v0.1}/decoder.onnx +0 -0
pretrained/{nix-ljspeech-sdp-v0.1 → nix-tts/nix-ljspeech-sdp-v0.1}/encoder.onnx +0 -0
pretrained/{nix-ljspeech-sdp-v0.1 → nix-tts/nix-ljspeech-sdp-v0.1}/tokenizer_state.pkl +0 -0
pretrained/{nix-ljspeech-v0.1 → nix-tts/nix-ljspeech-v0.1}/decoder.onnx +0 -0
pretrained/{nix-ljspeech-v0.1 → nix-tts/nix-ljspeech-v0.1}/encoder.onnx +0 -0
pretrained/{nix-ljspeech-v0.1 → nix-tts/nix-ljspeech-v0.1}/tokenizer_state.pkl +0 -0
pretrained/{pretrained_ljs.pth → vits/pretrained_ljs.pth} +0 -0
pretrained/{pretrained_vctk.pth → vits/pretrained_vctk.pth} +0 -0
vits/utils.py +0 -2

app.py CHANGED Viewed

@@ -118,7 +118,7 @@ def load_checkpoints():
         **hps.model)
     _ = model.eval()
-    _ = utils.load_checkpoint(current + "/pretrained/pretrained_ljs.pth", model, None)
     hps_vctk = utils.get_hparams_from_file(current + "/vits/configs/vctk_base.json")
     net_g_vctk = SynthesizerTrn(
@@ -129,11 +129,47 @@ def load_checkpoints():
         **hps_vctk.model)
     _ = model.eval()
-    _ = utils.load_checkpoint(current + "/pretrained/pretrained_vctk.pth", net_g_vctk, None)
     return model, hps, net_g_vctk, hps_vctk
 def inference(text, language, noise_scale, noise_scale_w, length_scale, voice):
     if len(text.strip())==0:
         return []
@@ -141,9 +177,9 @@ def inference(text, language, noise_scale, noise_scale_w, length_scale, voice):
     language = language_id_lookup[language] if bool(
         language_id_lookup[language]) else "jbo"
     if voice == 'Nix-Deterministic' and language == 'jbo':
-        return generate_voice(lojban2ipa(text,'nix'), current+"/pretrained/nix-ljspeech-v0.1")
     elif voice == 'Nix-Stochastic' and language == 'jbo':
-        return generate_voice(lojban2ipa(text,'nix'), current+"/pretrained/nix-ljspeech-sdp-v0.1")
     elif voice == 'LJS':
         ipa_text, stn_tst = get_text(text, language, hps, mode="VITS")
         with torch.no_grad():
@@ -151,7 +187,7 @@ def inference(text, language, noise_scale, noise_scale_w, length_scale, voice):
             x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
             audio = model.infer(x_tst, x_tst_lengths, noise_scale=noise_scale,
                                 noise_scale_w=noise_scale_w, length_scale=length_scale)[0][0, 0].data.float().numpy()
-            return [ipa_text, (hps.data.sampling_rate, audio)]
     else:
         ipa_text, stn_tst = get_text(text, language, hps_vctk, mode="VITS")
         with torch.no_grad():
@@ -160,14 +196,15 @@ def inference(text, language, noise_scale, noise_scale_w, length_scale, voice):
             sid = torch.LongTensor([voice])
             audio = model_vctk.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=noise_scale,
                                      noise_scale_w=noise_scale_w, length_scale=length_scale)[0][0, 0].data.cpu().float().numpy()
-            return [ipa_text, (hps_vctk.data.sampling_rate, audio)]
 # download_pretrained()
 model, hps, model_vctk, hps_vctk = load_checkpoints()
 defaults = {
-    "text": "Lojban",
     "noise_scale": .667,
     "noise_scale_w": .8,
     "speed": 1.8,
@@ -199,12 +236,12 @@ with gr.Blocks(css=css) as demo:
     article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2106.06103'>Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech</a> | <a href='https://github.com/jaywalnut310/vits'>Github Repo</a></p>"
     with gr.Row():
         with gr.Column():
-            input_text = gr.Textbox(lines=4, label="Input text", placeholder="add your text, or click one of the examples to load them")
             langs = gr.Radio([
                 'Lojban',
                 'English',
                 'Transcription',
-                ], value=defaults["text"], label="Language")
             voices = gr.Radio(["LJS", 0, 1, 2, 3, 4, "Nix-Deterministic", "Nix-Stochastic"], value=defaults["voice"], label="Voice")
             noise_scale = gr.Slider(label="Noise scale", minimum=0, maximum=2,
                 step=0.1, value=defaults["noise_scale"])
@@ -225,18 +262,16 @@ with gr.Blocks(css=css) as demo:
             audio = gr.Audio(type="numpy", label="Output audio")
             outputs = [ ipa_block, audio ]
             btn = gr.Button("Vocalize")
-            btn.click(fn=inference, inputs=inputs, outputs=outputs)
             examples = list(map(lambda el: el[0:len(el)] + defaults["example"][len(el):], [
                 ["coi ro do ma nuzba", "Lojban"],
                 ["mi djica lo nu do zvati ti", "Lojban", 0.667, 0.8, 1.8,4],
                 ["mu xagji sofybakni cu zvati le purdi", "Lojban", 0.667, 0.8, 1.8, "Nix-Deterministic"],
                 ["ni'o le pa tirxu be me'e zo .teris. pu ki kansa le za'u pendo be le nei le ka xabju le foldi be loi spati", "Lojban"],
-                [", mˈiː dʒʒˈiːʃaː lˈoːnˈʊuːdˈoː zvˈaːtiː tˈiː.", "Transcription"],
                 ["We propose VITS, Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech.", "English"],
             ]))
             gr.Examples(examples, inputs, fn=inference, outputs=outputs, cache_examples=True, run_on_click=True)
 demo.launch(server_name="0.0.0.0")
-# gr.Interface(fn=inference, inputs=inputs, outputs=outputs, title=title,
-#              description=description, article=article, examples=examples).launch(server_name="0.0.0.0")

         **hps.model)
     _ = model.eval()
+    _ = utils.load_checkpoint(current + "/pretrained/vits/pretrained_ljs.pth", model, None)
     hps_vctk = utils.get_hparams_from_file(current + "/vits/configs/vctk_base.json")
     net_g_vctk = SynthesizerTrn(
         **hps_vctk.model)
     _ = model.eval()
+    _ = utils.load_checkpoint(current + "/pretrained/vits/pretrained_vctk.pth", net_g_vctk, None)
     return model, hps, net_g_vctk, hps_vctk
+def float2pcm(sig, dtype='int16'):
+    """Convert floating point signal with a range from -1 to 1 to PCM.
+    Any signal values outside the interval [-1.0, 1.0) are clipped.
+    No dithering is used.
+    Note that there are different possibilities for scaling floating
+    point numbers to PCM numbers, this function implements just one of
+    them.  For an overview of alternatives see
+    http://blog.bjornroche.com/2009/12/int-float-int-its-jungle-out-there.html
+    Parameters
+    ----------
+    sig : array_like
+        Input array, must have floating point type.
+    dtype : data type, optional
+        Desired (integer) data type.
+    Returns
+    -------
+    numpy.ndarray
+        Integer data, scaled and clipped to the range of the given
+        *dtype*.
+    See Also
+    --------
+    pcm2float, dtype
+    """
+    sig = np.asarray(sig)
+    if sig.dtype.kind != 'f':
+        raise TypeError("'sig' must be a float array")
+    dtype = np.dtype(dtype)
+    if dtype.kind not in 'iu':
+        raise TypeError("'dtype' must be an integer type")
+    i = np.iinfo(dtype)
+    abs_max = 2 ** (i.bits - 1)
+    offset = i.min + abs_max
+    return (sig * abs_max + offset).clip(i.min, i.max).astype(dtype)
 def inference(text, language, noise_scale, noise_scale_w, length_scale, voice):
     if len(text.strip())==0:
         return []
     language = language_id_lookup[language] if bool(
         language_id_lookup[language]) else "jbo"
     if voice == 'Nix-Deterministic' and language == 'jbo':
+        return generate_voice(lojban2ipa(text,'nix'), current+"/pretrained/nix-tts/nix-ljspeech-v0.1")
     elif voice == 'Nix-Stochastic' and language == 'jbo':
+        return generate_voice(lojban2ipa(text,'nix'), current+"/pretrained/nix-tts/nix-ljspeech-sdp-v0.1")
     elif voice == 'LJS':
         ipa_text, stn_tst = get_text(text, language, hps, mode="VITS")
         with torch.no_grad():
             x_tst_lengths = torch.LongTensor([stn_tst.size(0)])
             audio = model.infer(x_tst, x_tst_lengths, noise_scale=noise_scale,
                                 noise_scale_w=noise_scale_w, length_scale=length_scale)[0][0, 0].data.float().numpy()
+            return [ipa_text, (hps.data.sampling_rate, float2pcm(audio))]
     else:
         ipa_text, stn_tst = get_text(text, language, hps_vctk, mode="VITS")
         with torch.no_grad():
             sid = torch.LongTensor([voice])
             audio = model_vctk.infer(x_tst, x_tst_lengths, sid=sid, noise_scale=noise_scale,
                                      noise_scale_w=noise_scale_w, length_scale=length_scale)[0][0, 0].data.cpu().float().numpy()
+            return [ipa_text, (hps_vctk.data.sampling_rate, float2pcm(audio))]
 # download_pretrained()
 model, hps, model_vctk, hps_vctk = load_checkpoints()
 defaults = {
+    "text": "coi munje",
+    "language": "Lojban",
     "noise_scale": .667,
     "noise_scale_w": .8,
     "speed": 1.8,
     article = "<p style='text-align: center'><a href='https://arxiv.org/abs/2106.06103'>Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech</a> | <a href='https://github.com/jaywalnut310/vits'>Github Repo</a></p>"
     with gr.Row():
         with gr.Column():
+            input_text = gr.Textbox(lines=4, value=defaults["text"], label="Input text", placeholder="add your text, or click one of the examples to load them")
             langs = gr.Radio([
                 'Lojban',
                 'English',
                 'Transcription',
+                ], value=defaults["language"], label="Language")
             voices = gr.Radio(["LJS", 0, 1, 2, 3, 4, "Nix-Deterministic", "Nix-Stochastic"], value=defaults["voice"], label="Voice")
             noise_scale = gr.Slider(label="Noise scale", minimum=0, maximum=2,
                 step=0.1, value=defaults["noise_scale"])
             audio = gr.Audio(type="numpy", label="Output audio")
             outputs = [ ipa_block, audio ]
             btn = gr.Button("Vocalize")
+            btn.click(fn=inference, inputs=inputs, outputs=outputs, api_name="cupra")
             examples = list(map(lambda el: el[0:len(el)] + defaults["example"][len(el):], [
                 ["coi ro do ma nuzba", "Lojban"],
                 ["mi djica lo nu do zvati ti", "Lojban", 0.667, 0.8, 1.8,4],
                 ["mu xagji sofybakni cu zvati le purdi", "Lojban", 0.667, 0.8, 1.8, "Nix-Deterministic"],
                 ["ni'o le pa tirxu be me'e zo .teris. pu ki kansa le za'u pendo be le nei le ka xabju le foldi be loi spati", "Lojban"],
+                [", miː dʒˈiːʃaː loːnʊuː doː zvˈaːtiː tiː.", "Transcription"],
                 ["We propose VITS, Conditional Variational Autoencoder with Adversarial Learning for End-to-End Text-to-Speech.", "English"],
             ]))
             gr.Examples(examples, inputs, fn=inference, outputs=outputs, cache_examples=True, run_on_click=True)
 demo.launch(server_name="0.0.0.0")

assets/lojban/lojban.py DELETED Viewed

@@ -1,352 +0,0 @@
-from __future__ import annotations
-import sys
-import os
-from re import sub, compile
-from itertools import islice
-def krulermorna(text: str) -> str:
-    text = sub(r"\.", "", text)
-    text = sub(r"^", ".", text)
-    text = sub(r"u([aeiouy])", r"w\1", text)
-    text = sub(r"i([aeiouy])", r"ɩ\1", text)
-    text = sub(r"au", "ḁ", text)
-    text = sub(r"ai", "ą", text)
-    text = sub(r"ei", "ę", text)
-    text = sub(r"oi", "ǫ", text)
-    text = sub(r"\.", "", text)
-    return text
-def krulermornaize(words: list[str]) -> list[str]:
-    return [krulermorna(word) for word in words]
-ipa_vits = {
-    "a$": 'aː',
-    "a": 'aː',
-    # "e(?=v)": 'ɛːʔ',
-    # "e$": 'ɛːʔ',
-    "e": 'ɛː',
-    "i": 'iː',
-    "o": 'oː',
-    "u": 'ʊu',
-    # "u": 'ʊː',
-    "y": 'əː',
-    "ą": 'aɪ',
-    "ę": 'ɛɪ',
-    # "ę(?=\b)(?!')": 'ɛɪʔ',
-    "ǫ": 'ɔɪ',
-    "ḁ": 'aʊ',
-    "ɩa": 'jaː',
-    "ɩe": 'jɛː',
-    "ɩi": 'jiː',
-    "ɩo": 'jɔː',
-    "ɩu": 'juː',
-    "ɩy": 'jəː',
-    "ɩ": 'j',
-    "wa": 'waː',
-    "we": 'wɛː',
-    "wi": 'wiː',
-    "wo": 'wɔː',
-    "wu": 'wuː',
-    "wy": 'wəː',
-    "w": 'w',
-    "c": 'ʃ',
-    # "bj": 'bʒ',
-    "j": 'ʒ',
-    "s": 's',
-    "z": 'z',
-    "f": 'f',
-    "v": 'v',
-    "x": 'hhh',
-    "'": 'h',
-    # "dj":'dʒ',
-    # "tc":'tʃ',
-    # "dz":'ʣ',
-    # "ts":'ʦ',
-    'r': 'ɹ',
-    'r(?![ˈaeiouyḁąęǫ])': 'ɹɹ',
-    # 'r(?=[ˈaeiouyḁąęǫ])': 'ɹ',
-    "nˈu": 'nˈʊuː',
-    "nu": 'nʊuː',
-    "ng": 'ng',
-    "n": 'n',
-    "m": 'm',
-    "l": 'l',
-    "b": 'b',
-    "d": 'd',
-    "g": 'ɡ',
-    "k": 'k',
-    "p": 'p',
-    "t": 't',
-    "h": 'h'
-}
-ipa_nix = {
-    "a$": 'aː',
-    "a": 'aː',
-    # "e(?=v)": 'ɛːʔ',
-    # "e$": 'ɛːʔ',
-    "e": 'ɛː',
-    "i": 'iː',
-    "o": 'oː',
-    "u": 'ʊu',
-    # "u": 'ʊː',
-    "y": 'əː',
-    "ą": 'aɪ',
-    "ę": 'ɛɪ',
-    # "ę(?=\b)(?!')": 'ɛɪʔ',
-    "ǫ": 'ɔɪ',
-    "ḁ": 'aʊ',
-    "ɩa": 'jaː',
-    "ɩe": 'jɛː',
-    "ɩi": 'jiː',
-    "ɩo": 'jɔː',
-    "ɩu": 'juː',
-    "ɩy": 'jəː',
-    "ɩ": 'j',
-    "wa": 'waː',
-    "we": 'wɛː',
-    "wi": 'wiː',
-    "wo": 'wɔː',
-    "wu": 'wuː',
-    "wy": 'wəː',
-    "w": 'w',
-    "c": 'ʃ',
-    "bj": 'bɪʒ',
-    "j": 'ʒ',
-    "s": 's',
-    "z": 'z',
-    "f": 'f',
-    "v": 'v',
-    "x": 'hh',
-    "'": 'h',
-    # "dj":'dʒ',
-    # "tc":'tʃ',
-    # "dz":'ʣ',
-    # "ts":'ʦ',
-    'r': 'ɹ',
-    'r(?![ˈaeiouyḁąęǫ])': 'ɹɹɹɪ',
-    # 'r(?=[ˈaeiouyḁąęǫ])': 'ɹ',
-    "nˈu": 'nˈʊuː',
-    "nu": 'nʊuː',
-    "ng": 'ng',
-    "n": 'n',
-    "m": 'm',
-    "l": 'l',
-    "b": 'b',
-    "d": 'd',
-    "g": 'ɡ',
-    "k": 'k',
-    "p": 'p',
-    "t": 't',
-    "h": 'h'
-}
-vowel_pattern = compile("[aeiouyąęǫḁ]")
-vowel_coming_pattern = compile("(?=[aeiouyąęǫḁ])")
-diphthong_coming_pattern = compile("(?=[ąęǫḁ])")
-question_words = krulermornaize(["ma", "mo", "xu"])
-starter_words = krulermornaize(["le", "lo", "lei", "loi"])
-terminator_words = krulermornaize(["kei", "ku'o", "vau", "li'u"])
-def lojban2ipa(text: str, mode: str) -> str:
-    if mode == 'vits':
-        return lojban2ipa_vits(text)
-    if mode == 'nix':
-        return lojban2ipa_nix(text)
-    return lojban2ipa_vits(text)
-def lojban2ipa_vits(text: str) -> str:
-    text = krulermorna(text.strip())
-    words = text.split(' ')
-    rebuilt_words = []
-    question_sentence = False
-    for index, word in enumerate([*words]):
-        modified_word = word
-        prefix, postfix = "", ""
-        if word in question_words:
-            postfix = "?"
-            prefix=" " + prefix
-            # question_sentence = True
-        if word in starter_words:
-            prefix=" " + prefix
-            # question_sentence = True
-        if word in terminator_words:
-            postfix = ", "
-        # if not vowel_pattern.match(word[-1:][0]):
-        #     postfix += "ʔ"
-        #     # cmevla
-        #     if not vowel_pattern.match(word[0]):
-        #         prefix += "ʔ"
-        # if vowel_pattern.match(word[0]):
-        #     prefix = "ʔ" + prefix
-        if index == 0 or word in ["ni'o", "i"]:
-            prefix = ", " + prefix
-        split_word = vowel_coming_pattern.split(word)
-        tail_word = split_word[-2:]
-        # add stress to {klama}, {ni'o}
-        if len(tail_word) == 2 and len(tail_word[0]) > 0 and bool(vowel_pattern.match(tail_word[0][0])) and bool(vowel_pattern.match(tail_word[1][0])):
-            head_word = split_word[:-2]
-            modified_word = "".join(head_word) + "ˈ" + "".join(tail_word)
-            # prefix=" " + prefix
-            # add a pause after two-syllable words
-            postfix = postfix + " "
-        # add stress to {lau}, {coi}
-        elif len(tail_word) == 2 and len(tail_word[0]) > 0 and bool(diphthong_coming_pattern.match(tail_word[1][0])):
-            head_word = split_word[:-2]
-            modified_word = "".join(head_word) + tail_word[0] + "ˈ" + tail_word[1]
-            # prefix=" " + prefix
-            postfix = postfix + " "
-        # add stress to {le}
-        # elif len(tail_word) == 2 and len(tail_word[0]) > 0 and bool(vowel_pattern.match(tail_word[1][0])):
-        #     head_word = split_word[:-2]
-        #     modified_word = "".join(head_word) + tail_word[0] + "ˈ" + tail_word[1]+" "
-        #     postfix =postfix +" "
-        # add a pause even after a cmavo
-        if not (index - 1 >= 0 and words[index-1] in starter_words):
-            prefix = " " + prefix
-        # # add a pause before {.alis}
-        # if bool(vowel_pattern.match(word[0])):
-        #     word = ", " + word
-        """
-        for each letter: if the slice matches then convert the letter
-        """
-        rebuilt_word = ""
-        lit = enumerate([*modified_word])
-        for idx, x in lit:
-            tail = modified_word[idx:]
-            matched = False
-            consumed = 1
-            for attr, val in sorted(ipa_vits.items(), key=lambda x: len(str(x[0])), reverse=True):
-                pattern = compile("^"+attr)
-                matches = pattern.findall(tail)
-                if len(matches)>0:
-                    match = matches[0]
-                    consumed = len(match)
-                    rebuilt_word += val
-                    matched = True
-                    break
-            if not matched:
-                rebuilt_word += x
-            [next(lit, None) for _ in range(consumed - 1)]
-        rebuilt_words.append(prefix+rebuilt_word+postfix)
-    output = "".join(rebuilt_words).strip()
-    output = sub(r" {2,}", " ", output)
-    output = sub(r", ?(?=,)", "", output)
-    if question_sentence == True:
-        output += "?"
-    elif bool(vowel_pattern.match(text[-1:][0])):
-        output += "."
-    return output
-def lojban2ipa_nix(text: str) -> str:
-    text = krulermorna(text.strip())
-    words = text.split(' ')
-    rebuilt_words = []
-    question_sentence = False
-    for index, word in enumerate([*words]):
-        modified_word = word
-        prefix, postfix = "", ""
-        if word in question_words:
-            # postfix = "?"
-            prefix=" " + prefix
-            # question_sentence = True
-        if word in starter_words:
-            prefix=" " + prefix
-            # question_sentence = True
-        if word in terminator_words:
-            postfix = ", "
-        # if not vowel_pattern.match(word[-1:][0]):
-        #     postfix += "ʔ"
-        #     # cmevla
-        #     if not vowel_pattern.match(word[0]):
-        #         prefix += "ʔ"
-        # if vowel_pattern.match(word[0]):
-        #     prefix = "ʔ" + prefix
-        if index == 0 or word in ["ni'o", "i"]:
-            prefix = ", " + prefix
-        split_word = vowel_coming_pattern.split(word)
-        tail_word = split_word[-2:]
-        # add stress to {klama}, {ni'o}
-        if len(tail_word) == 2 and len(tail_word[0]) > 0 and bool(vowel_pattern.match(tail_word[0][0])) and bool(vowel_pattern.match(tail_word[1][0])):
-            head_word = split_word[:-2]
-            modified_word = "".join(head_word) + "ˈ" + "".join(tail_word)
-            # prefix=" " + prefix
-            # add a pause after two-syllable words
-            postfix = postfix + " "
-        # add stress to {lau}, {coi}
-        elif len(tail_word) == 2 and len(tail_word[0]) > 0 and bool(diphthong_coming_pattern.match(tail_word[1][0])):
-            head_word = split_word[:-2]
-            modified_word = "".join(head_word) + tail_word[0] + "ˈ" + tail_word[1]
-            # prefix=" " + prefix
-            postfix = postfix + " "
-        # add stress to {le}
-        # elif len(tail_word) == 2 and len(tail_word[0]) > 0 and bool(vowel_pattern.match(tail_word[1][0])):
-        #     head_word = split_word[:-2]
-        #     modified_word = "".join(head_word) + tail_word[0] + "ˈ" + tail_word[1]+" "
-        #     postfix =postfix +" "
-        # add a pause even after a cmavo
-        if not (index - 1 >= 0 and words[index-1] in starter_words):
-            prefix = " " + prefix
-        # # add a pause before {.alis}
-        # if bool(vowel_pattern.match(word[0])):
-        #     word = ", " + word
-        """
-        for each letter: if the slice matches then convert the letter
-        """
-        rebuilt_word = ""
-        lit = enumerate([*modified_word])
-        for idx, x in lit:
-            tail = modified_word[idx:]
-            matched = False
-            consumed = 1
-            for attr, val in sorted(ipa_nix.items(), key=lambda x: len(str(x[0])), reverse=True):
-                pattern = compile("^"+attr)
-                matches = pattern.findall(tail)
-                if len(matches)>0:
-                    match = matches[0]
-                    consumed = len(match)
-                    rebuilt_word += val
-                    matched = True
-                    break
-            if not matched:
-                rebuilt_word += x
-            [next(lit, None) for _ in range(consumed - 1)]
-        rebuilt_words.append(prefix+rebuilt_word+postfix)
-    output = "".join(rebuilt_words).strip()
-    output = sub(r" {2,}", " ", output)
-    output = sub(r", ?(?=,)", "", output)
-    if question_sentence == True:
-        output += "?"
-    elif bool(vowel_pattern.match(text[-1:][0])):
-        output += "."
-    return output
-# print(lojban2ipa("ni'o le pa tirxu be me'e zo .teris. pu ki kansa le za'u pendo be le nei le ka xabju le foldi be loi spati"))

dev.sh CHANGED Viewed

@@ -9,7 +9,8 @@ docker rm -f jboselvoha 2> /dev/null
 # jboselvoha
 docker run -d -it --name jboselvoha  \
 -v $(pwd)/assets:/home/user/app/assets:Z \
--v $(pwd)/lfs:/home/user/app/lfs/:Z \
 -v $(pwd)/app.py:/home/user/app/app.py:Z \
 -v $(pwd)/lojban:/home/user/app/lojban/:Z \
 -v $(pwd)/vits:/home/user/app/vits:Z \

 # jboselvoha
 docker run -d -it --name jboselvoha  \
 -v $(pwd)/assets:/home/user/app/assets:Z \
+-v $(pwd)/pretrained/nix-tts:/home/user/app/pretrained/nix-tts/:Z \
+-v $(pwd)/lfs/vits:/home/user/app/pretrained/vits/:Z \
 -v $(pwd)/app.py:/home/user/app/app.py:Z \
 -v $(pwd)/lojban:/home/user/app/lojban/:Z \
 -v $(pwd)/vits:/home/user/app/vits:Z \

pretrained/{nix-ljspeech-sdp-v0.1 → nix-tts/nix-ljspeech-sdp-v0.1}/decoder.onnx RENAMED Viewed

File without changes

pretrained/{nix-ljspeech-sdp-v0.1 → nix-tts/nix-ljspeech-sdp-v0.1}/encoder.onnx RENAMED Viewed

File without changes

pretrained/{nix-ljspeech-sdp-v0.1 → nix-tts/nix-ljspeech-sdp-v0.1}/tokenizer_state.pkl RENAMED Viewed

File without changes

pretrained/{nix-ljspeech-v0.1 → nix-tts/nix-ljspeech-v0.1}/decoder.onnx RENAMED Viewed

File without changes

pretrained/{nix-ljspeech-v0.1 → nix-tts/nix-ljspeech-v0.1}/encoder.onnx RENAMED Viewed

File without changes

pretrained/{nix-ljspeech-v0.1 → nix-tts/nix-ljspeech-v0.1}/tokenizer_state.pkl RENAMED Viewed

File without changes

pretrained/{pretrained_ljs.pth → vits/pretrained_ljs.pth} RENAMED Viewed

File without changes

pretrained/{pretrained_vctk.pth → vits/pretrained_vctk.pth} RENAMED Viewed

File without changes

vits/utils.py CHANGED Viewed

@@ -16,7 +16,6 @@ logger = logging
 def load_checkpoint(checkpoint_path, model, optimizer=None):
-  print(checkpoint_path)
   assert os.path.isfile(checkpoint_path)
   checkpoint_dict = torch.load(checkpoint_path, map_location='cpu')
   iteration = checkpoint_dict['iteration']
@@ -72,7 +71,6 @@ def latest_checkpoint_path(dir_path, regex="G_*.pth"):
   f_list = glob.glob(os.path.join(dir_path, regex))
   f_list.sort(key=lambda f: int("".join(filter(str.isdigit, f))))
   x = f_list[-1]
-  print(x)
   return x

 def load_checkpoint(checkpoint_path, model, optimizer=None):
   assert os.path.isfile(checkpoint_path)
   checkpoint_dict = torch.load(checkpoint_path, map_location='cpu')
   iteration = checkpoint_dict['iteration']
   f_list = glob.glob(os.path.join(dir_path, regex))
   f_list.sort(key=lambda f: int("".join(filter(str.isdigit, f))))
   x = f_list[-1]
   return x