MZhaovo committed
Commit 8f472c2
1 parent: fc2bcff

Upload folder using huggingface_hub

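The commit message above appears to be the default message huggingface_hub attaches when a whole folder is pushed in a single commit. As a rough sketch of how such a commit is usually produced (the local folder path and Space id below are illustrative assumptions, not taken from this page):

# Hedged sketch: push a local working copy to a Space in one commit.
# folder_path and repo_id are placeholders; adjust to the actual repo.
from huggingface_hub import HfApi

api = HfApi()
api.upload_folder(
    folder_path="./talkflower",      # hypothetical local folder
    repo_id="MZhaovo/TalkFlower",    # hypothetical Space id
    repo_type="space",
    # commit_message is omitted, so the library's default
    # "Upload folder using huggingface_hub" is used.
)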
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🌼🌼🌼
 app_file: app.py
 sdk: gradio
 sdk_version: 4.5.0
-colorFrom: yellow
-colorTo: gray
+colorFrom: red
+colorTo: blue
 pinned: true
----
+---
__pycache__/infer.cpython-39.pyc CHANGED
Binary files a/__pycache__/infer.cpython-39.pyc and b/__pycache__/infer.cpython-39.pyc differ
 
app.py CHANGED
@@ -10,82 +10,101 @@ import gradio as gr
 import webbrowser
 from config import config
 from tools.translate import translate
-
-from webui import reload_javascript
+from tools.webui import reload_javascript
 
 device = config.webui_config.device
 if device == "mps":
     os.environ["PYTORCH_ENABLE_MPS_FALLBACK"] = "1"
 
-def generate_audio(
-    slices,
-    sdp_ratio,
-    noise_scale,
-    noise_scale_w,
-    length_scale,
-    speaker,
-    language,
-):
-    audio_list = []
-    silence = np.zeros(hps.data.sampling_rate // 2, dtype=np.int16)
-    with torch.no_grad():
-        for piece in slices:
-            audio = infer(
-                piece,
-                sdp_ratio=sdp_ratio,
-                noise_scale=noise_scale,
-                noise_scale_w=noise_scale_w,
-                length_scale=length_scale,
-                sid=speaker,
-                language=language,
-                hps=hps,
-                net_g=net_g,
-                device=device,
-            )
-            audio16bit = gr.processing_utils.convert_to_16_bit_wav(audio)
-            audio_list.append(audio16bit)
-            audio_list.append(silence)  # append the silence to the list
-    return audio_list
 
 def speak_fn(
     text: str,
+    exceed_flag,
     speaker="TalkFlower_CNzh",
     sdp_ratio=0.2,  # SDP/DP mix ratio
     noise_scale=0.6,  # emotion
     noise_scale_w=0.6,  # phoneme length
     length_scale=0.9,  # speaking speed
-    language="ZH"
+    language="ZH",
+    interval_between_para=0.2,  # pause between paragraphs
+    interval_between_sent=1,  # pause between sentences
 ):
-    print(text)
+    while text.find("\n\n") != -1:
+        text = text.replace("\n\n", "\n")
     if len(text) > 100:
-        gr.Warning("Too long! No more than 100 characters. 一口气不要超过 100 个字,憋死我了。")
-        return gr.update()
+        print(f"Too Long Text: {text}")
+        gr.Warning("Too long! No more than 100 characters. 一口气不要超过 100 个字,憋坏我了。")
+        if exceed_flag:
+            return gr.update(value="./assets/audios/nomorethan100.wav", autoplay=True), False
+        else:
+            return gr.update(value="./assets/audios/overlength.wav", autoplay=True), True
     audio_list = []
-    audio_list.extend(
-        generate_audio(
-            text.split("|"),
-            sdp_ratio,
-            noise_scale,
-            noise_scale_w,
-            length_scale,
-            speaker,
-            language,
-        )
-    )
+    if len(text) > 42:
+        print(f"Long Text: {text}")
+        para_list = re_matching.cut_para(text)
+        for p in para_list:
+            audio_list_sent = []
+            sent_list = re_matching.cut_sent(p)
+            for s in sent_list:
+                audio = infer(
+                    s,
+                    sdp_ratio=sdp_ratio,
+                    noise_scale=noise_scale,
+                    noise_scale_w=noise_scale_w,
+                    length_scale=length_scale,
+                    sid=speaker,
+                    language=language,
+                    hps=hps,
+                    net_g=net_g,
+                    device=device,
+                )
+                audio_list_sent.append(audio)
+                silence = np.zeros((int)(44100 * interval_between_sent))
+                audio_list_sent.append(silence)
+            if (interval_between_para - interval_between_sent) > 0:
+                silence = np.zeros(
+                    (int)(44100 * (interval_between_para - interval_between_sent))
+                )
+                audio_list_sent.append(silence)
+            audio16bit = gr.processing_utils.convert_to_16_bit_wav(
+                np.concatenate(audio_list_sent)
+            )  # normalize volume over the complete sentence
+            audio_list.append(audio16bit)
+    else:
+        print(f"Short Text: {text}")
+        silence = np.zeros(hps.data.sampling_rate // 2, dtype=np.int16)
+        with torch.no_grad():
+            for piece in text.split("|"):
+                audio = infer(
+                    piece,
+                    sdp_ratio=sdp_ratio,
+                    noise_scale=noise_scale,
+                    noise_scale_w=noise_scale_w,
+                    length_scale=length_scale,
+                    sid=speaker,
+                    language=language,
+                    hps=hps,
+                    net_g=net_g,
+                    device=device,
+                )
+                audio16bit = gr.processing_utils.convert_to_16_bit_wav(audio)
+                audio_list.append(audio16bit)
+                audio_list.append(silence)  # append the silence to the list
 
     audio_concat = np.concatenate(audio_list)
-    return (hps.data.sampling_rate, audio_concat)
+    return (hps.data.sampling_rate, audio_concat), exceed_flag
 
 
 def init_fn():
-    gr.Info("2023-11-23: 用的人多起来了,生成可能要等一会。买了更好的服务器看看效果怎么样。")
+    gr.Info("2023-11-24: 优化长句生成效果;更新了一些小彩蛋。")
     gr.Info("2023-11-23: Only support Chinese now. Trying to train a mutilingual model.")
 
+
 with open("./css/style.css", "r", encoding="utf-8") as f:
     customCSS = f.read()
 
 with gr.Blocks(css=customCSS) as demo:
-    # talkingFlowerModel = gr.HTML("""<div id="talking_flower_model">123</div>""")
+    exceed_flag = gr.State(value=False)
     talkingFlowerPic = gr.HTML("""<img src="file=assets/flower-2x.webp" alt="TalkingFlowerPic">""", elem_id="talking_flower_pic")
     input_text = gr.Textbox(lines=1, label="Talking Flower will say:", elem_classes="wonder-card", elem_id="input_text")
     speak_button = gr.Button("Speak!", elem_id="comfirm_button", elem_classes="button wonder-card")
@@ -98,13 +117,13 @@ with gr.Blocks(css=customCSS) as demo:
     )
     input_text.submit(
         speak_fn,
-        inputs=[input_text],
-        outputs=[audio_output],
+        inputs=[input_text, exceed_flag],
+        outputs=[audio_output, exceed_flag],
    )
     speak_button.click(
         speak_fn,
-        inputs=[input_text],
-        outputs=[audio_output],
+        inputs=[input_text, exceed_flag],
+        outputs=[audio_output, exceed_flag],
    )
 
 
@@ -118,6 +137,5 @@ if __name__ == "__main__":
         show_api=False,
         # server_name=server_name,
         # server_port=server_port,
-        share=True,
-        inbrowser=True,  # inbrowser must not be enabled when running under Docker
+        inbrowser=True,
    )
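The functional core of the app.py change is the new exceed_flag threaded through speak_fn as a gr.State value: it goes in with the text and comes back next to the audio output, so repeated over-length inputs alternate between the two newly added warning clips. A minimal, self-contained sketch of that pattern (component and function names here are illustrative, not copied from app.py):

# Hedged sketch of the gr.State round-trip used by this commit; the real
# speak_fn also synthesizes speech for valid input, which is elided here.
import gradio as gr

def speak(text, exceed_flag):
    if len(text) > 100:
        clip = "./assets/audios/nomorethan100.wav" if exceed_flag else "./assets/audios/overlength.wav"
        # Flip the flag so the next over-length request plays the other clip.
        return gr.update(value=clip, autoplay=True), not exceed_flag
    return gr.update(value=None), exceed_flag  # placeholder for real synthesis

with gr.Blocks() as demo:
    exceed_flag = gr.State(value=False)  # per-session flag, as in the diff
    text = gr.Textbox()
    audio = gr.Audio()
    text.submit(speak, inputs=[text, exceed_flag], outputs=[audio, exceed_flag])

Because gr.State is scoped to a browser session, the flag is tracked per visitor rather than globally.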
assets/audios/nomorethan100.wav ADDED
Binary file (166 kB).
 
assets/audios/overlength.wav ADDED
Binary file (235 kB).
 
tools/__pycache__/webui.cpython-39.pyc ADDED
Binary file (5.06 kB).
 
tools/webui.py ADDED
@@ -0,0 +1,64 @@
+import gradio as gr
+import os
+
+GradioTemplateResponseOriginal = gr.routes.templates.TemplateResponse
+root_path = os.path.dirname(os.path.realpath(__file__))
+
+def webpath(fn):
+    if fn.startswith(root_path):
+        web_path = os.path.relpath(fn, root_path).replace('\\', '/')
+    else:
+        web_path = os.path.abspath(fn)
+    return f'file={web_path}?{os.path.getmtime(fn)}'
+
+def list_scripts(scriptdirname, extension):
+    scripts_list = []
+    scripts_dir = os.path.join(root_path, scriptdirname)
+    if os.path.exists(scripts_dir):
+        for filename in sorted(os.listdir(scripts_dir)):
+            scripts_list.append(ScriptFile(shared.assets_path, filename, os.path.join(scripts_dir, filename)))
+    scripts_list = [x for x in scripts_list if os.path.splitext(x.path)[1].lower() == extension and os.path.isfile(x.path)]
+    return scripts_list
+
+def javascript_html():
+    head = ""
+    for script in list_scripts("javascript", ".js"):
+        head += f'<script type="text/javascript" src="{webpath(script.path)}"></script>\n'
+    for script in list_scripts("javascript", ".mjs"):
+        head += f'<script type="module" src="{webpath(script.path)}"></script>\n'
+    return head
+
+def reload_javascript():
+    js = javascript_html()
+    js += """
+    <link rel="preconnect" href="https://fonts.googleapis.com">
+    <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
+    <link href="https://fonts.googleapis.com/css2?family=Tilt+Warp&display=swap" rel="stylesheet">
+    <script type="importmap">
+    {
+        "imports": {
+            "three": "https://unpkg.com/three@v0.158.0/build/three.module.js",
+            "three/addons/": "https://unpkg.com/three@v0.158.0/examples/jsm/"
+        }
+    }
+    </script>
+    """
+
+    corner = """
+ <a href="https://www.bilibili.com/video/BV14M411Z7FR" class="bilibili-corner" aria-label="View author on Bilibili">
+ <svg aria-hidden="true" style="fill:#F87399;/* color:#fff; */position: absolute;top: 0;border: 0;right: 0;" viewBox="0 0 80 80" height="80" width="80">
+ <path d="M0 0L80 80V0H0Z" fill="#F87399"></path>
+ <path d="M63.7507 10.5251H64.8893C66.9027 10.5985 68.5813 11.3099 69.92 12.6621C71.2587 14.013 71.952 15.7155 72 17.767V27.7595C71.952 29.8096 71.2587 31.5189 69.92 32.882C68.5813 34.2451 66.904 34.9511 64.8893 35H47.1107C45.0973 34.9511 43.4187 34.2451 42.08 32.882C40.7413 31.5189 40.048 29.811 40 27.7595V17.767C40.048 15.7155 40.7413 14.013 42.08 12.6621C43.4187 11.3099 45.096 10.5985 47.1107 10.5251H48.1427L46.5773 9.00454C46.4164 8.84495 46.2892 8.65348 46.2036 8.44193C46.1181 8.23038 46.076 8.00323 46.08 7.77448C46.08 7.29115 46.2453 6.88113 46.5773 6.54307L46.6133 6.50641C46.9693 6.16835 47.3773 6 47.84 6C48.3027 6 48.7107 6.16835 49.0667 6.50641L52.8707 10.236C52.9653 10.3323 53.0493 10.4287 53.12 10.5251H58.8093C58.8648 10.4178 58.9368 10.3201 59.0227 10.236L62.8267 6.50641C63.1827 6.16835 63.5907 6 64.0533 6C64.516 6 64.936 6.20501 65.292 6.54307C65.648 6.88113 65.8133 7.29115 65.8133 7.77448C65.8133 8.25646 65.648 8.66648 65.316 9.00454L63.7507 10.5251ZM47.1107 14.0375C46.116 14.0619 45.28 14.4122 44.604 15.0869C43.9293 15.7631 43.5787 16.6211 43.556 17.6584V27.8681C43.5787 28.9054 43.9293 29.7621 44.604 30.4382C45.28 31.1143 46.116 31.4646 47.1107 31.4877H64.8893C65.884 31.4646 66.72 31.1143 67.396 30.4382C68.0707 29.7621 68.4213 28.9054 68.444 27.8681V17.6584C68.4213 16.6198 68.0707 15.7631 67.396 15.0869C66.72 14.4122 65.884 14.0619 64.8893 14.0375H47.1107ZM50.6667 19.2876C51.164 19.2876 51.5787 19.4559 51.9107 19.794C52.244 20.1321 52.4213 20.5665 52.444 21.0974V22.6899C52.4213 23.2208 52.244 23.6552 51.9107 23.9933C51.5787 24.3327 51.164 24.5011 50.6667 24.5011C50.1693 24.5011 49.7547 24.3314 49.4227 23.9933C49.0893 23.6552 48.912 23.2208 48.8893 22.6899V21.0974C48.8893 20.591 49.0613 20.1619 49.404 19.8117C49.748 19.4627 50.1693 19.2876 50.6667 19.2876ZM61.3333 19.2876C61.8307 19.2876 62.2453 19.4559 62.5773 19.794C62.9107 20.1321 63.088 20.5665 63.1107 21.0974V22.6899C63.088 23.2208 62.9107 23.6552 62.5773 23.9933C62.2453 24.3327 61.8307 24.5011 61.3333 24.5011C60.836 24.5011 60.4213 24.3314 60.0893 23.9933C59.756 23.6552 59.5787 23.2208 59.556 22.6899V21.0974C59.5787 20.5665 59.756 20.1321 60.0893 19.794C60.4213 19.4559 60.836 19.2876 61.3333 19.2876Z" fill="white"></path>
+ </svg>
+ </a>
+ """
55
+
56
+ def template_response(*args, **kwargs):
57
+ res = GradioTemplateResponseOriginal(*args, **kwargs)
58
+ # res.body = res.body.replace(b'</head>', f'{meta}{js}</head>'.encode("utf8"))
59
+ res.body = res.body.replace(b'</head>', f'{js}</head>'.encode("utf8"))
60
+ res.body = res.body.replace(b'</body>', f'{corner}</body>'.encode("utf8"))
61
+ res.init_headers()
62
+ return res
63
+
64
+ gr.routes.templates.TemplateResponse = template_response
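app.py now imports reload_javascript from this new module, but the call site itself is not part of the diff. A hedged sketch of how the patch is typically wired, assuming it runs before the interface is launched (the placeholder UI is an assumption):

# Hypothetical call site, not shown in this commit: reload_javascript() replaces
# gr.routes.templates.TemplateResponse, so it must run before launch() serves a
# page, otherwise the injected <script>/<link> tags and the Bilibili corner
# never reach the rendered HTML.
import gradio as gr
from tools.webui import reload_javascript  # module added in this commit

reload_javascript()

with gr.Blocks() as demo:
    gr.Markdown("placeholder UI")

demo.launch(show_api=False, inbrowser=True)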