Spaces:

skytnt
/

moe-tts

Running on CPU Upgrade

App Files Files Community

skytnt committed on Oct 22, 2022

Commit

7b1c05b

•

1 Parent(s): 7ad7315

120 words

Browse files

Files changed (2) hide show

README.md +1 -1
app.py +7 -7

README.md CHANGED Viewed

@@ -4,7 +4,7 @@ emoji: 😊🎙️
 colorFrom: red
 colorTo: pink
 sdk: gradio
-sdk_version: 3.4
 app_file: app.py
 pinned: false
 license: mit

 colorFrom: red
 colorTo: pink
 sdk: gradio
+sdk_version: 3.6
 app_file: app.py
 pinned: false
 license: mit

app.py CHANGED Viewed

@@ -28,7 +28,7 @@ def create_tts_fn(model, hps, speaker_ids):
     def tts_fn(text, speaker, speed, is_phoneme):
         if limitation:
             text_len = len(text)
-            max_len = 60
             if is_phoneme:
                 max_len *= 3
             else:
@@ -57,7 +57,7 @@ def create_vc_fn(model, hps, speaker_ids):
             return "You need to upload an audio", None
         sampling_rate, audio = input_audio
         duration = audio.shape[0] / sampling_rate
-        if limitation and duration > 15:
             return "Error: Audio is too long", None
         original_speaker_id = speaker_ids[original_speaker]
         target_speaker_id = speaker_ids[target_speaker]
@@ -93,7 +93,7 @@ def create_soft_vc_fn(model, hps, speaker_ids):
             return "You need to upload an audio", None
         sampling_rate, audio = input_audio
         duration = audio.shape[0] / sampling_rate
-        if limitation and duration > 15:
             return "Error: Audio is too long", None
         target_speaker_id = speaker_ids[target_speaker]
@@ -191,7 +191,7 @@ if __name__ == '__main__':
                                 gr.Markdown(f"## {name}\n\n"
                                             f"![cover](file/{cover_path})\n\n"
                                             f"lang: {lang}")
-                                tts_input1 = gr.TextArea(label="Text (60 words limitation)", value=example,
                                                          elem_id=f"tts-input{i}")
                                 tts_input2 = gr.Dropdown(label="Speaker", choices=speakers,
                                                          type="index", value=speakers[0])
@@ -240,7 +240,7 @@ if __name__ == '__main__':
                                                     value=speakers[0])
                             vc_input2 = gr.Dropdown(label="Target Speaker", choices=speakers, type="index",
                                                     value=speakers[1])
-                            vc_input3 = gr.Audio(label="Input Audio (15s limitation)")
                             vc_submit = gr.Button("Convert", variant="primary")
                             vc_output1 = gr.Textbox(label="Output Message")
                             vc_output2 = gr.Audio(label="Output Audio")
@@ -256,9 +256,9 @@ if __name__ == '__main__':
                             source_tabs = gr.Tabs()
                             with source_tabs:
                                 with gr.TabItem("microphone"):
-                                    vc_input2 = gr.Audio(label="Input Audio (15s limitation)", source="microphone")
                                 with gr.TabItem("upload"):
-                                    vc_input3 = gr.Audio(label="Input Audio (15s limitation)", source="upload")
                             vc_submit = gr.Button("Convert", variant="primary")
                             vc_output1 = gr.Textbox(label="Output Message")
                             vc_output2 = gr.Audio(label="Output Audio")

     def tts_fn(text, speaker, speed, is_phoneme):
         if limitation:
             text_len = len(text)
+            max_len = 120
             if is_phoneme:
                 max_len *= 3
             else:
             return "You need to upload an audio", None
         sampling_rate, audio = input_audio
         duration = audio.shape[0] / sampling_rate
+        if limitation and duration > 30:
             return "Error: Audio is too long", None
         original_speaker_id = speaker_ids[original_speaker]
         target_speaker_id = speaker_ids[target_speaker]
             return "You need to upload an audio", None
         sampling_rate, audio = input_audio
         duration = audio.shape[0] / sampling_rate
+        if limitation and duration > 30:
             return "Error: Audio is too long", None
         target_speaker_id = speaker_ids[target_speaker]
                                 gr.Markdown(f"## {name}\n\n"
                                             f"![cover](file/{cover_path})\n\n"
                                             f"lang: {lang}")
+                                tts_input1 = gr.TextArea(label="Text (120 words limitation)", value=example,
                                                          elem_id=f"tts-input{i}")
                                 tts_input2 = gr.Dropdown(label="Speaker", choices=speakers,
                                                          type="index", value=speakers[0])
                                                     value=speakers[0])
                             vc_input2 = gr.Dropdown(label="Target Speaker", choices=speakers, type="index",
                                                     value=speakers[1])
+                            vc_input3 = gr.Audio(label="Input Audio (30s limitation)")
                             vc_submit = gr.Button("Convert", variant="primary")
                             vc_output1 = gr.Textbox(label="Output Message")
                             vc_output2 = gr.Audio(label="Output Audio")
                             source_tabs = gr.Tabs()
                             with source_tabs:
                                 with gr.TabItem("microphone"):
+                                    vc_input2 = gr.Audio(label="Input Audio (30s limitation)", source="microphone")
                                 with gr.TabItem("upload"):
+                                    vc_input3 = gr.Audio(label="Input Audio (30s limitation)", source="upload")
                             vc_submit = gr.Button("Convert", variant="primary")
                             vc_output1 = gr.Textbox(label="Output Message")
                             vc_output2 = gr.Audio(label="Output Audio")