Spaces:

Kit-Lemonfoot
/

vtuber_rvc_models

Running

App Files Community

Kit-Lemonfoot committed on Nov 12, 2023

Commit

12c4d09

1 Parent(s): 61f8fb4

Properly restricted audio length on Spaces. (The space isn't built for song covers. Take that shit to Colab or local.)

Browse files

Files changed (1) hide show

app.py +7 -3

app.py CHANGED Viewed

@@ -28,6 +28,7 @@ from config import Config
 config = Config()
 logging.getLogger("numba").setLevel(logging.WARNING)
 limitation = os.getenv("SYSTEM") == "spaces"
 audio_mode = []
 f0method_mode = ["pm", "crepe", "harvest"]
@@ -50,7 +51,7 @@ def infer(name, path, index, vc_audio_mode, vc_input, vc_upload, tts_text, tts_v
                 return "Please upload an audio file.", None
             sampling_rate, audio = vc_upload
             duration = audio.shape[0] / sampling_rate
-            if duration > 360 and limitation:
                 return "Too long! Please upload an audio file that is less than 1 minute.", None
             audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
             if len(audio.shape) > 1:
@@ -58,12 +59,15 @@ def infer(name, path, index, vc_audio_mode, vc_input, vc_upload, tts_text, tts_v
             if sampling_rate != 16000:
                 audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
         elif vc_audio_mode == "TTS Audio":
-            if len(tts_text) > 600 and limitation:
                 return "Text is too long.", None
             if tts_text is None or tts_voice is None:
                 return "You need to enter text and select a voice.", None
             asyncio.run(edge_tts.Communicate(tts_text, "-".join(tts_voice.split('-')[:-1])).save("tts.mp3"))
             audio, sr = librosa.load("tts.mp3", sr=16000, mono=True)
             vc_input = "tts.mp3"
         times = [0, 0, 0]
         f0_up_key = int(f0_up_key)
@@ -379,7 +383,7 @@ if __name__ == '__main__':
                                     vc_inst_preview = gr.Audio(label="Instrumental Preview", visible=False)
                                     vc_audio_preview = gr.Audio(label="Audio Preview", visible=False)
                                     # TTS
-                                    tts_text = gr.Textbox(visible=True, label="TTS text", info="Text to speech input", interactive=True)
                                     tts_voice = gr.Dropdown(label="Edge-tts speaker", choices=voices, visible=True, allow_custom_value=False, value="en-US-AnaNeural-Female", interactive=True)
                                 with gr.Column():
                                     vc_transform0 = gr.Number(label="Transpose", value=0, info='Type "12" to change from male to female voice. Type "-12" to change female to male voice')

 config = Config()
 logging.getLogger("numba").setLevel(logging.WARNING)
 limitation = os.getenv("SYSTEM") == "spaces"
+#limitation=True
 audio_mode = []
 f0method_mode = ["pm", "crepe", "harvest"]
                 return "Please upload an audio file.", None
             sampling_rate, audio = vc_upload
             duration = audio.shape[0] / sampling_rate
+            if duration > 60 and limitation:
                 return "Too long! Please upload an audio file that is less than 1 minute.", None
             audio = (audio / np.iinfo(audio.dtype).max).astype(np.float32)
             if len(audio.shape) > 1:
             if sampling_rate != 16000:
                 audio = librosa.resample(audio, orig_sr=sampling_rate, target_sr=16000)
         elif vc_audio_mode == "TTS Audio":
+            if len(tts_text) > 250 and limitation:
                 return "Text is too long.", None
             if tts_text is None or tts_voice is None:
                 return "You need to enter text and select a voice.", None
             asyncio.run(edge_tts.Communicate(tts_text, "-".join(tts_voice.split('-')[:-1])).save("tts.mp3"))
             audio, sr = librosa.load("tts.mp3", sr=16000, mono=True)
+            duration = audio.shape[0] / sr
+            if duration > 30 and limitation:
+                return "Your text generated an audio that was too long.", None
             vc_input = "tts.mp3"
         times = [0, 0, 0]
         f0_up_key = int(f0_up_key)
                                     vc_inst_preview = gr.Audio(label="Instrumental Preview", visible=False)
                                     vc_audio_preview = gr.Audio(label="Audio Preview", visible=False)
                                     # TTS
+                                    tts_text = gr.Textbox(visible=True, label="TTS text", info="Text to speech input (There is a limit of 250 characters)", interactive=True)
                                     tts_voice = gr.Dropdown(label="Edge-tts speaker", choices=voices, visible=True, allow_custom_value=False, value="en-US-AnaNeural-Female", interactive=True)
                                 with gr.Column():
                                     vc_transform0 = gr.Number(label="Transpose", value=0, info='Type "12" to change from male to female voice. Type "-12" to change female to male voice')