EZ-Voice-Clone-EZ

Runtime error

App Files Files Community

Omnibus committed on Feb 16

Commit

5c8292d

•

1 Parent(s): 7cd15e7

Update vc.py

Browse files

Files changed (1) hide show

vc.py +31 -1

vc.py CHANGED Viewed

@@ -4,6 +4,10 @@ from pathlib import Path
 from pytube import YouTube
 from pydub import AudioSegment
 from transformers import AutoProcessor, BarkModel
 from TTS.api import TTS
 import scipy
 import uuid
@@ -20,6 +24,30 @@ model = BarkModel.from_pretrained("suno/bark-small").to(device)
 num_list = ["1","2","3","4","5","6","7","8","9","10"]
 lang_list = ["en","de"]
 #SAMPLE_RATE = 24_000
 def run_bark(text, n='1', lang='en'):
     uid=uuid.uuid4()
     #history_prompt = []
@@ -115,6 +143,8 @@ with gr.Blocks() as app:
             tog = gr.Radio(label="Input Type", choices=["Preset","Custom"], value="Preset")
     with gr.Group(visible=True) as group_1:
         speaker_num = gr.Dropdown(label="Speaker Voice", choices=num_list,value="1")
     with gr.Group(visible=False) as group_2:
         with gr.Row():
             with gr.Column():
@@ -132,7 +162,7 @@ with gr.Blocks() as app:
                 load_yt_btn = gr.Button("Load URL")
                 yt_vid = gr.Video(interactive=False)
     tog.change(tog_in,tog,[group_1,group_2])
     #in_aud_file.change(pre_aud,in_aud_file,aud_file)
     load_yt_btn.click(load_video_yt, in_aud_yt, [yt_vid,in_aud_file,aud_file])

 from pytube import YouTube
 from pydub import AudioSegment
 from transformers import AutoProcessor, BarkModel
+from bark import SAMPLE_RATE, generate_audio, preload_models
+from bark.generation import SUPPORTED_LANGS
 from TTS.api import TTS
 import scipy
 import uuid
 num_list = ["1","2","3","4","5","6","7","8","9","10"]
 lang_list = ["en","de"]
 #SAMPLE_RATE = 24_000
+DEBUG_MODE=False  # when True, gen_tts skips generate_audio and returns an all-zero array
+AVAILABLE_PROMPTS = ["Unconditional", "Announcer"]  # labels offered as choices in the "Acoustic Prompt" dropdown
+PROMPT_LOOKUP = {}  # maps a dropdown label to the history_prompt value given to generate_audio
+for _, lang in SUPPORTED_LANGS:  # only the second element of each SUPPORTED_LANGS pair is used
+    for n in range(10):  # speaker indices 0-9 for every language
+        label = f"Speaker {n} ({lang})"
+        AVAILABLE_PROMPTS.append(label)
+        PROMPT_LOOKUP[label] = f"{lang}_speaker_{n}"
+PROMPT_LOOKUP["Unconditional"] = None  # this label passes history_prompt=None
+PROMPT_LOOKUP["Announcer"] = "announcer"
+def gen_tts(text, history_prompt):  # label -> (SAMPLE_RATE, int16 samples); disabled params: , temp_semantic, temp_waveform):
+    history_prompt = PROMPT_LOOKUP[history_prompt]  # translate the UI label; an unknown label raises KeyError
+    if DEBUG_MODE:
+        audio_arr = np.zeros(SAMPLE_RATE)  # SAMPLE_RATE zeros; NOTE(review): np is not imported in the visible hunks - confirm
+    else:
+        # disabled extra arguments: , text_temp=temp_semantic, waveform_temp=temp_waveform)
+        audio_arr = generate_audio(text, history_prompt=history_prompt)
+    audio_arr = (audio_arr * 32767).astype(np.int16)  # scale to int16; assumes samples lie in [-1, 1] - TODO confirm
+    return (SAMPLE_RATE, audio_arr)  # (rate, samples) pair routed to out_audio by semantic_btn.click
 def run_bark(text, n='1', lang='en'):
     uid=uuid.uuid4()
     #history_prompt = []
             tog = gr.Radio(label="Input Type", choices=["Preset","Custom"], value="Preset")
     with gr.Group(visible=True) as group_1:
         speaker_num = gr.Dropdown(label="Speaker Voice", choices=num_list,value="1")
+        options = gr.Dropdown(AVAILABLE_PROMPTS, value="Speaker 1 (en)", label="Acoustic Prompt", elem_id="speaker_option")
+        semantic_btn=gr.Button("Run Semantic")
     with gr.Group(visible=False) as group_2:
         with gr.Row():
             with gr.Column():
                 load_yt_btn = gr.Button("Load URL")
                 yt_vid = gr.Video(interactive=False)
+    semantic_btn.click(gen_tts,[in_text,options],out_audio)
     tog.change(tog_in,tog,[group_1,group_2])
     #in_aud_file.change(pre_aud,in_aud_file,aud_file)
     load_yt_btn.click(load_video_yt, in_aud_yt, [yt_vid,in_aud_file,aud_file])