Omnibus committed on
Commit
5c8292d
1 Parent(s): 7cd15e7

Update vc.py

Browse files
Files changed (1) hide show
  1. vc.py +31 -1
vc.py CHANGED
@@ -4,6 +4,10 @@ from pathlib import Path
4
  from pytube import YouTube
5
  from pydub import AudioSegment
6
  from transformers import AutoProcessor, BarkModel
 
 
 
 
7
  from TTS.api import TTS
8
  import scipy
9
  import uuid
@@ -20,6 +24,30 @@ model = BarkModel.from_pretrained("suno/bark-small").to(device)
20
  num_list = ["1","2","3","4","5","6","7","8","9","10"]
21
  lang_list = ["en","de"]
22
  #SAMPLE_RATE = 24_000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  def run_bark(text, n='1', lang='en'):
24
  uid=uuid.uuid4()
25
  #history_prompt = []
@@ -115,6 +143,8 @@ with gr.Blocks() as app:
115
  tog = gr.Radio(label="Input Type", choices=["Preset","Custom"], value="Preset")
116
  with gr.Group(visible=True) as group_1:
117
  speaker_num = gr.Dropdown(label="Speaker Voice", choices=num_list,value="1")
 
 
118
  with gr.Group(visible=False) as group_2:
119
  with gr.Row():
120
  with gr.Column():
@@ -132,7 +162,7 @@ with gr.Blocks() as app:
132
  load_yt_btn = gr.Button("Load URL")
133
  yt_vid = gr.Video(interactive=False)
134
 
135
-
136
  tog.change(tog_in,tog,[group_1,group_2])
137
  #in_aud_file.change(pre_aud,in_aud_file,aud_file)
138
  load_yt_btn.click(load_video_yt, in_aud_yt, [yt_vid,in_aud_file,aud_file])
 
4
  from pytube import YouTube
5
  from pydub import AudioSegment
6
  from transformers import AutoProcessor, BarkModel
7
+
8
+ from bark import SAMPLE_RATE, generate_audio, preload_models
9
+ from bark.generation import SUPPORTED_LANGS
10
+
11
  from TTS.api import TTS
12
  import scipy
13
  import uuid
 
24
  num_list = ["1","2","3","4","5","6","7","8","9","10"]
25
  lang_list = ["en","de"]
26
  #SAMPLE_RATE = 24_000
27
# When True, gen_tts skips model inference and returns an all-zero buffer.
DEBUG_MODE = False

# (label, preset) pairs: ten numbered speakers for every entry of
# SUPPORTED_LANGS. Each entry is unpacked as a 2-tuple whose second element
# is used as the language tag in both the label and the preset name.
_speaker_presets = [
    (f"Speaker {idx} ({code})", f"{code}_speaker_{idx}")
    for _name, code in SUPPORTED_LANGS
    for idx in range(10)
]

# Labels offered in the UI dropdown: the two fixed entries first, then every
# speaker label in generation order.
AVAILABLE_PROMPTS = [
    "Unconditional",
    "Announcer",
    *(label for label, _preset in _speaker_presets),
]

# Maps a dropdown label to the preset name handed to generate_audio
# (None for "Unconditional").
PROMPT_LOOKUP = dict(_speaker_presets)
PROMPT_LOOKUP["Unconditional"] = None
PROMPT_LOOKUP["Announcer"] = "announcer"
38
+
39
def gen_tts(text, history_prompt):
    """Synthesize speech for ``text`` using the selected voice preset.

    Args:
        text: The text to speak.
        history_prompt: A label that is a key of ``PROMPT_LOOKUP`` (the
            labels listed in ``AVAILABLE_PROMPTS``); it is translated to the
            preset name passed to ``generate_audio`` (``None`` for
            "Unconditional").

    Returns:
        A ``(SAMPLE_RATE, samples)`` tuple where ``samples`` is a NumPy
        ``int16`` array.

    Raises:
        KeyError: If ``history_prompt`` is not a key of ``PROMPT_LOOKUP``.
    """
    # NOTE(review): relies on a module-level ``np`` (numpy) import that is
    # not visible in this hunk -- confirm it exists at the top of the file.
    preset = PROMPT_LOOKUP[history_prompt]

    if DEBUG_MODE:
        # Skip inference entirely: SAMPLE_RATE zero samples.
        audio_arr = np.zeros(SAMPLE_RATE)
    else:
        audio_arr = generate_audio(text, history_prompt=preset)

    # Clamp before scaling: presumably the waveform is float data nominally
    # in [-1, 1]; any sample outside that range would overflow int16 in the
    # cast below and wrap around instead of saturating.
    audio_arr = np.clip(audio_arr, -1.0, 1.0)
    audio_arr = (audio_arr * 32767).astype(np.int16)
    return (SAMPLE_RATE, audio_arr)
48
+
49
+
50
+
51
  def run_bark(text, n='1', lang='en'):
52
  uid=uuid.uuid4()
53
  #history_prompt = []
 
143
  tog = gr.Radio(label="Input Type", choices=["Preset","Custom"], value="Preset")
144
  with gr.Group(visible=True) as group_1:
145
  speaker_num = gr.Dropdown(label="Speaker Voice", choices=num_list,value="1")
146
+ options = gr.Dropdown(AVAILABLE_PROMPTS, value="Speaker 1 (en)", label="Acoustic Prompt", elem_id="speaker_option")
147
+ semantic_btn=gr.Button("Run Semantic")
148
  with gr.Group(visible=False) as group_2:
149
  with gr.Row():
150
  with gr.Column():
 
162
  load_yt_btn = gr.Button("Load URL")
163
  yt_vid = gr.Video(interactive=False)
164
 
165
+ semantic_btn.click(gen_tts,[in_text,options],out_audio)
166
  tog.change(tog_in,tog,[group_1,group_2])
167
  #in_aud_file.change(pre_aud,in_aud_file,aud_file)
168
  load_yt_btn.click(load_video_yt, in_aud_yt, [yt_vid,in_aud_file,aud_file])