Omnibus committed on
Commit
3846d12
1 Parent(s): 01bbbb5

Update vc.py

Browse files
Files changed (1) hide show
  1. vc.py +45 -12
vc.py CHANGED
@@ -13,18 +13,49 @@ uid = uuid.uuid4()
13
 
14
  device = "cuda" if torch.cuda.is_available() else "cpu"
15
 
16
- def custom_bark(inp, in_aud=None, trim_aud=None, in_aud_mic=None):
17
- if in_aud_mic != None:
18
- speaker_wav=in_aud_mic
19
- if in_aud !=None and trim_aud==None:
20
- speaker_wav=in_aud
21
- #speaker_wav=Path(f"{uid}-tmp_aud.mp4")
22
- if trim_aud != None:
23
- speaker_wav=Path(f"{uid}-trim.wav")
24
- tts = TTS(model_name="tts_models/multilingual/multi-dataset/your_tts", progress_bar=False).to(device)
25
- tts.tts_to_file(inp, speaker_wav=speaker_wav, language="en", file_path=f"{uid}-output.wav")
26
- return (f"{uid}-output.wav")
27
-
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28
  def load_video_yt(vid):
29
  yt = YouTube(vid)
30
  vid = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first().download(filename=f"{uid}-tmp.mp4")
@@ -63,6 +94,8 @@ with gr.Blocks() as app:
63
  with gr.Group():
64
  with gr.Row():
65
  gr.Markdown('''<H1> Audio Source:''')
 
 
66
  with gr.Row():
67
  with gr.Column():
68
 
 
13
 
14
  device = "cuda" if torch.cuda.is_available() else "cpu"
15
 
16
+ processor = AutoProcessor.from_pretrained("suno/bark-small")
17
+ model = BarkModel.from_pretrained("suno/bark-small").to(device)
18
+ num_list = ["1","2","3","4","5","6","7","8","9","10"]
19
+ lang_list = ["en","de"]
20
+ #SAMPLE_RATE = 24_000
21
def run_bark(text, n='1', lang='en'):
    """Synthesize *text* with a Bark voice preset and write it to a WAV file.

    Args:
        text: Prompt to be spoken.
        n: 1-based speaker number (string or int); it is converted to the
            0-based suffix of the preset name, so ``'1'`` selects
            ``speaker_0``.
        lang: Language code inserted into the preset name (e.g. ``'en'``).

    Returns:
        Name of the file that was written, ``bark_out-<uuid>.wav``.

    Raises:
        ValueError: If *n* cannot be converted to an integer.
    """
    # A fresh id per call gives every generated clip its own file name.
    uid = uuid.uuid4()
    voice_preset = f"v2/{lang}_speaker_{int(n)-1}"

    # The model was moved to `device` at load time; the processor output is
    # created on the CPU, so it has to be moved as well or generate() fails
    # with a device mismatch when CUDA is in use.
    inputs = processor(
        text=text,
        voice_preset=voice_preset,
        return_tensors="pt",
    ).to(device)

    print("generating")
    speech_values = model.generate(
        **inputs, coarse_temperature=0.8, temperature=0.5, do_sample=True
    )
    sampling_rate = model.generation_config.sample_rate

    print("writing")
    out_path = f"bark_out-{uid}.wav"
    scipy.io.wavfile.write(
        out_path,
        rate=sampling_rate,
        data=speech_values.cpu().numpy().squeeze(),
    )
    return out_path
42
+
43
+
44
+
45
def custom_bark(inp, tog, in_aud=None, trim_aud=None, in_aud_mic=None):
    """Speak *inp* using either a cloned voice or a Bark preset voice.

    Args:
        inp: Text to synthesize.
        tog: ``"Custom"`` to clone the voice of a reference clip with the
            YourTTS model, ``"Preset"`` to call ``run_bark`` with its
            default speaker and language.
        in_aud: Reference audio; used when no trimmed clip is given.
        trim_aud: When not ``None``, the reference is read from
            ``<uid>-trim.wav`` and takes precedence over the other sources.
        in_aud_mic: Microphone recording; used only when neither
            ``trim_aud`` nor ``in_aud`` is given.

    Returns:
        Path of the generated WAV file, or ``None`` when *tog* is neither
        ``"Custom"`` nor ``"Preset"``.

    Raises:
        ValueError: If *tog* is ``"Custom"`` and no reference audio at all
            was supplied.
    """
    if tog == "Preset":
        return run_bark(inp)
    if tog != "Custom":
        return None

    # Same precedence as the original chain of independent `if`s, where a
    # later match overwrote an earlier one: trimmed clip > in_aud > mic.
    if trim_aud is not None:
        speaker_wav = Path(f"{uid}-trim.wav")
    elif in_aud is not None:
        speaker_wav = in_aud
    elif in_aud_mic is not None:
        speaker_wav = in_aud_mic
    else:
        # Fail before loading the TTS model instead of hitting an unbound
        # `speaker_wav` afterwards.
        raise ValueError(
            "Custom voice requires reference audio: provide in_aud, "
            "trim_aud or in_aud_mic."
        )

    tts = TTS(
        model_name="tts_models/multilingual/multi-dataset/your_tts",
        progress_bar=False,
    ).to(device)
    out_path = f"{uid}-output.wav"
    tts.tts_to_file(inp, speaker_wav=speaker_wav, language="en", file_path=out_path)
    return out_path
59
  def load_video_yt(vid):
60
  yt = YouTube(vid)
61
  vid = yt.streams.filter(progressive=True, file_extension='mp4').order_by('resolution').desc().first().download(filename=f"{uid}-tmp.mp4")
 
94
  with gr.Group():
95
  with gr.Row():
96
  gr.Markdown('''<H1> Audio Source:''')
97
+ with gr.Row():
98
+ with gr.Radio(label="Input Type", choices=["Preset","Custom"], value="Preset")
99
  with gr.Row():
100
  with gr.Column():
101