Spaces:
Running
Running
File size: 1,699 Bytes
cfd58c5 2a5abe3 88cd06b cfd58c5 88cd06b cfd58c5 88cd06b b026f7e 88cd06b b026f7e f4e63b7 191ed4a a2597a8 f3aa612 cfd58c5 249facd f3aa612 cfd58c5 c370655 f3aa612 cfd58c5 107d5cd b026f7e 6e4cda4 b026f7e 107d5cd cfd58c5 107d5cd b026f7e cfd58c5 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 |
import gradio as gr
import torch
from transformers import AutoProcessor, BarkModel
import scipy
# device = "cuda" if torch.cuda.is_available() else "cpu"
# model = BarkModel.from_pretrained("suno/bark-small", torch_dtype=torch.float16).to(device)
# model.enable_cpu_offload()
device = "cpu"
processor = AutoProcessor.from_pretrained("suno/bark-small")
model = BarkModel.from_pretrained("suno/bark-small").to(device)
num_list = ["1","2","3","4","5","6","7","8","9","10"]
lang_list = ["en","fr"]
def run_bark(text, n, lang):
#history_prompt = []
semantic_prompt=f"v2/{lang}_speaker_{int(n)}"
#text=["Hello, my name is Suno. And, uh — and I like pizza. [laughs] But I also have other interests such as playing tic tac toe."],
inputs = processor(text=text,
voice_preset = semantic_prompt,
return_tensors="pt",
)
speech_values = model.generate(**inputs, do_sample=True)
sampling_rate = model.generation_config.sample_rate
#sampling_rate = model.config.sample_rate
#sampling_rate = 24000
scipy.io.wavfile.write("bark_out.wav", rate=sampling_rate, data=speech_values.cpu().numpy().squeeze())
return ("bark_out.wav")
with gr.Blocks() as app:
with gr.Column():
in_text = gr.Textbox()
with gr.Row():
speaker_num = gr.Dropdown(label="Speaker Voice", choices=num_list,value="1")
speaker_lang = gr.Dropdown(label="Speaker Language", choices=lang_list,value="1")
#speaker_num = gr.Number(value=0)
go_btn = gr.Button()
with gr.Column():
out_audio = gr.Audio()
go_btn.click(run_bark,[in_text, speaker_num, speaker_lang],out_audio)
app.launch() |