import gradio as gr
import torch
# NOTE: the submodule must be imported explicitly — a bare `import scipy`
# does not guarantee `scipy.io.wavfile` is available as an attribute.
import scipy.io.wavfile
from transformers import AutoProcessor, AutoModel

# Load the Bark small TTS checkpoint once at module import so the Gradio
# callback reuses the same processor/model on every request.
processor = AutoProcessor.from_pretrained("suno/bark-small")
model = AutoModel.from_pretrained("suno/bark-small")


def run_bark(text, lang="en", n=1):
    """Synthesize speech for *text* with Bark and return the output WAV path.

    Parameters
    ----------
    text : str
        The text to convert to speech.
    lang : str, optional
        Language code used to select the voice preset (default ``"en"``).
    n : int, optional
        Speaker index within that language's preset set (default ``1``).

    Returns
    -------
    str
        Path of the WAV file written to disk (``"bark_out.wav"``).
    """
    voice_preset = f"v2/{lang}_speaker_{n}"
    inputs = processor(
        text=text,
        voice_preset=voice_preset,
        return_tensors="pt",
    )
    speech_values = model.generate(**inputs, do_sample=True)
    # Bark exposes its output sample rate on the generation config.
    sampling_rate = model.generation_config.sample_rate
    scipy.io.wavfile.write(
        "bark_out.wav",
        rate=sampling_rate,
        data=speech_values.cpu().numpy().squeeze(),
    )
    return "bark_out.wav"


with gr.Blocks() as app:
    in_text = gr.Textbox()
    out_audio = gr.Audio()
    go_btn = gr.Button()
    go_btn.click(run_bark, in_text, out_audio)

app.launch()