"""Gradio demo that turns text into speech with the suno/bark model."""

import tempfile

import gradio as gr
import numpy as np
import scipy.io.wavfile  # explicit submodule import; also binds ``scipy``
from transformers import AutoProcessor, BarkModel

# Speaker preset passed to the Bark processor; change it to use another voice.
VOICE_PRESET = "v2/en_speaker_6"

# Load the processor and model once at start-up so every request reuses them.
processor = AutoProcessor.from_pretrained("suno/bark")
model = BarkModel.from_pretrained("suno/bark")


def generate_audio(text: str) -> str:
    """Synthesize speech for *text* and return the path of the WAV file.

    Args:
        text: The sentence(s) to speak.

    Returns:
        Path to a newly created ``.wav`` file holding the generated audio.

    Raises:
        gr.Error: If *text* is empty or contains only whitespace.
    """
    if not text or not text.strip():
        # Surface a readable message in the UI instead of running the model
        # on an empty prompt.
        raise gr.Error("Please enter some text to synthesize.")

    inputs = processor(text, voice_preset=VOICE_PRESET)
    generated = model.generate(**inputs)

    # Move the tensor to CPU and convert it to a NumPy array for SciPy.
    waveform = generated.cpu().numpy().squeeze()
    sample_rate = model.generation_config.sample_rate

    # Use a unique file per request: a fixed path would let overlapping
    # requests overwrite each other's audio, and "/tmp" is not portable.
    # delete=False keeps the file on disk after the handle is closed so the
    # caller can still read it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_file = tmp.name
    scipy.io.wavfile.write(output_file, rate=sample_rate, data=waveform)

    return output_file


# Define the Gradio interface.
iface = gr.Interface(
    fn=generate_audio,
    inputs="text",
    outputs="audio",
    examples=[["Hello, my dog is cute"]],
    allow_flagging="never",
)

# Launch only when executed as a script, so importing this module (e.g. for
# reuse of ``generate_audio``) does not start a web server.
if __name__ == "__main__":
    iface.launch()