"""Gradio demo that converts text to speech with the Suno Bark model.

The UI collects a voice preset, an output WAV filename and the text to speak.
On submit the text is synthesised with Bark, written to a WAV file in the
current working directory, and played back in the browser.
"""

import os

import gradio as gr
import scipy
import scipy.io.wavfile  # ``import scipy`` alone does not guarantee the submodule is loaded
from transformers import AutoProcessor, BarkModel

############################
### Variable Declaration ###
############################

# -- Defaults
# Sample sentence pre-filled in the "Input Text" box so the demo works out of the box.
DEFAULT_INPUT_TEXT = (
    "Hello uh ... [clears throat], "
    "Bark is a transformer-based text-to-speech model proposed by Suno AI. "
    "This voice is auto generated"
)
# Used when the user leaves the filename box empty.
DEFAULT_OUTPUT_FILENAME = "output.wav"

# -- UI Variables
ui_input_voice_preseter = gr.Dropdown(
    ["v2/en_speaker_0", "v2/en_speaker_9"],
    label="Voice Presenter",
)
ui_input_filename = gr.Textbox(label="Input WAV Filename")
ui_input_text = gr.Textbox(lines=22, label="Input Text", value=DEFAULT_INPUT_TEXT)
ui_output = gr.Audio(label="Output")

# -- Model Variables
# Loaded once at import time so every request reuses the same weights.
processor = AutoProcessor.from_pretrained("suno/bark")
model = BarkModel.from_pretrained("suno/bark")

############################
### Processing Functions ###
############################


# -- On Click of Submit Button in UI
def submit(voice_preseter, filename, input_text):
    """Synthesise ``input_text`` with Bark and save it as a WAV file.

    Args:
        voice_preseter: Bark voice preset selected in the dropdown
            (e.g. ``"v2/en_speaker_0"``); passed to the processor as
            ``voice_preset``.
        filename: Name of the WAV file to create. Only the base name is
            used, an empty value falls back to ``DEFAULT_OUTPUT_FILENAME``,
            and a ``.wav`` suffix is appended when missing.
        input_text: Text to convert to speech.

    Returns:
        Absolute path of the written WAV file, which the ``gr.Audio`` output
        component loads for playback.
    """
    # The filename comes from a web form: strip any directory part so the
    # file can only be created inside the current working directory.
    safe_name = os.path.basename((filename or "").strip()) or DEFAULT_OUTPUT_FILENAME
    if not safe_name.lower().endswith(".wav"):
        safe_name += ".wav"
    # Resolve once so the path that is written is the same path that is returned.
    output_path = os.path.abspath(safe_name)

    inputs = processor(input_text, voice_preset=voice_preseter)
    audio_array = model.generate(**inputs)
    audio_array = audio_array.cpu().numpy().squeeze()
    sample_rate = model.generation_config.sample_rate

    scipy.io.wavfile.write(output_path, rate=sample_rate, data=audio_array)

    # An output gr.Audio component expects the handler to return a file path
    # (or a (sample_rate, data) tuple), not a new component instance.
    return output_path


############################
###### Main Program ########
############################


# -- Start of Program - Main
def main():
    """Build the Gradio interface and start serving it."""
    demo = gr.Interface(
        fn=submit,
        inputs=[ui_input_voice_preseter, ui_input_filename, ui_input_text],
        outputs=ui_output,
        allow_flagging="never",
    )
    demo.queue().launch()


# -- Calling Main Function
if __name__ == '__main__':
    main()