Spaces:

rahgadda
/

bark-voice-generator

Running

File size: 2,028 Bytes

42a53f6
 
 
ec66fcf
42a53f6
 
 
 
 
 
72c4624
ec66fcf
 
 
42a53f6
65a2eb1
42a53f6
ec66fcf
42a53f6
 
 
 
 
 
 
 
 
 
72c4624
ec66fcf
42a53f6
72c4624
65a2eb1
 
 
 
 
ec66fcf
 
 
42a53f6
 
 
65a2eb1
ec66fcf
5767f76
ec66fcf
 
 
 
65a2eb1
 
 
 
 
72c4624
65a2eb1

import gradio as gr
from transformers import AutoProcessor, BarkModel
import scipy
import os

############################
### Variable Declaration ###
############################

# -- UI Variables
# The dropdown is single-select, so its default is one plain string rather
# than a list of strings.
ui_input_voice_presenter=gr.Dropdown(
    choices=["v2/en_speaker_0","v2/en_speaker_9"],
    value="v2/en_speaker_0",
    label="Voice Presenter"
)
ui_input_filename=gr.Textbox(label="Input WAV Filename")
ui_input_text=gr.Textbox(lines=22,label="Input Text")
# Build the sample path with os.path.join on an absolute directory: plain
# string concatenation yields "/sample.wav" (filesystem root) whenever
# __file__ carries no directory component.
ui_output=gr.Audio(
    label="Output",
    value=os.path.join(os.path.dirname(os.path.abspath(__file__)), "sample.wav"),
)

# -- Model Variables
# Both the processor and the model are loaded from the same checkpoint id,
# once, at import time.
_BARK_CHECKPOINT = "suno/bark"
processor = AutoProcessor.from_pretrained(_BARK_CHECKPOINT)
model = BarkModel.from_pretrained(_BARK_CHECKPOINT)

############################
### Processing Functions ###
############################

# -- On Click of Submit Button in UI
def submit(voice_presenter, filename, input_text):
    """Generate speech for ``input_text`` and save it as a WAV file.

    Args:
        voice_presenter: Voice preset name, forwarded to the processor as
            ``voice_preset``.
        filename: Name of the WAV file to write. Only the base name is used;
            the file is written in the directory containing this script.
            An empty value falls back to ``"sample.wav"``.
        input_text: Text to convert to speech.

    Returns:
        Path of the WAV file that was written by this call.
    """
    # Import the submodule explicitly: a bare ``import scipy`` is not
    # guaranteed to expose ``scipy.io.wavfile`` on every SciPy version.
    from scipy.io import wavfile

    print("Started Generating Voice")

    # The file name comes straight from a UI textbox, so strip any directory
    # components to keep the write inside the application directory.
    safe_name = os.path.basename(filename or "") or "sample.wav"
    output_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), safe_name
    )

    inputs = processor(input_text, voice_preset=voice_presenter)
    audio_array = model.generate(**inputs)
    audio_array = audio_array.cpu().numpy().squeeze()
    sample_rate = model.generation_config.sample_rate
    wavfile.write(output_path, rate=sample_rate, data=audio_array)

    print("Generation of Voice completed")

    # Return the file that was actually written; previously a fixed
    # "sample.wav" path was returned regardless of ``filename``.
    return output_path

############################
###### Main Program ########
############################
# Initial values assigned to the three input components before the
# interface is built.
_DEFAULT_VOICE = "v2/en_speaker_0"
_DEFAULT_FILENAME = "sample.wav"
_DEFAULT_TEXT = """Hello uh ... [clears throat],
Bark is a transformer-based text-to-speech model proposed by Suno AI.
This voice is auto generated
"""

ui_input_voice_presenter.value = _DEFAULT_VOICE
ui_input_filename.value = _DEFAULT_FILENAME
ui_input_text.value = _DEFAULT_TEXT

# -- Start of Program - Main
def main():
    """Build the Gradio interface around ``submit`` and launch it with queuing."""
    input_components = [
        ui_input_voice_presenter,
        ui_input_filename,
        ui_input_text,
    ]
    interface = gr.Interface(
        fn=submit,
        inputs=input_components,
        outputs=ui_output,
        allow_flagging="never",
    )
    interface.queue().launch()

# -- Script entry point: launch the app only when this file is run directly
if __name__ == "__main__":
    main()