# Hugging Face file-view header (not part of the program):
# author rahgadda · commit ec66fcf "Initial Draft" · 2.03 kB
import gradio as gr
from transformers import AutoProcessor, BarkModel
import scipy
import os
############################
### Variable Declaration ###
############################
# -- UI Variables
# Single-select dropdown: the default must be one of the choice strings.
# (A list is only meaningful for a multiselect dropdown.)
ui_input_voice_presenter = gr.Dropdown(
    choices=["v2/en_speaker_0", "v2/en_speaker_9"],
    value="v2/en_speaker_0",
    label="Voice Presenter",
)
ui_input_filename = gr.Textbox(label="Input WAV Filename")
ui_input_text = gr.Textbox(lines=22, label="Input Text")
# Resolve the sample next to this script. abspath() guards against a relative
# __file__: dirname() would then be "" and appending "/sample.wav" by string
# concatenation would point at the filesystem root instead.
ui_output = gr.Audio(
    label="Output",
    value=os.path.join(os.path.dirname(os.path.abspath(__file__)), "sample.wav"),
)

# -- Model Variables
# Created once at module import so every call to submit() reuses the same
# processor and model built from the "suno/bark" checkpoint.
processor = AutoProcessor.from_pretrained("suno/bark")
model = BarkModel.from_pretrained("suno/bark")
############################
### Processing Functions ###
############################
# -- On Click of Submit Button in UI
def submit(voice_presenter, filename, input_text):
    """Generate speech for ``input_text`` with Bark and save it as a WAV file.

    Args:
        voice_presenter: Bark voice preset handed to the processor,
            e.g. ``"v2/en_speaker_0"``.
        filename: Name of the WAV file to write. Only its base name is used
            and the file is created in this script's directory; an empty
            value falls back to ``"sample.wav"``.
        input_text: Text to synthesize.

    Returns:
        The path of the WAV file that was just written, so the audio output
        component plays exactly what was generated.
    """
    # Import the submodule explicitly: a bare ``import scipy`` does not
    # expose ``scipy.io.wavfile`` on every SciPy version.
    from scipy.io import wavfile

    print("Started Generating Voice")

    # Write to and return the SAME path. Previously the file was written to
    # `filename` (relative to the current working directory) while the
    # function always returned "<script dir>/sample.wav", so any other
    # filename or working directory made the UI play a stale/missing file.
    # basename() also keeps user input from writing outside this directory.
    wav_name = os.path.basename((filename or "").strip()) or "sample.wav"
    output_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), wav_name)

    inputs = processor(input_text, voice_preset=voice_presenter)
    audio_array = model.generate(**inputs)
    # Move the generated tensor to the CPU and drop size-1 dimensions before
    # handing it to the WAV writer.
    audio_array = audio_array.cpu().numpy().squeeze()
    sample_rate = model.generation_config.sample_rate
    wavfile.write(output_path, rate=sample_rate, data=audio_array)

    print("Generation of Voice completed")
    return output_path
############################
###### Main Program ########
############################
# Default values assigned to the input components before the UI is built.
ui_input_text.value = (
    "Hello uh ... [clears throat],\n"
    "Bark is a transformer-based text-to-speech model proposed by Suno AI.\n"
    "This voice is auto generated\n"
)
ui_input_filename.value = "sample.wav"
ui_input_voice_presenter.value = "v2/en_speaker_0"
# -- Start of Program - Main
def main():
    """Build the Gradio interface around ``submit`` and launch it with queuing."""
    input_components = [
        ui_input_voice_presenter,
        ui_input_filename,
        ui_input_text,
    ]
    interface_options = {
        "fn": submit,
        "inputs": input_components,
        "outputs": ui_output,
        "allow_flagging": "never",
    }
    app = gr.Interface(**interface_options)

    # Launch on whatever queue() returns, exactly as the chained call did.
    queued_app = app.queue()
    queued_app.launch()
# -- Entry point: start the app only when this file is run as a script
if __name__ == "__main__":
    main()