File size: 2,028 Bytes
42a53f6
 
 
ec66fcf
42a53f6
 
 
 
 
 
72c4624
ec66fcf
 
 
42a53f6
65a2eb1
42a53f6
ec66fcf
42a53f6
 
 
 
 
 
 
 
 
 
72c4624
ec66fcf
42a53f6
72c4624
65a2eb1
 
 
 
 
ec66fcf
 
 
42a53f6
 
 
65a2eb1
ec66fcf
5767f76
ec66fcf
 
 
 
65a2eb1
 
 
 
 
72c4624
65a2eb1
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
import gradio as gr
from transformers import AutoProcessor, BarkModel
import scipy
import os

############################
### Variable Declaration ###
############################

# -- UI Variables
# Directory containing this script. abspath() guards against an empty
# dirname when the script is started through a bare relative filename,
# which would otherwise turn the preview path into "/sample.wav".
_APP_DIR = os.path.dirname(os.path.abspath(__file__))

# Voice preset selector; the chosen string is handed to submit() as-is.
ui_input_voice_presenter = gr.Dropdown(
    choices=["v2/en_speaker_0", "v2/en_speaker_9"],
    # No multiselect is requested, so the default is a single choice
    # string rather than a one-element list.
    value="v2/en_speaker_0",
    label="Voice Presenter",
)
# Name of the WAV file the generated audio is written to.
ui_input_filename = gr.Textbox(label="Input WAV Filename")
# Text to be spoken.
ui_input_text = gr.Textbox(lines=22, label="Input Text")
# Audio player for the result, pre-populated with the sample.wav that sits
# next to this script.
ui_output = gr.Audio(label="Output", value=os.path.join(_APP_DIR, "sample.wav"))

# -- Model Variables
# Processor and model are loaded from the same pretrained checkpoint, so the
# identifier is named once and reused for both.
_BARK_CHECKPOINT = "suno/bark"
processor = AutoProcessor.from_pretrained(_BARK_CHECKPOINT)
model = BarkModel.from_pretrained(_BARK_CHECKPOINT)

############################
### Processing Functions ###
############################

# -- On Click of Submit Button in UI
def submit(voice_presenter, filename, input_text):
    """Synthesize ``input_text`` with Bark and save the result as a WAV file.

    Args:
        voice_presenter: Voice preset identifier (e.g. "v2/en_speaker_0"),
            passed to the processor as ``voice_preset``.
        filename: Path of the WAV file to write. A relative path is resolved
            against the current working directory. A blank value falls back
            to "sample.wav".
        input_text: Text to convert to speech.

    Returns:
        Absolute path of the WAV file that was just written, so the audio
        output shows the freshly generated clip rather than a fixed file.
    """
    # Imported explicitly: a bare ``import scipy`` is not guaranteed to make
    # ``scipy.io.wavfile`` available as an attribute on every SciPy version.
    from scipy.io import wavfile

    print("Started Generating Voice")

    # An empty filename would make the write fail; use the default name instead.
    # NOTE(review): the filename comes straight from the UI and is used as
    # given. If this app is ever exposed publicly, restrict it (for example
    # to a basename inside a dedicated output directory).
    output_path = os.path.abspath((filename or "").strip() or "sample.wav")

    inputs = processor(input_text, voice_preset=voice_presenter)
    audio_array = model.generate(**inputs)
    # Move the generated tensor to host memory and drop singleton dimensions
    # before handing it to the WAV writer.
    audio_array = audio_array.cpu().numpy().squeeze()
    sample_rate = model.generation_config.sample_rate
    wavfile.write(output_path, rate=sample_rate, data=audio_array)

    print("Generation of Voice completed")

    # Return the file that was actually written. The previous code always
    # returned "<script dir>/sample.wav" regardless of ``filename``.
    return output_path

############################
###### Main Program ########
############################
# Seed every input widget with its initial value so the form can be
# submitted without typing anything.
_DEFAULT_PROMPT = """Hello uh ... [clears throat],
Bark is a transformer-based text-to-speech model proposed by Suno AI.
This voice is auto generated
"""
for _widget, _initial_value in (
    (ui_input_voice_presenter, "v2/en_speaker_0"),
    (ui_input_filename, "sample.wav"),
    (ui_input_text, _DEFAULT_PROMPT),
):
    _widget.value = _initial_value

# -- Start of Program - Main
def main():
    """Build the Gradio interface around ``submit`` and launch it with queuing."""
    input_widgets = [ui_input_voice_presenter, ui_input_filename, ui_input_text]
    interface_options = {
        "fn": submit,
        "inputs": input_widgets,
        "outputs": ui_output,
        "allow_flagging": "never",
    }
    app = gr.Interface(**interface_options)
    # Enable the request queue before launching the app.
    queued_app = app.queue()
    queued_app.launch()

# -- Script entry point: launch the UI only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()