tutorial / app.py
marcogallen's picture
Text to speak generation
1b0d41c
raw
history blame contribute delete
No virus
1.15 kB
import gradio as gr
from transformers import BarkModel, AutoProcessor
import torch
from scipy.io.wavfile import write as write_wav
import os
'''
This app runs a text-to-speech transformer model (Bark) behind a Gradio interface.
'''
# Inference target: this app runs on the CPU.
device = "cpu"

# No dtype argument is passed to from_pretrained, so no half-precision (fp16)
# conversion is requested here.
model = BarkModel.from_pretrained("suno/bark-small")
model = model.to(device)

# NOTE(review): the processor is loaded from "suno/bark" while the weights come
# from "suno/bark-small" -- confirm this pairing is intended.
processor = AutoProcessor.from_pretrained("suno/bark")

# Default Bark voice preset.
voice_preset = "v2/en_speaker_3"
def generate_audio(text, preset, output_file_name="bark_generation"):
    """Synthesize speech for ``text`` with Bark and save it as a WAV file.

    Args:
        text: The prompt to be spoken.
        preset: Bark voice preset identifier (for example
            ``"v2/en_speaker_3"``). When empty or ``None`` -- e.g. nothing
            was picked in the drop-down -- the module-level ``voice_preset``
            is used instead.
        output_file_name: Base name of the output file, without extension.
            Directory components are discarded and a blank value falls back
            to ``"bark_generation"``.

    Returns:
        The name of the ``.wav`` file that was written, relative to the
        current working directory.
    """
    # Honor the caller's selection; fall back to the module default only
    # when no preset was supplied.
    chosen_preset = preset or voice_preset

    # The name comes from a free-text UI field: keep only the final path
    # component so the file cannot be written outside the working directory,
    # and avoid producing a bare ".wav" when the field is left blank.
    stem = os.path.basename((output_file_name or "").strip())
    file_name = (stem or "bark_generation") + ".wav"

    inputs = processor(text, voice_preset=chosen_preset)
    audio_array = model.generate(**inputs)
    # Convert the generated tensor to a NumPy array and drop singleton
    # dimensions before handing it to the WAV writer.
    audio_array = audio_array.cpu().numpy().squeeze()
    sample_rate = model.generation_config.sample_rate
    write_wav(file_name, sample_rate, audio_array)
    return file_name
# Voice presets offered in the drop-down.
presets = [
    "v2/en_speaker_0",
    "v2/en_speaker_1",
    "v2/en_speaker_2",
    "v2/en_speaker_3",
    "v2/en_speaker_4",
    "v2/en_speaker_5",
    "v2/en_speaker_6",
    "v2/en_speaker_9",
]

# Gradio interface: the three inputs map, in order, to the parameters of
# generate_audio (text, preset, output_file_name); the output is audio.
iface = gr.Interface(
    fn=generate_audio,
    inputs=[
        "text",
        gr.components.Dropdown(choices=presets),
        "text",
    ],
    outputs="audio",
)
iface.launch()