Spaces:
Running
Running
File size: 1,100 Bytes
80bb6f2 a0252e7 80bb6f2 a0252e7 80bb6f2 691908b 80bb6f2 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 |
import tempfile

import gradio as gr
import numpy as np
import scipy
import scipy.io.wavfile
import torch
from transformers import VitsModel, AutoTokenizer
# The synthesis model and its tokenizer are loaded once at import time,
# both from the same pretrained checkpoint.
MODEL_ID = "kakao-enterprise/vits-ljs"
tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = VitsModel.from_pretrained(MODEL_ID)
def text_to_speech(text):
    """Synthesize speech for *text* and return the path to a WAV file.

    The text is tokenized with the module-level ``tokenizer``, run through
    the module-level ``model`` with gradient tracking disabled, converted
    to 16-bit PCM and written to a newly created temporary ``.wav`` file.

    Args:
        text: The text to narrate.

    Returns:
        The filesystem path of the generated WAV file. The file is not
        removed by this function; the caller owns it.
    """
    inputs = tokenizer(text, return_tensors="pt")

    # Inference only: no autograd graph is needed.
    with torch.no_grad():
        waveform = model(**inputs).waveform

    # Drop singleton dimensions and apply the same fixed attenuation the
    # app has always used.
    samples = waveform.cpu().numpy().squeeze() / 1.414

    # Clip before the integer cast: a float outside [-1, 1] would otherwise
    # land outside the int16 range and wrap around instead of saturating.
    pcm = (np.clip(samples, -1.0, 1.0) * 32767).astype(np.int16)

    # Write to a unique file per call so overlapping requests cannot
    # overwrite each other's audio (previously a shared "output.wav").
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as wav_file:
        scipy.io.wavfile.write(
            wav_file, rate=model.config.sampling_rate, data=pcm
        )

    return wav_file.name
# Web UI: one text box in, one audio player out, backed by text_to_speech.
demo = gr.Interface(
    fn=text_to_speech,
    inputs=gr.Textbox(label="Text to narrate"),
    outputs=gr.Audio(label="Narrated audio"),
    title="Text-to-Speech",
    description="Enter text to generate audio narration",
)

# Start the Gradio server when the module is executed.
demo.launch()