gskdsrikrishna's picture
Update app.py
b988aff verified
import streamlit as st
import torch
import torchaudio
from scipy.io.wavfile import write
# TODO: load the pre-trained Tacotron2 and WaveGlow models here (both must be installed first).
# The two commented-out lines below are placeholders only: no model is loaded yet, so synthesize_voice() emits a dummy wave.
# tacotron2 = Tacotron2()
# waveglow = WaveGlow()
def synthesize_voice(
    text,
    *,
    sample_rate=22050,
    duration=2,
    frequency=440.0,
    output_path="synthesized_voice.wav",
):
    """Write a placeholder tone to a WAV file and return the file's path.

    This is a stand-in for real synthesis: no Tacotron2/WaveGlow model is
    invoked and ``text`` does not influence the generated audio. The output
    is a mono sine tone stored as 32-bit float samples.

    Args:
        text: Text that would be spoken by a real model. Currently unused.
        sample_rate: Samples per second of the generated audio.
        duration: Length of the generated audio in seconds.
        frequency: Pitch of the placeholder tone in Hz. It should stay below
            ``sample_rate / 2`` to avoid aliasing.
        output_path: Destination of the WAV file. An existing file at this
            path is overwritten.

    Returns:
        ``output_path``, unchanged.

    Raises:
        ValueError: If ``sample_rate``, ``duration`` or ``frequency`` is not
            positive, or if the requested duration is shorter than one sample.
    """
    if sample_rate <= 0 or duration <= 0 or frequency <= 0:
        raise ValueError("sample_rate, duration and frequency must be positive")

    num_samples = int(round(sample_rate * duration))
    if num_samples < 1:
        raise ValueError("duration is too short to produce any samples")

    # Derive time stamps from sample indices so the tone is exactly
    # `frequency` Hz. The previous linspace(0, duration * 2*pi, n) ramp
    # produced only `duration` cycles in total (about 1 Hz), which is
    # below the audible range and therefore played back as silence.
    # float64 keeps the phase accurate before the final cast to float32.
    timestamps = torch.arange(num_samples, dtype=torch.float64) / sample_rate
    audio = torch.sin(2 * torch.pi * frequency * timestamps).to(torch.float32)

    # Save the synthesized audio to a file.
    write(output_path, sample_rate, audio.numpy())
    return output_path
def main():
    """Render the Streamlit page for the voice-cloning demo.

    The page offers an optional voice-sample upload (previewed in an audio
    player), a text area, and a button. Pressing the button with non-blank
    text calls ``synthesize_voice`` and plays the file it returns.

    NOTE(review): the uploaded sample is only previewed; it is not passed to
    ``synthesize_voice``, so it does not affect the generated audio.
    """
    st.title("Voice Cloning App")
    st.write("Upload an audio sample of a person's voice and input text to clone their voice.")

    # File uploader for the voice sample (kept as a reference for the user).
    uploaded_audio = st.file_uploader("Choose a voice sample (WAV, MP3, etc.)", type=["wav", "mp3"])
    if uploaded_audio is not None:
        # Use the upload's own MIME type: MP3 files are accepted above, so
        # labelling every preview as "audio/wav" would mislabel them.
        st.audio(uploaded_audio, format=uploaded_audio.type or "audio/wav")

    # Text box for the text to be spoken.
    text_input = st.text_area("Enter text for voice cloning")

    if not st.button("Generate Cloned Voice"):
        return

    if not text_input or not text_input.strip():
        # Give feedback instead of silently ignoring the click.
        st.warning("Please enter some text before generating audio.")
        return

    output_path = synthesize_voice(text_input)
    # Play the generated audio using Streamlit's audio component.
    st.audio(output_path, format="audio/wav")
    st.success("Voice cloning successful!")
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()