import streamlit as st
import torch
from scipy.io.wavfile import write
def synthesize_voice(text):
    """
    Synthesize voice from the given text using pre-trained Tacotron2 and WaveGlow models.

    This function is a simplified placeholder: instead of running real models, it
    writes a short sine tone so the rest of the app can be exercised end to end.
    """
    # Generate a 2-second, 440 Hz placeholder tone at the 22.05 kHz rate used by
    # Tacotron2/WaveGlow.
    sample_rate = 22050
    duration = 2  # seconds
    frequency = 440  # Hz (an audible test tone)
    t = torch.linspace(0, duration, sample_rate * duration)
    audio = 0.5 * torch.sin(2 * torch.pi * frequency * t)

    # scipy.io.wavfile.write accepts float32 samples in [-1, 1].
    output_path = "synthesized_voice.wav"
    write(output_path, sample_rate, audio.numpy())
    return output_path
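

# The placeholder above only writes a test tone. A real implementation could look
# roughly like the sketch below, loosely following the NVIDIA DeepLearningExamples
# torch.hub recipe for Tacotron2 (text -> mel spectrogram) and WaveGlow
# (mel spectrogram -> waveform). Treat the entry-point names, arguments, and return
# shapes as assumptions to verify against the current hub documentation; the models
# are downloaded on first use and expect a CUDA GPU. Note also that Tacotron2 and
# WaveGlow are single-speaker models, so they do not use the uploaded reference
# sample; true voice cloning would need a speaker-conditioned TTS model.
def synthesize_voice_with_pretrained_models(text, output_path="synthesized_voice.wav"):
    device = "cuda"

    # Acoustic model: converts a character sequence into a mel spectrogram.
    tacotron2 = torch.hub.load("NVIDIA/DeepLearningExamples:torchhub", "nvidia_tacotron2")
    tacotron2 = tacotron2.to(device).eval()

    # Vocoder: converts the mel spectrogram into an audio waveform.
    waveglow = torch.hub.load("NVIDIA/DeepLearningExamples:torchhub", "nvidia_waveglow")
    waveglow = waveglow.remove_weightnorm(waveglow)
    waveglow = waveglow.to(device).eval()

    # Text pre-processing utilities shipped with the hub recipe.
    utils = torch.hub.load("NVIDIA/DeepLearningExamples:torchhub", "nvidia_tts_utils")
    sequences, lengths = utils.prepare_input_sequence([text])

    with torch.no_grad():
        mel, _, _ = tacotron2.infer(sequences, lengths)
        audio = waveglow.infer(mel)

    # Both models operate at 22.05 kHz; write the first (only) item in the batch.
    sample_rate = 22050
    write(output_path, sample_rate, audio[0].float().cpu().numpy())
    return output_path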


def main():
    st.title("Voice Cloning App")
    st.write("Upload an audio sample of a person's voice and input text to clone their voice.")

    # Reference sample of the target speaker (not used by the placeholder synthesizer).
    uploaded_audio = st.file_uploader("Choose a voice sample (WAV, MP3, etc.)", type=["wav", "mp3"])
    if uploaded_audio is not None:
        st.audio(uploaded_audio, format="audio/wav")

    text_input = st.text_area("Enter text for voice cloning")

    if st.button("Generate Cloned Voice"):
        if text_input:
            output_path = synthesize_voice(text_input)
            st.audio(output_path, format="audio/wav")
            st.success("Voice cloning successful!")
        else:
            st.warning("Please enter some text to synthesize.")


if __name__ == "__main__":
    main()
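
# To try the app locally, run it with the Streamlit CLI (assuming this file is
# saved as app.py):
#
#   streamlit run app.py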