import gradio as gr
import numpy as np
import librosa
import soundfile as sf
from TTS.api import TTS
import torch
import os
import tempfile
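
# Pipeline: load the uploaded clip, pitch-shift it, run YourTTS voice conversion
# toward a female reference speaker, apply a small extra shift as a rough formant
# adjustment, then save the result for playback in the UI.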

# Run on GPU when available; the Coqui TTS API object can be moved with .to().
device = "cuda" if torch.cuda.is_available() else "cpu"

try:
    tts = TTS("tts_models/multilingual/multi-dataset/your_tts", progress_bar=False).to(device)
except Exception as e:
    print(f"Error initializing TTS model: {e}")
    tts = None


def load_audio(audio_path):
    """Load an audio file at its native sample rate; return (None, None) on failure."""
    try:
        audio, sr = librosa.load(audio_path, sr=None)
        return audio, sr
    except Exception as e:
        print(f"Error loading audio: {e}")
        return None, None


def save_audio(audio, sr, path):
    """Write an audio buffer to disk with soundfile."""
    try:
        sf.write(path, audio, sr)
    except Exception as e:
        print(f"Error saving audio: {e}")


def pitch_shift(audio, sr, n_steps):
    """Shift pitch by n_steps semitones; return the input unchanged on failure."""
    try:
        return librosa.effects.pitch_shift(audio, sr=sr, n_steps=n_steps)
    except Exception as e:
        print(f"Error in pitch shifting: {e}")
        return audio


def change_voice(audio_path, pitch_shift_amount, formant_shift_amount):
    """Pitch-shift the input, convert it toward the target speaker, and return (sr, audio)."""
    if tts is None:
        return None, None

    audio, sr = load_audio(audio_path)
    if audio is None or sr is None:
        return None, None

    pitched_audio = pitch_shift(audio, sr, pitch_shift_amount)

    temp_path = converted_path = None
    try:
        # Write the pitch-shifted audio to a temporary wav and reserve a second
        # temporary path for the converted output.
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as src_file:
            temp_path = src_file.name
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as dst_file:
            converted_path = dst_file.name
        save_audio(pitched_audio, sr, temp_path)

        # Convert the speaker identity toward the reference recording.
        # "path/to/female_target_voice.wav" is a placeholder for a real reference clip.
        tts.voice_conversion_to_file(
            source_wav=temp_path,
            target_wav="path/to/female_target_voice.wav",
            file_path=converted_path,
        )

        converted_audio, converted_sr = load_audio(converted_path)
        if converted_audio is None or converted_sr is None:
            return None, None

        # Approximate a formant adjustment with a second, smaller pitch shift.
        formant_shifted_audio = librosa.effects.pitch_shift(
            converted_audio, sr=converted_sr, n_steps=formant_shift_amount
        )
        return converted_sr, formant_shifted_audio
    except Exception as e:
        print(f"Error in voice conversion: {e}")
        return None, None
    finally:
        # Clean up the intermediate files.
        for path in (temp_path, converted_path):
            if path and os.path.exists(path):
                os.unlink(path)
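

# Gradio callback: run the full pipeline on the uploaded file and return the
# path of the rendered wav for the output Audio component.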
def process_audio(audio_file, pitch_shift_amount, formant_shift_amount):
    if audio_file is None:
        return None

    sr, audio = change_voice(audio_file, pitch_shift_amount, formant_shift_amount)
    if sr is None or audio is None:
        return None

    output_path = "output_voice.wav"
    save_audio(audio, sr, output_path)

    return output_path
custom_css = """ |
|
.gradio-container { |
|
background-color: #f0f4f8; |
|
} |
|
.container { |
|
max-width: 900px; |
|
margin: auto; |
|
padding: 20px; |
|
border-radius: 10px; |
|
background-color: white; |
|
box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1); |
|
} |
|
h1 { |
|
color: #2c3e50; |
|
text-align: center; |
|
font-size: 2.5em; |
|
margin-bottom: 20px; |
|
} |
|
.description { |
|
text-align: center; |
|
color: #34495e; |
|
margin-bottom: 30px; |
|
} |
|
.input-section, .output-section { |
|
background-color: #ecf0f1; |
|
padding: 20px; |
|
border-radius: 8px; |
|
margin-bottom: 20px; |
|
} |
|
.input-section h3, .output-section h3 { |
|
color: #2980b9; |
|
margin-bottom: 15px; |
|
} |
|
""" |
|
|
|
|
|


with gr.Blocks(css=custom_css) as demo:
    gr.HTML(
        """
        <div style="text-align: center; max-width: 800px; margin: 0 auto;">
            <div style="display: inline-flex; align-items: center; gap: 0.8rem; font-size: 1.75rem;">
                <svg xmlns="http://www.w3.org/2000/svg" width="1em" height="1em" fill="currentColor" viewBox="0 0 16 16" style="vertical-align: middle;">
                    <path d="M3.5 6.5A.5.5 0 0 1 4 7v1a4 4 0 0 0 8 0V7a.5.5 0 0 1 1 0v1a5 5 0 0 1-4.5 4.975V15h3a.5.5 0 0 1 0 1h-7a.5.5 0 0 1 0-1h3v-2.025A5 5 0 0 1 3 8V7a.5.5 0 0 1 .5-.5z"/>
                    <path d="M10 8a2 2 0 1 1-4 0V3a2 2 0 1 1 4 0v5zM8 0a3 3 0 0 0-3 3v5a3 3 0 0 0 6 0V3a3 3 0 0 0-3-3z"/>
                </svg>
                <h1 style="font-weight: 900; margin-bottom: 7px;">
                    AI Voice Changer
                </h1>
            </div>
            <p class="description">Transform any voice into a realistic female voice using advanced AI technology</p>
        </div>
        """
    )

    with gr.Row():
        with gr.Column(elem_classes="input-section"):
            gr.Markdown("### Input")
            audio_input = gr.Audio(type="filepath", label="Upload Voice")
            # Distinct names so the sliders do not shadow the pitch_shift() helper above.
            pitch_shift_slider = gr.Slider(-12, 12, step=0.5, label="Pitch Shift", value=0)
            formant_shift_slider = gr.Slider(-5, 5, step=0.1, label="Formant Shift", value=0)
            submit_btn = gr.Button("Transform Voice", variant="primary")

        with gr.Column(elem_classes="output-section"):
            gr.Markdown("### Output")
            audio_output = gr.Audio(label="Transformed Voice")

    submit_btn.click(
        fn=process_audio,
        inputs=[audio_input, pitch_shift_slider, formant_shift_slider],
        outputs=audio_output,
    )

    gr.Markdown(
        """
        ### How to use:
        1. Upload an audio file containing the voice you want to transform.
        2. Adjust the Pitch Shift and Formant Shift sliders to fine-tune the voice (optional).
        3. Click the "Transform Voice" button to process the audio.
        4. Listen to the transformed voice in the output section.
        5. Download the transformed audio file if desired.

        Note: This application uses AI to transform voices. The quality of the output may vary depending on the input audio quality and the chosen settings.
        """
    )


if __name__ == "__main__":
    demo.launch()
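
    # To make the demo reachable from outside the local machine (for example from
    # a notebook or a remote server), Gradio can also create a temporary public link:
    # demo.launch(share=True)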