import gradio as gr
import numpy as np
import librosa
import soundfile as sf
import pandas as pd

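# Mix the "baa" and "murmur" sound effects into the narration track.
# fade_in_duration and fade_out_duration are fractions of the sound effect
# clip length; volume_factor scales each effect relative to the narration loudness.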
def generate_audio(clip_length=4.0, fade_in_duration=0.5, fade_out_duration=0.5, volume_factor=0.3):
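    # Load the narration at its native sample rate and resample both sound
    # effects to the same rate so the tracks can be mixed sample-for-sample.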
    narration, sr = librosa.load('narration.wav', sr=None)
    baa, _ = librosa.load('baa.wav', sr=sr)
    murmur, _ = librosa.load('murmur.wav', sr=sr)

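    # Measure the loudness (RMS) of each track.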
    narration_rms = np.sqrt(np.mean(narration**2))
    baa_rms = np.sqrt(np.mean(baa**2))
    murmur_rms = np.sqrt(np.mean(murmur**2))

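    # Scale each sound effect so its RMS matches the narration's.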
    baa_normalized = baa * (narration_rms / baa_rms)
    murmur_normalized = murmur * (narration_rms / murmur_rms)

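    # Start from a silent buffer the length of the narration and lay the narration down first.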
    output_length = len(narration)
    combined = np.zeros(output_length)
    combined += narration

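    # Trim the "baa" effect to the requested clip length and scale its volume.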
    baa_clip = baa_normalized[:int(clip_length*sr)] * volume_factor

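    # Build linear fade-in/fade-out ramps over the requested fraction of the clip
    # and apply them in place.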
    fade_in_samples = int(fade_in_duration * len(baa_clip))
    fade_out_samples = int(fade_out_duration * len(baa_clip))
    fade_in = np.linspace(0, 1, fade_in_samples)
    fade_out = np.linspace(1, 0, fade_out_samples)

    if fade_in_samples > 0:
        baa_clip[:fade_in_samples] *= fade_in
    if fade_out_samples > 0:
        baa_clip[-fade_out_samples:] *= fade_out

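    # Mix the "baa" clip in at the 5-second mark (only if it fits within the narration).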
    start_idx = int(5 * sr)
    end_idx = start_idx + len(baa_clip)
    if end_idx <= output_length:
        combined[start_idx:end_idx] += baa_clip

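    # Apply the same trim, fade, and mix steps to the "murmur" effect, placed at the 15-second mark.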
    murmur_clip = murmur_normalized[:int(clip_length*sr)] * volume_factor

    murmur_fade_in_samples = int(fade_in_duration * len(murmur_clip))
    murmur_fade_out_samples = int(fade_out_duration * len(murmur_clip))

    if murmur_fade_in_samples > 0:
        murmur_fade_in = np.linspace(0, 1, murmur_fade_in_samples)
        murmur_clip[:murmur_fade_in_samples] *= murmur_fade_in
    if murmur_fade_out_samples > 0:
        murmur_fade_out = np.linspace(1, 0, murmur_fade_out_samples)
        murmur_clip[-murmur_fade_out_samples:] *= murmur_fade_out

    start_idx = int(15 * sr)
    end_idx = start_idx + len(murmur_clip)
    if end_idx <= output_length:
        combined[start_idx:end_idx] += murmur_clip

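    # Normalize the final mix only if it would clip outside [-1, 1].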
    max_val = np.max(np.abs(combined))
    if max_val > 1.0:
        combined = combined / max_val

    return (sr, combined)


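# Compute the sound effect volume envelope (fade in, hold at the chosen volume,
# fade out) so it can be previewed as a line plot before generating any audio.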
def visualize_sfx(sound_effect_clip_length, fade_in_duration, fade_out_duration, sound_effect_volume_factor):
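    # The fade sliders are fractions of the clip; convert them to seconds.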
    fade_in_seconds = fade_in_duration * sound_effect_clip_length
    fade_out_seconds = fade_out_duration * sound_effect_clip_length

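    # Sample the envelope every 10 ms across the clip.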
    time_resolution = 0.01
    times = np.arange(0, sound_effect_clip_length + time_resolution, time_resolution)

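    # Piecewise-linear envelope: ramp up during the fade-in, hold the chosen
    # volume, then ramp down during the fade-out.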
    volumes = []
    for t in times:
        if t <= fade_in_seconds and fade_in_seconds > 0:
            volume = sound_effect_volume_factor * (t / fade_in_seconds)
        elif t >= sound_effect_clip_length - fade_out_seconds and fade_out_seconds > 0:
            fade_out_progress = (sound_effect_clip_length - t) / fade_out_seconds
            volume = sound_effect_volume_factor * fade_out_progress
        else:
            volume = sound_effect_volume_factor

        volumes.append(volume)

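    # gr.LinePlot is configured with x="time" and y="volume", so return a DataFrame with those columns.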
    plot_data = pd.DataFrame({
        "time": times,
        "volume": volumes
    })

    return plot_data


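# Build the UI: sliders and the envelope preview in the left column, the generated audio in the right.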
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            sound_effect_clip_length = gr.Slider(minimum=0.5, maximum=5, value=4.0, step=0.1, label="Sound Effect Clip Length (seconds)")
            fade_in_duration = gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.05, label="Fade In Duration Factor", info="0.0 = no fade in, 1.0 = fade in over entire clip")
            fade_out_duration = gr.Slider(minimum=0.0, maximum=1.0, value=0.2, step=0.05, label="Fade Out Duration Factor", info="0.0 = no fade out, 1.0 = fade out over entire clip")
            sound_effect_volume_factor = gr.Slider(minimum=0.1, maximum=1.0, value=0.15, step=0.05, label="Sound Effect Volume Factor", info="0.1 is 10% of the narration volume, 1.0 is 100% of the narration volume")
            visualization = gr.LinePlot(label="Sound Effect Volume Envelope", x="time", y="volume", y_lim=[0, 1])
            generate_button = gr.Button("Generate Audio")
        with gr.Column():
            output = gr.Audio()

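    # Redraw the envelope preview when the app loads and whenever any slider changes.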
    gr.on(
        [demo.load, sound_effect_clip_length.change, fade_in_duration.change, fade_out_duration.change, sound_effect_volume_factor.change],
        fn=visualize_sfx,
        inputs=[sound_effect_clip_length, fade_in_duration, fade_out_duration, sound_effect_volume_factor],
        outputs=visualization
    )

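    # Only generate the full audio mix when the button is clicked.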
    generate_button.click(generate_audio, inputs=[sound_effect_clip_length, fade_in_duration, fade_out_duration, sound_effect_volume_factor], outputs=output)

if __name__ == "__main__":
    demo.launch()