"""Gradio app that splits a song into stems with Spleeter and remixes them.

The user uploads an audio file and picks a gain for each of the five stems
produced by the ``spleeter:5stems`` model; the app returns the remixed audio.
"""

import base64
import functools

import gradio as gr
import numpy as np
import soundfile as sf
from spleeter.separator import Separator

# Stems returned by ``spleeter_separate``, in the same order as the UI sliders.
# The 5-stem model has no "accompaniment" output (that key only exists for the
# 2-stem model); its fifth stem is "piano", which takes that slot here.
_STEM_ORDER = ("vocals", "piano", "bass", "drums", "other")

# Largest magnitude representable in the 16-bit PCM output.
_INT16_PEAK = 32767


@functools.lru_cache(maxsize=1)
def _get_separator():
    """Return the shared 5-stem Separator, building it on first use."""
    return Separator("spleeter:5stems")


def _to_float_stereo(samples):
    """Convert raw samples to a float32 array of shape (n_samples, 2).

    Integer PCM is scaled by the dtype's maximum value so it lands roughly in
    [-1.0, 1.0]; floating-point input is only cast. Mono input (1-D, or 2-D
    with one column) is duplicated onto two channels.
    """
    samples = np.asarray(samples)
    if np.issubdtype(samples.dtype, np.integer):
        full_scale = float(np.iinfo(samples.dtype).max)
        waveform = samples.astype(np.float32) / full_scale
    else:
        waveform = samples.astype(np.float32)

    if waveform.ndim == 1:
        waveform = waveform[:, np.newaxis]
    if waveform.shape[1] == 1:
        waveform = np.repeat(waveform, 2, axis=1)
    return waveform


def _to_int16(waveform):
    """Clip a float waveform to [-1.0, 1.0] and convert it to 16-bit PCM."""
    clipped = np.clip(waveform, -1.0, 1.0)
    return (clipped * _INT16_PEAK).astype(np.int16)


def spleeter_separate(audio):
    """Separate a waveform into five stems.

    Args:
        audio: Waveform array passed straight to ``Separator.separate``;
            ``separate_audio`` supplies float32 data of shape (n_samples, 2).

    Returns:
        A 5-tuple of stem waveforms ordered as in ``_STEM_ORDER``:
        vocals, piano, bass, drums, other.
    """
    prediction = _get_separator().separate(audio)
    return tuple(prediction[name] for name in _STEM_ORDER)


def adjust_volume(stems, volumes):
    """Scale each stem by its matching volume factor.

    Args:
        stems: Iterable of stem waveforms.
        volumes: Iterable of gain factors, paired positionally with ``stems``.

    Returns:
        A list with one scaled stem per (stem, volume) pair.
    """
    return [stem * volume for stem, volume in zip(stems, volumes)]


def process_audio(audio, volumes):
    """Separate ``audio``, apply per-stem gains and sum the result.

    Args:
        audio: Waveform array accepted by ``spleeter_separate``.
        volumes: Five gain factors in ``_STEM_ORDER`` order.

    Returns:
        The remixed waveform as a float32 array.
    """
    stems = spleeter_separate(audio)
    adjusted_stems = adjust_volume(stems, volumes)
    reconstructed_audio = sum(adjusted_stems)
    return reconstructed_audio.astype(np.float32)


def separate_audio(audio, vocals, accompaniment, bass, drums, other):
    """Gradio callback: remix an uploaded song with per-stem volumes.

    Args:
        audio: ``(sample_rate, samples)`` tuple as delivered by the numpy-typed
            Gradio audio input.
        vocals: Gain for the vocals stem.
        accompaniment: Gain for the piano stem. The name is kept for
            compatibility; the 5-stem model has no accompaniment stem.
        bass: Gain for the bass stem.
        drums: Gain for the drums stem.
        other: Gain for the "other" stem.

    Returns:
        ``(sample_rate, samples)`` with int16 samples, the form the
        numpy-typed Gradio audio output expects.

    Raises:
        ValueError: If no audio was supplied.
    """
    if audio is None:
        raise ValueError("No audio file was provided.")

    sample_rate, samples = audio
    # NOTE(review): the audio is separated at its own sample rate with no
    # resampling; Spleeter's pretrained models presumably target 44.1 kHz,
    # so other rates may separate less cleanly -- confirm.
    waveform = _to_float_stereo(samples)
    volumes = [vocals, accompaniment, bass, drums, other]
    reconstructed_audio = process_audio(waveform, volumes)
    return sample_rate, _to_int16(reconstructed_audio)


iface = gr.Interface(
    fn=separate_audio,
    inputs=[
        gr.inputs.Audio(label="Audio file"),
        gr.inputs.Slider(0.0, 1.0, step=0.1, label="Vocals"),
        gr.inputs.Slider(0.0, 1.0, step=0.1, label="Piano"),
        gr.inputs.Slider(0.0, 1.0, step=0.1, label="Bass"),
        gr.inputs.Slider(0.0, 1.0, step=0.1, label="Drums"),
        gr.inputs.Slider(0.0, 1.0, step=0.1, label="Other"),
    ],
    outputs=gr.outputs.Audio(label="Separated Audio", type="numpy"),
    title="Song Stem Separation",
    description=(
        "Isolate vocals, piano, bass, drums, and other sounds of any song "
        "using the Spleeter model."
    ),
)

iface.launch()