import tempfile

import gradio as gr
import torch
import torchaudio
from audiocraft.models import MusicGen
from bark import SAMPLE_RATE as BARK_SAMPLE_RATE, generate_audio as bark_generate_audio
from pydub import AudioSegment
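
# Dependency note (assumption, not stated in the original): this script expects
# gradio, torch, torchaudio, audiocraft, pydub, and the Suno Bark package to be installed.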

# Load MusicGen once at startup; 'facebook/musicgen-small' is the smallest checkpoint.
musicgen = MusicGen.get_pretrained('facebook/musicgen-small')
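
# Optional (assumption, not in the original script): Bark lazy-loads its models on the
# first generate_audio() call, so the first request can be slow. Preloading at startup
# avoids that delay:
# from bark import preload_models
# preload_models()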


def generate_song(lyrics, genre_prompt):
    # Bark returns a 1-D NumPy float array at BARK_SAMPLE_RATE (24 kHz);
    # "v2/en_speaker_6" is one of Bark's built-in speaker presets.
    vocals = bark_generate_audio(lyrics, history_prompt="v2/en_speaker_6")

    # torchaudio.save expects a (channels, samples) tensor, so wrap the NumPy array.
    vocals_path = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name
    torchaudio.save(vocals_path, torch.from_numpy(vocals).float().unsqueeze(0), BARK_SAMPLE_RATE)

    # Generate a 15-second instrumental from the genre prompt. MusicGen returns a
    # (batch, channels, samples) tensor at musicgen.sample_rate (32 kHz for this model).
    musicgen.set_generation_params(duration=15)
    music = musicgen.generate([genre_prompt])
    music_path = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name
    torchaudio.save(music_path, music[0].cpu(), musicgen.sample_rate)

    # Overlay the vocals on the instrumental with pydub, resampling the vocals
    # to 32 kHz mono so both segments match before mixing.
    vocals_seg = AudioSegment.from_wav(vocals_path)
    music_seg = AudioSegment.from_wav(music_path)
    mixed = music_seg.overlay(vocals_seg.set_frame_rate(32000).set_channels(1))

    output_path = tempfile.NamedTemporaryFile(suffix=".wav", delete=False).name
    mixed.export(output_path, format="wav")
    return output_path


iface = gr.Interface(
    fn=generate_song,
    inputs=[
        gr.Textbox(label="Enter Lyrics", lines=4),
        gr.Textbox(label="Enter Genre (e.g., 'hip-hop with 808s')"),
    ],
    outputs=gr.Audio(label="Generated Song"),
)

iface.launch()
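
# Optional (assumption): launch() accepts share=True to expose a temporary public
# Gradio link, e.g. iface.launch(share=True).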