import gradio as gr
import json
from datetime import datetime
import yaml
import time
import re
import os
import os.path as op
import torch
import soundfile as sf
import numpy as np
import tempfile

from download import download_model
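

# Fetch the model checkpoints before levo_inference is imported further below.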
APP_DIR = op.dirname(op.abspath(__file__))

download_model(APP_DIR)
large_model_path = op.join(APP_DIR, "ckpt", "SongGeneration-v1.5-beta")
download_model(large_model_path, repo_id="waytan22/SongGeneration-v1.5-beta", revision="db10f47")
print("Successfully downloaded the model.")

from levo_inference import LeVoInference

MODEL = LeVoInference(large_model_path)
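
# Example lyric pre-filled in the UI: blank-line-separated segments, each starting
# with a structure tag; instrumental segments contain no lyric lines.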
EXAMPLE_LYRICS = """
[intro-medium]

[verse]
夜晚的街灯闪烁
我漫步在熟悉的角落
回忆像潮水般涌来
你的笑容如此清晰
在心头无法抹去
那些曾经的甜蜜
如今只剩我独自回忆

[chorus]
回忆的温度还在
你却已不在
我的心被爱填满
却又被思念刺痛
音乐的节奏奏响
我的心却在流浪
没有你的日子
我该如何继续向前

[inst-medium]

[verse]
手机屏幕亮起
是你发来的消息
简单的几个字
却让我泪流满面
曾经的拥抱温暖
如今却变得遥远
我多想回到从前
重新拥有你的陪伴

[chorus]
回忆的温度还在
你却已不在
我的心被爱填满
却又被思念刺痛
音乐的节奏奏响
我的心却在流浪
没有你的日子
我该如何继续向前

[outro-medium]
""".strip()
with open(op.join(APP_DIR, 'conf/vocab.yaml'), 'r', encoding='utf-8') as file:
    STRUCTS = yaml.safe_load(file)


def save_as_flac(sample_rate, audio_data):
    """Write audio to a temporary FLAC file and return its path."""
    if isinstance(audio_data, tuple):
        sample_rate, audio_data = audio_data

    # Downcast 64-bit floats before writing.
    if audio_data.dtype == np.float64:
        audio_data = audio_data.astype(np.float32)

    temp_file = tempfile.NamedTemporaryFile(delete=False, suffix=".flac")
    temp_file.close()
    sf.write(temp_file.name, audio_data, sample_rate, format='FLAC')
    return temp_file.name
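

# generate_song validates and normalizes the lyric, picks the prompt source
# (audio prompt > text description > genre preset), runs the model, and returns
# a FLAC path plus a JSON record of the inputs used.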
def generate_song(lyric, description=None, prompt_audio=None, genre=None, cfg_coef=None,
                  temperature=0.1, top_k=-1, gen_type="mixed", progress=gr.Progress(track_tqdm=True)):
    global MODEL
    global STRUCTS
    # Drop unset sampling parameters so the model falls back to its own defaults.
    params = {'cfg_coef': cfg_coef, 'temperature': temperature, 'top_k': top_k}
    params = {k: v for k, v in params.items() if v is not None}
    vocal_structs = ['[verse]', '[chorus]', '[bridge]']
    sample_rate = MODEL.cfg.sample_rate

    # Normalize structure tags and split the lyric into blank-line-separated segments.
    lyric = lyric.replace("[intro]", "[intro-short]").replace("[inst]", "[inst-short]").replace("[outro]", "[outro-short]")
    paragraphs = [p.strip() for p in lyric.strip().split('\n\n') if p.strip()]
    if len(paragraphs) < 1:
        return None, json.dumps("Lyrics cannot be left blank")

    paragraphs_norm = []
    vocal_flag = False
    for para in paragraphs:
        lines = para.splitlines()
        struct_tag = lines[0].strip().lower()
        if struct_tag not in STRUCTS:
            return None, json.dumps(f"Segments should start with a structure tag in {STRUCTS}")

        if struct_tag in vocal_structs:
            vocal_flag = True
            if len(lines) < 2 or not [line.strip() for line in lines[1:] if line.strip()]:
                return None, json.dumps("The following segments require lyrics: [verse], [chorus], [bridge]")
            else:
                # Strip punctuation while keeping word characters, whitespace, and CJK/Hangul/Latin-extended ranges.
                new_para_list = []
                for line in lines[1:]:
                    new_para_list.append(re.sub(r"[^\w\s\[\]\-\u4e00-\u9fff\u3040-\u309f\u30a0-\u30ff\uac00-\ud7af\u00c0-\u017f]", "", line))
                new_para_str = f"{struct_tag} {'.'.join(new_para_list)}"
        else:
            if len(lines) > 1:
                return None, json.dumps("The following segments should not contain lyrics: [intro], [intro-short], [intro-medium], [inst], [inst-short], [inst-medium], [outro], [outro-short], [outro-medium]")
            else:
                new_para_str = struct_tag
        paragraphs_norm.append(new_para_str)

    if not vocal_flag:
        return None, json.dumps(f"The lyric must contain at least one of the following structures: {vocal_structs}")
    lyric_norm = " ; ".join(paragraphs_norm)

    # An audio prompt overrides both the description and the genre preset;
    # a non-empty description overrides the genre preset.
    if prompt_audio is not None:
        genre = None
        description = None
    elif description is not None and description != "":
        genre = None

    progress(0.0, "Start Generation")
    start = time.time()

    audio_data = MODEL(lyric_norm, description, prompt_audio, genre, op.join(APP_DIR, "tools/new_prompt.pt"), gen_type, params).cpu().permute(1, 0).float().numpy()

    end = time.time()

    input_config = {
        "lyric": lyric_norm,
        "genre": genre,
        "prompt_audio": prompt_audio,
        "description": description,
        "params": params,
        "inference_duration": end - start,
        "timestamp": datetime.now().isoformat(),
    }

    filepath = save_as_flac(sample_rate, audio_data)
    return filepath, json.dumps(input_config, indent=2)


with gr.Blocks(title="SongGeneration Demo Space") as demo:
    gr.Markdown("# 🎵 SongGeneration Demo Space")
    gr.Markdown("Demo interface for the song generation model. Provide lyrics and, optionally, an audio or text prompt to generate a custom song. The code is available on [GitHub](https://github.com/tencent-ailab/SongGeneration).")
    with gr.Row():
        with gr.Column():
            lyric = gr.Textbox(
                label="Lyrics",
                lines=5,
                max_lines=15,
                value=EXAMPLE_LYRICS,
                info="Each paragraph is one segment: it starts with a structure tag and ends with a blank line, and each line inside it is one sentence without punctuation. The segments [intro], [inst], and [outro] must not contain lyrics, while [verse], [chorus], and [bridge] require them.",
                placeholder="""Lyric Format
'''
[structure tag]
lyrics

[structure tag]
lyrics
'''
1. One paragraph represents one segment, starting with a structure tag and ending with a blank line
2. One line represents one sentence; punctuation inside the sentence is not recommended
3. The following segments should not contain lyrics: [intro-short], [intro-medium], [inst-short], [inst-medium], [outro-short], [outro-medium]
4. The following segments require lyrics: [verse], [chorus], [bridge]
"""
            )

            with gr.Tabs(elem_id="extra-tabs"):
                with gr.Tab("Genre Select"):
                    genre = gr.Radio(
                        choices=["Auto", "Pop", "R&B", "Dance", "Jazz", "Folk", "Rock", "Chinese Style", "Chinese Tradition", "Metal", "Reggae", "Chinese Opera"],
                        label="Genre Select (Optional)",
                        value="Auto",
                        interactive=True,
                        elem_id="single-select-radio"
                    )
                with gr.Tab("Audio Prompt"):
                    prompt_audio = gr.Audio(
                        label="Prompt Audio (Optional)",
                        type="filepath",
                        elem_id="audio-prompt"
                    )
                with gr.Tab("Text Prompt"):
                    gr.Markdown("For detailed usage, please refer to [here](https://github.com/tencent-ailab/SongGeneration?tab=readme-ov-file#-description-input-format)")
                    description = gr.Textbox(
                        label="Song Description (Optional)",
                        info="Describe the gender, timbre, genre, emotion, instruments, and BPM of the song. Only English is currently supported.",
                        placeholder="female, dark, pop, sad, piano and drums, the bpm is 125.",
                        lines=1,
                        max_lines=2
                    )

            with gr.Accordion("Advanced Config", open=False):
                cfg_coef = gr.Slider(
                    label="CFG Coefficient",
                    minimum=0.1,
                    maximum=3.0,
                    step=0.1,
                    value=1.5,
                    interactive=True,
                    elem_id="cfg-coef",
                )
                temperature = gr.Slider(
                    label="Temperature",
                    minimum=0.1,
                    maximum=2.0,
                    step=0.1,
                    value=0.8,
                    interactive=True,
                    elem_id="temperature",
                )

            with gr.Row():
                generate_btn = gr.Button("Generate Song", variant="primary")
                generate_bgm_btn = gr.Button("Generate Pure Music", variant="primary")

        with gr.Column():
            output_audio = gr.Audio(label="Generated Song", type="filepath")
            output_json = gr.JSON(label="Generated Info")
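
    # Both buttons call generate_song: gr.State(50) supplies top_k, and the
    # "Generate Pure Music" button additionally passes gen_type="bgm".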
    generate_btn.click(
        fn=generate_song,
        inputs=[lyric, description, prompt_audio, genre, cfg_coef, temperature, gr.State(50)],
        outputs=[output_audio, output_json]
    )
    generate_bgm_btn.click(
        fn=generate_song,
        inputs=[lyric, description, prompt_audio, genre, cfg_coef, temperature, gr.State(50), gr.State("bgm")],
        outputs=[output_audio, output_json]
    )


if __name__ == "__main__":
    torch.set_num_threads(1)
    demo.launch(server_name="0.0.0.0", server_port=7860)