# sounddromcom / app.py
# Last change by Alexxggs: "Update app.py" (commit b2b39bb)
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# This source code is licensed under the license found in the
# LICENSE file in the root directory of this source tree.
# Updated to account for UI changes from https://github.com/rkfg/audiocraft/blob/long/app.py
# also released under the MIT license.
import argparse
from concurrent.futures import ProcessPoolExecutor
import os
import subprocess as sp
from tempfile import NamedTemporaryFile
import time
import warnings
import torch
import gradio as gr
from audiocraft.data.audio_utils import convert_audio
from audiocraft.data.audio import audio_write
from audiocraft.models import MusicGen
# Most recently loaded model; remains None until load_model() assigns it.
MODEL = None
# True when the SPACE_ID environment variable contains "facebook/MusicGen";
# decides which of the two interfaces is launched at startup.
IS_BATCHED = "facebook/MusicGen" in os.getenv('SPACE_ID', '')
# Passed as max_batch_size to the batched interface's click handler.
MAX_BATCH_SIZE = 6
# Duration handed to generation for every batched request.
BATCHED_DURATION = 15
# Set by interrupt(); checked by the progress callback in predict_full().
INTERRUPTING = False
# subprocess.call is wrapped to keep the logs clean when gr.make_waveform
# is used.
_old_call = sp.call


def _call_nostderr(*args, **kwargs):
    """Invoke the original ``subprocess.call`` with its output discarded.

    Args:
        *args: Positional arguments forwarded unchanged.
        **kwargs: Keyword arguments forwarded; ``stdout`` and ``stderr`` are
            always overridden with ``subprocess.DEVNULL``.

    Returns:
        The value returned by the original ``subprocess.call`` (the exit
        code), so callers that inspect it keep working after the patch.
    """
    # Avoid ffmpeg vomiting on the logs.
    kwargs['stderr'] = sp.DEVNULL
    kwargs['stdout'] = sp.DEVNULL
    return _old_call(*args, **kwargs)


sp.call = _call_nostderr
# Shared pool of worker processes, created once when the module is loaded.
# It is entered by hand rather than through a ``with`` statement, so nothing
# in this part of the file closes it.
pool = ProcessPoolExecutor(max_workers=3).__enter__()
def interrupt():
    """Request that the generation in progress be stopped.

    Only flips the module-level ``INTERRUPTING`` flag to ``True``; the
    progress callback installed by ``predict_full`` reads that flag and
    raises when it is set.
    """
    global INTERRUPTING
    INTERRUPTING = True
def make_waveform(*args, **kwargs):
    """Call ``gr.make_waveform`` with warnings suppressed and report timing.

    Args:
        *args: Positional arguments forwarded to ``gr.make_waveform``.
        **kwargs: Keyword arguments forwarded to ``gr.make_waveform``.

    Returns:
        Whatever ``gr.make_waveform`` returns.
    """
    started_at = time.time()
    with warnings.catch_warnings():
        # Silence every warning raised while the waveform is rendered.
        warnings.simplefilter('ignore')
        rendered = gr.make_waveform(*args, **kwargs)
        elapsed = time.time() - started_at
        print("Make a video took", elapsed)
        return rendered
def load_model(version='melody'):
    """Make sure the global ``MODEL`` is the MusicGen variant *version*.

    The model is only (re)loaded when none is loaded yet or when the loaded
    model's ``name`` differs from *version*.

    Args:
        version: Name passed to ``MusicGen.get_pretrained``.
    """
    global MODEL
    print("Loading model", version)
    if MODEL is not None and MODEL.name == version:
        return
    if MODEL is not None:
        # Drop the previous model before loading the next one so both sets of
        # weights are not held at the same time, then give cached GPU memory
        # back to the allocator.
        MODEL = None
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
    MODEL = MusicGen.get_pretrained(version)
def _do_predictions(texts, melodies, duration, progress=False, **gen_kwargs):
    """Generate audio for a batch of prompts and render each clip as a video.

    Args:
        texts: Text prompts, one per clip.
        melodies: One entry per prompt: ``None`` or a ``(sample_rate, samples)``
            pair whose ``samples`` is a NumPy array.
        duration: Forwarded to ``MODEL.set_generation_params``; melodies are
            also cut to ``int(sample_rate * duration)`` samples.
        progress: Forwarded to the model's generation call.
        **gen_kwargs: Extra keyword arguments for
            ``MODEL.set_generation_params``.

    Returns:
        A list with the ``make_waveform`` result for every generated output,
        in generation order.
    """
    MODEL.set_generation_params(duration=duration, **gen_kwargs)
    print("new batch", len(texts), texts, [None if m is None else (m[0], m[1].shape) for m in melodies])
    be = time.time()
    # Sample rate and channel count every melody is converted to.
    target_sr = 32000
    target_ac = 1
    processed_melodies = []
    for melody in melodies:
        if melody is None:
            processed_melodies.append(None)
            continue
        sr = melody[0]
        wav = torch.from_numpy(melody[1]).to(MODEL.device).float().t()
        if wav.dim() == 1:
            # Add a leading dimension to one-dimensional input.
            wav = wav[None]
        wav = wav[..., :int(sr * duration)]
        processed_melodies.append(convert_audio(wav, sr, target_sr, target_ac))

    if any(m is not None for m in processed_melodies):
        outputs = MODEL.generate_with_chroma(
            descriptions=texts,
            melody_wavs=processed_melodies,
            melody_sample_rate=target_sr,
            progress=progress,
        )
    else:
        outputs = MODEL.generate(texts, progress=progress)

    outputs = outputs.detach().cpu().float()
    wav_paths = []
    out_files = []
    try:
        for output in outputs:
            with NamedTemporaryFile("wb", suffix=".wav", delete=False) as file:
                wav_paths.append(file.name)
                audio_write(
                    file.name, output, MODEL.sample_rate, strategy="loudness",
                    loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
                out_files.append(pool.submit(make_waveform, file.name))
        res = [out_file.result() for out_file in out_files]
    finally:
        # The .wav files are created with delete=False and are only inputs to
        # make_waveform; remove them here so they do not pile up in the
        # temporary directory. Cleanup is best effort.
        for path in wav_paths:
            try:
                os.remove(path)
            except OSError:
                pass
    print("batch finished", len(texts), time.time() - be)
    return res
def predict_batched(texts, melodies):
    """Handle one batch of requests coming from the batched interface.

    Each prompt is cut to 512 characters, the 'melody' model is loaded and
    generation runs with the fixed ``BATCHED_DURATION``.

    Args:
        texts: Prompts of the batch.
        melodies: Melody inputs of the batch, forwarded unchanged.

    Returns:
        A one-element list wrapping the list produced by ``_do_predictions``.
    """
    max_text_length = 512
    clipped = [prompt[:max_text_length] for prompt in texts]
    load_model('melody')
    rendered = _do_predictions(clipped, melodies, BATCHED_DURATION)
    return [rendered]
def predict_full(model, text, melody, duration, topk, topp, temperature, cfg_coef, progress=gr.Progress()):
    """Handle a single request coming from the full interface.

    Clears the interruption flag, loads the requested model, installs a
    progress callback and generates one clip.

    Args:
        model: Model name passed to ``load_model``.
        text: Prompt for the clip.
        melody: Melody input, forwarded unchanged.
        duration: Forwarded to ``_do_predictions``.
        topk: Converted with ``int`` and forwarded as ``top_k``.
        topp: Forwarded as ``top_p``.
        temperature: Forwarded as ``temperature``.
        cfg_coef: Forwarded as ``cfg_coef``.
        progress: Gradio progress tracker, called on every progress update.

    Returns:
        The first element of the list returned by ``_do_predictions``.

    Raises:
        gr.Error: Raised by the progress callback once ``INTERRUPTING`` is set.
    """
    global INTERRUPTING
    INTERRUPTING = False
    top_k = int(topk)
    load_model(model)

    def _report(generated, to_generate):
        # Publish progress first, then honour a pending interruption request.
        progress((generated, to_generate))
        if INTERRUPTING:
            raise gr.Error("Interrupted.")

    MODEL.set_custom_progress_callback(_report)
    sampling = {
        'top_k': top_k,
        'top_p': topp,
        'temperature': temperature,
        'cfg_coef': cfg_coef,
    }
    rendered = _do_predictions([text], [melody], duration, progress=True, **sampling)
    return rendered[0]
def ui_full(launch_kwargs):
    """Build the single-request interface and launch it.

    The form offers a prompt, a melody upload, model and duration selection
    and four sampling numbers; the submit button calls ``predict_full`` and a
    second button calls ``interrupt`` without going through the queue.

    Args:
        launch_kwargs: Keyword arguments forwarded to ``launch``.
    """
    # NOTE(review): the nesting of rows and columns is inferred from the
    # order of the statements -- confirm against the intended layout.
    with gr.Blocks() as interface:
        gr.Markdown("\n")
        with gr.Row():
            with gr.Column():
                with gr.Row():
                    prompt_box = gr.Text(
                        label="Текст пример (bass drum cyberpunk)",
                        interactive=True,
                    )
                    melody_input = gr.Audio(
                        source="upload",
                        type="numpy",
                        label="Melody Condition (необязательно)",
                        interactive=True,
                    )
                with gr.Row():
                    generate_button = gr.Button("Создать")
                    # Adapted from https://github.com/rkfg/audiocraft/blob/long/app.py, MIT license.
                    stop_button = gr.Button("Остановить")
                    stop_button.click(fn=interrupt, queue=False)
                with gr.Row():
                    model_choice = gr.Radio(
                        ["melody", "medium", "small", "large"],
                        label="Тип трека",
                        value="melody",
                        interactive=True,
                    )
                with gr.Row():
                    duration_slider = gr.Slider(
                        minimum=1,
                        maximum=120,
                        value=10,
                        label="Время трека(seconds)",
                        interactive=True,
                    )
                with gr.Row():
                    topk_box = gr.Number(label="Top-k", value=250, interactive=True)
                    topp_box = gr.Number(label="Top-p", value=0, interactive=True)
                    temperature_box = gr.Number(label="Temperature", value=1.0, interactive=True)
                    cfg_box = gr.Number(label="Classifier Free Guidance", value=3.0, interactive=True)
            with gr.Column():
                video_out = gr.Video(label="MP3 файл генерации")
        form_inputs = [
            model_choice,
            prompt_box,
            melody_input,
            duration_slider,
            topk_box,
            topp_box,
            temperature_box,
            cfg_box,
        ]
        generate_button.click(predict_full, inputs=form_inputs, outputs=[video_out])
        gr.Markdown("\n")

    interface.queue().launch(**launch_kwargs)
def ui_batched(launch_kwargs):
    """Build the batched interface and launch it.

    Only a prompt and a melody upload are offered; the button calls
    ``predict_batched`` with batching enabled and at most ``MAX_BATCH_SIZE``
    requests per call.

    Args:
        launch_kwargs: Keyword arguments forwarded to ``launch``.
    """
    # NOTE(review): the nesting of rows and columns is inferred from the
    # order of the statements -- confirm against the intended layout.
    with gr.Blocks() as demo:
        gr.Markdown("\n")
        with gr.Row():
            with gr.Column():
                with gr.Row():
                    prompt_box = gr.Text(
                        label="Describe your music",
                        lines=2,
                        interactive=True,
                    )
                    melody_input = gr.Audio(
                        source="upload",
                        type="numpy",
                        label="Condition on a melody (optional)",
                        interactive=True,
                    )
                with gr.Row():
                    generate_button = gr.Button("Generate")
            with gr.Column():
                video_out = gr.Video(label="Generated Music")
        generate_button.click(
            predict_batched,
            inputs=[prompt_box, melody_input],
            outputs=[video_out],
            batch=True,
            max_batch_size=MAX_BATCH_SIZE,
        )
        gr.Markdown("\n")

    demo.queue(max_size=8 * 4).launch(**launch_kwargs)
if __name__ == "__main__":
    # Command-line options that control how the Gradio server is launched.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '--listen',
        type=str,
        default='0.0.0.0' if 'SPACE_ID' in os.environ else '127.0.0.1',
        help='IP to listen on for connections to Gradio',
    )
    parser.add_argument('--username', type=str, default='', help='Username for authentication')
    parser.add_argument('--password', type=str, default='', help='Password for authentication')
    parser.add_argument(
        '--server_port',
        type=int,
        default=0,
        help='Port to run the server listener on',
    )
    parser.add_argument('--inbrowser', action='store_true', help='Open in browser')
    parser.add_argument('--share', action='store_true', help='Share the gradio UI')
    args = parser.parse_args()

    launch_kwargs = {'server_name': args.listen}
    # Authentication is only enabled when both credentials were supplied.
    if args.username and args.password:
        launch_kwargs['auth'] = (args.username, args.password)
    # The remaining options are forwarded only when set to a truthy value.
    for option in ('server_port', 'inbrowser', 'share'):
        chosen = getattr(args, option)
        if chosen:
            launch_kwargs[option] = chosen

    # Show the interface
    show_ui = ui_batched if IS_BATCHED else ui_full
    show_ui(launch_kwargs)