Spaces:

sweetcocoa
/

pop2piano

Sleeping

App Files Files Community

pop2piano / app.py

sweetcocoa

refactor ui

71a2b8b 11 months ago

raw

history blame contribute delete

No virus

6.71 kB

	import os
	import binascii
	import warnings

	import gradio as gr
	import librosa
	import numpy as np
	import torch
	import pretty_midi
	import pytube as pt

	from pytube.exceptions import VideoUnavailable
	from transformers import Pop2PianoForConditionalGeneration, Pop2PianoProcessor

	from utils import mp3_write, normalize

	# Working directories: one for audio downloaded from YouTube, one for the
	# generated MIDI / MP3 files. Both are created at import time if missing.
	yt_video_dir = "./yt_dir"
	outputs_dir = "./midi_wav_outputs"
	os.makedirs(outputs_dir, exist_ok=True)
	os.makedirs(yt_video_dir, exist_ok=True)

	# Checkpoint id shared by the model weights and the processor, so the two can
	# never drift apart.
	pretrained_checkpoint = "sweetcocoa/pop2piano"

	# Run on the GPU when one is available, otherwise fall back to the CPU.
	device = "cuda" if torch.cuda.is_available() else "cpu"
	model = Pop2PianoForConditionalGeneration.from_pretrained(pretrained_checkpoint).to(device)
	processor = Pop2PianoProcessor.from_pretrained(pretrained_checkpoint)

	# Arranger names offered in the UI. Materialised as a list (rather than a live
	# dict view) because it is handed to a Gradio component as its `choices`.
	composers = list(model.generation_config.composer_to_feature_token)


	def get_audio_from_yt_video(yt_link: str):
	    """Download the first audio-only stream of a YouTube video.

	    The file is stored in ``yt_video_dir`` under a random hexadecimal name
	    with an ``.mp4`` extension.

	    Args:
	        yt_link: URL of the YouTube video.

	    Returns:
	        A ``(path, path)`` tuple containing the downloaded file's path twice
	        (the click handler wires this callback to two output components), or
	        ``(None, None)`` when the link is blank, pytube rejects it, or the
	        video exposes no audio-only stream. Failures are reported through
	        ``warnings.warn`` instead of being raised.
	    """
	    if not yt_link or not yt_link.strip():
	        warnings.warn("No YouTube link was provided")
	        return None, None

	    # The link comes from a multi-line textbox; drop stray whitespace/newlines.
	    yt_link = yt_link.strip()
	    filename = None

	    try:
	        # `.first()` yields None for an empty query instead of raising IndexError.
	        stream = pt.YouTube(yt_link).streams.filter(only_audio=True).first()
	        if stream is None:
	            warnings.warn(f"No audio stream found at {yt_link}")
	        else:
	            filename = os.path.join(yt_video_dir, binascii.hexlify(os.urandom(8)).decode() + ".mp4")
	            stream.download(filename=filename)
	    except VideoUnavailable as e:
	        warnings.warn(f"Video Not Found at {yt_link} ({e})")
	        filename = None
	    except pt.exceptions.PytubeError as e:
	        # Covers the remaining pytube failures, e.g. a link it cannot parse.
	        warnings.warn(f"Could not download audio from {yt_link} ({e})")
	        filename = None

	    return filename, filename


	def inference(file_uploaded, composer):
	    """Generate a piano cover for an audio file.

	    Args:
	        file_uploaded: Path of the audio file to load with ``librosa``.
	        composer: Arranger name forwarded to ``model.generate``.

	    Returns:
	        The tuple returned by ``prepare_output_file`` for the generated MIDI.

	    Raises:
	        gr.Error: If no audio file was supplied (nothing uploaded, or the
	            YouTube download produced no file).
	    """
	    if not file_uploaded:
	        # Fail with a readable message instead of an opaque loader error.
	        raise gr.Error("Please upload an audio file or download one from a YouTube link first.")

	    # sr=None keeps the file's native sampling rate, but this can cause some
	    # silent errors where the generated output is not up to the desired
	    # quality. If that happens, consider switching sr to 44100 Hz.
	    pop_y, sr = librosa.load(file_uploaded, sr=None)

	    inputs = processor(audio=pop_y, sampling_rate=sr, return_tensors="pt").to(device)
	    model_output = model.generate(input_features=inputs["input_features"], composer=composer)
	    # Decoding is done on the CPU copies of the generated tokens and features.
	    tokenizer_output = processor.batch_decode(
	        token_ids=model_output.to("cpu"), feature_extractor_output=inputs.to("cpu")
	    )["pretty_midi_objects"]

	    return prepare_output_file(tokenizer_output, sr, pop_y)


	def prepare_output_file(tokenizer_output: "list[pretty_midi.PrettyMIDI]", sr: int, pop_y: np.ndarray):
	    """Write the generated MIDI, its synthesised audio and a mix with the source.

	    Three files sharing one random base name are written to ``outputs_dir``:
	    ``<name>.mid``, ``<name>.mp3`` (the synthesised MIDI) and
	    ``<name>.mix.mp3`` (synthesised MIDI stacked with the source audio).

	    Args:
	        tokenizer_output: Sequence of decoded MIDI objects; only the first
	            element is used.
	        sr: Sampling rate used for synthesis and for the written MP3 files.
	        pop_y: Samples of the source audio (indexed here as a 1-D array).

	    Returns:
	        ``(midi_mp3, midi_mp3, midi_file, mix_mp3, mix_mp3)`` -- the MP3 paths
	        appear twice because each feeds both a player and a download widget.
	    """
	    # Random suffix so that no two requests share output file names. All
	    # sibling paths derive from the same base instead of string-replacing
	    # ".mid", which would also rewrite any ".mid" elsewhere in the path.
	    base_path = os.path.join(outputs_dir, "p2p_" + binascii.hexlify(os.urandom(8)).decode())
	    midi_output = base_path + ".mid"
	    midi_y_path = base_path + ".mp3"
	    stereo_path = base_path + ".mix.mp3"

	    # Write the .mid file and its synthesised audio.
	    piano_midi = tokenizer_output[0]
	    piano_midi.write(midi_output)
	    midi_y: np.ndarray = piano_midi.fluidsynth(sr)
	    mp3_write(midi_y_path, sr, normalize(midi_y), normalized=True)

	    # Zero-pad the shorter signal at its end so both have the same length,
	    # then stack them: generated piano first, source audio at half amplitude.
	    target_len = max(len(pop_y), len(midi_y))
	    midi_y = np.pad(midi_y, (0, target_len - len(midi_y)))
	    pop_y = np.pad(pop_y, (0, target_len - len(pop_y)))
	    stereo = np.stack((midi_y, pop_y * 0.5))

	    # Write the mix; transposed so that the two signals become columns.
	    mp3_write(stereo_path, sr, normalize(stereo.T), normalized=True)

	    return midi_y_path, midi_y_path, midi_output, stereo_path, stereo_path


	# Gradio UI: an input panel (audio upload or YouTube download, plus arranger
	# choice), an output panel (players and download links) and a cached example.
	block = gr.Blocks()

	with block:
	    gr.HTML(
	        """
	<div style="text-align: center; max-width: 700px; margin: 0 auto;">
	<div
	style="
	display: inline-flex;
	align-items: center;
	gap: 0.8rem;
	font-size: 1.75rem;
	"
	>
	<h1 style="font-weight: 900; margin-bottom: 7px;">
	Pop2piano
	</h1>
	</div>
	<p style="margin-bottom: 10px; font-size: 94%">
	A demo for Pop2Piano:Pop Audio-based Piano Cover Generation.<br>
	Please select the composer(Arranger) and upload the pop audio or enter the YouTube link and then click Generate.
	</p>
	</div>
	"""
	    )
	    with gr.Group():
	        with gr.Column():
	            with gr.Blocks() as audio_select:
	                with gr.Tab("Upload Audio"):
	                    file_uploaded = gr.Audio(label="Upload an audio", type="filepath")
	                with gr.Tab("YouTube url"):
	                    with gr.Row():
	                        yt_link = gr.Textbox(
	                            label="Enter YouTube Link of the Video", autofocus=True, lines=3
	                        )
	                        yt_btn = gr.Button("Download Audio from YouTube Link", size="lg")
	                    yt_audio_path = gr.Audio(
	                        label="Audio Extracted from the YouTube Video", interactive=False
	                    )
	                    # The downloaded file is previewed here and also pushed into
	                    # the upload widget, so "Generate" reads it from there.
	                    yt_btn.click(
	                        get_audio_from_yt_video,
	                        inputs=[yt_link],
	                        outputs=[yt_audio_path, file_uploaded],
	                    )
	        with gr.Column():
	            # `choices` is passed as a list, the type the component documents.
	            composer = gr.Dropdown(label="Arranger", choices=list(composers), value="composer1")
	            generate_btn = gr.Button("Generate")

	    with gr.Group():
	        gr.HTML(
	            """
	<div> <h3> <center> Listen to the generated MIDI. </h3> </div>
	"""
	        )
	        # `equal_height` is a constructor argument; the former `.style(...)`
	        # call is not available on current Gradio layout blocks.
	        with gr.Row(equal_height=True):
	            stereo_mix1 = gr.Audio(label="Listen to the Stereo Mix")
	            wav_output1 = gr.Audio(label="Listen to the Generated MIDI")

	        with gr.Row():
	            stereo_mix2 = gr.File(label="Download the Stereo Mix (.mp3)")
	            wav_output2 = gr.File(label="Download the Generated MIDI (.mp3)")
	            midi_output = gr.File(label="Download the Generated MIDI (.mid)")
	        # Output order mirrors the tuple returned by `inference`.
	        generate_btn.click(
	            inference,
	            inputs=[file_uploaded, composer],
	            outputs=[wav_output1, wav_output2, midi_output, stereo_mix1, stereo_mix2],
	        )

	    with gr.Group():
	        gr.Examples(
	            [
	                ["./examples/custom_song.mp3", "composer1"],
	            ],
	            fn=inference,
	            inputs=[file_uploaded, composer],
	            outputs=[wav_output1, wav_output2, midi_output, stereo_mix1, stereo_mix2],
	            cache_examples=True,
	        )

	    gr.HTML(
	        """
	<div class="footer">
	<center><p><a href="http://sweetcocoa.github.io/pop2piano_samples" style="text-decoration: underline;" target="_blank">Project Page</a>
	<center><a href="https://huggingface.co/docs/transformers/main/model_doc/pop2piano" style="text-decoration: underline;" target="_blank">HuggingFace Model Docs</a>
	<center><a href="https://github.com/sweetcocoa/pop2piano" style="text-decoration: underline;" target="_blank">Github</a>
	</p>
	</div>
	"""
	    )

	block.launch(debug=False)