Spaces:

yiwv
/

music-gen-kit

Runtime error

App Files Files Community

music-gen-kit / app.py

yiwv

not use fluidsynth for haggingface space

4f0fbd3 9 months ago

raw

history blame contribute delete

No virus

4.06 kB

	import gradio as gr
	from tensorflow.keras.models import load_model
	import numpy as np
	import pretty_midi
	import tensorflow as tf
	import soundfile as sf
	# import fluidsynth
	import subprocess
	from huggingface_hub import from_pretrained_keras


	def mse_with_positive_pressure(y_true: tf.Tensor, y_pred: tf.Tensor):
	mse = (y_true - y_pred) ** 2
	positive_pressure = 10 * tf.maximum(-y_pred, 0.0)
	return tf.reduce_mean(mse + positive_pressure)


	model = from_pretrained_keras("yiwv/music-gen-kit-model", custom_objects={'mse_with_positive_pressure': mse_with_positive_pressure})


	def predict_next_note(notes, keras_model, temperature=1.0):
	assert temperature > 0
	inputs = tf.expand_dims(notes, 0)
	predictions = model.predict(inputs)
	pitch_logits = predictions['pitch']
	step = predictions['step']
	duration = predictions['duration']

	pitch_logits /= temperature
	pitch = tf.random.categorical(pitch_logits, num_samples=1)
	pitch = tf.squeeze(pitch, axis=-1)
	duration = tf.squeeze(duration, axis=-1)
	step = tf.squeeze(step, axis=-1)

	step = tf.maximum(0, step)
	duration = tf.maximum(0, duration)

	return int(pitch.numpy()), float(step.numpy()), float(duration.numpy())



	# def convert_midi_to_wav(midi_path, wav_path, soundfont_path):
	# cmd = ["fluidsynth", "-ni", soundfont_path, midi_path, "-F", wav_path, "-r", "44100"]
	# subprocess.run(cmd)


	def notes_to_midi(notes, out_file, instrument_name):
	pm = pretty_midi.PrettyMIDI()
	instrument = pretty_midi.Instrument(program=pretty_midi.instrument_name_to_program(instrument_name))

	prev_start = 0
	for note_data in notes:
	pitch = int(note_data[0])
	step = note_data[1]
	duration = note_data[2]
	start = prev_start + step
	end = start + duration
	note = pretty_midi.Note(velocity=100, pitch=pitch, start=start, end=end)
	instrument.notes.append(note)
	prev_start = start

	pm.instruments.append(instrument)
	pm.write(out_file)
	return out_file



	def convert_midi_to_wav_with_pretty_midi(midi_path, wav_path):
	# MIDIファイルを読み込む
	midi_data = pretty_midi.PrettyMIDI(midi_path)
	# オーディオ波形を取得する
	audio_data = midi_data.synthesize()
	# WAVファイルとして保存する
	sf.write(wav_path, audio_data.T, 44100)


	def generate_music(input_text, instrument_name="Acoustic Grand Piano"):
	"""
	input_text example: 60,0.5,0.5,62,0.5,0.5,64,0.5,0.5
	"""
	input_sequence = np.fromstring(input_text, sep=',')
	temperature = 2.0
	num_predictions = 120

	seq_length = 25
	vocab_size = 128

	generated_notes = []
	prev_start = 0

	# 入力テキストを数値のリストに変換
	input_values = [float(val) for val in input_text.split(",")]

	# 入力データの形状を動的に調整
	num_notes = len(input_values) // 3
	if num_notes > 25:
	input_data = np.zeros((num_notes, 3))
	else:
	input_data = np.zeros((25, 3))

	input_data[-num_notes:] = np.array(input_values).reshape(-1, 3)

	# 音楽生成のループ
	generated_notes = []
	for _ in range(num_predictions):
	pitch, step, duration = predict_next_note(input_data[-25:], model, temperature)
	generated_notes.append((pitch, step, duration))
	new_note = np.array([[pitch, step, duration]])
	input_data = np.vstack([input_data, new_note])

	# 生成されたノートをMIDIファイルに変換
	generated_notes_array = np.array(generated_notes)
	output_file_name = "generated_music.mid"
	notes_to_midi(generated_notes_array, output_file_name, instrument_name)

	# try:
	# convert_midi_to_wav(output_file_name, 'output.wav', "GeneralUserGS.sf2")
	# except:
	convert_midi_to_wav_with_pretty_midi(output_file_name, 'output.wav')

	return 'output.wav'



	# Gradioインターフェースの定義
	iface = gr.Interface(fn=generate_music, inputs='text', outputs='audio')
	# iface.launch()
	iface.launch(debug=True, share=True)