music-gen-kit / script.py
yiwv's picture
wip: use model in magenta html
fde0931
raw
history blame contribute delete
No virus
3.5 kB
from tensorflow.keras.models import load_model
import numpy as np
import pretty_midi
import tensorflow as tf
from huggingface_hub import from_pretrained_keras
def mse_with_positive_pressure(y_true: tf.Tensor, y_pred: tf.Tensor):
    """Mean squared error plus a penalty discouraging negative predictions.

    The penalty adds 10 * max(-y_pred, 0) per element, so negative
    predictions are pushed toward zero while non-negative ones are
    unaffected. Used as the custom training loss for the step/duration
    regression heads, which must predict non-negative time values.
    """
    squared_error = tf.square(y_true - y_pred)
    negativity_penalty = 10 * tf.maximum(-y_pred, 0.0)
    return tf.reduce_mean(squared_error + negativity_penalty)
# Download and deserialize the trained Keras model from the Hugging Face Hub.
# The custom loss used at training time must be passed via custom_objects,
# otherwise Keras cannot reconstruct the compiled model.
model = from_pretrained_keras("yiwv/music-gen-kit-model", custom_objects={'mse_with_positive_pressure': mse_with_positive_pressure})
def predict_next_note(notes, keras_model, temperature=1.0):
    """Sample the next (pitch, step, duration) note from the model.

    Args:
        notes: array-like of shape (seq_length, 3) with rows of
            (pitch, step, duration) used as the conditioning sequence.
        keras_model: trained Keras model whose predict() returns a dict
            with 'pitch' logits and scalar 'step' / 'duration' outputs.
        temperature: sampling temperature (> 0); higher values make the
            pitch choice more random.

    Returns:
        Tuple (pitch, step, duration) as (int, float, float); step and
        duration are clamped to be non-negative.
    """
    assert temperature > 0
    # Add a batch dimension: (seq_length, 3) -> (1, seq_length, 3).
    inputs = tf.expand_dims(notes, 0)
    # BUG FIX: use the keras_model argument instead of the global `model`,
    # so the function honors whichever model the caller passes in.
    predictions = keras_model.predict(inputs)
    # Temperature-scale the logits, then sample one pitch from the
    # resulting categorical distribution.
    pitch_logits = predictions['pitch'] / temperature
    pitch = tf.random.categorical(pitch_logits, num_samples=1)
    pitch = tf.squeeze(pitch, axis=-1)
    step = tf.squeeze(predictions['step'], axis=-1)
    duration = tf.squeeze(predictions['duration'], axis=-1)
    # step and duration are time offsets in seconds; negative values are
    # not meaningful, so clamp at zero.
    step = tf.maximum(0, step)
    duration = tf.maximum(0, duration)
    return int(pitch.numpy()), float(step.numpy()), float(duration.numpy())
def notes_to_midi(notes, out_file, instrument_name):
    """Write a sequence of (pitch, step, duration) notes to a MIDI file.

    Each note begins `step` seconds after the previous note's start time
    and sounds for `duration` seconds, all at a fixed velocity of 100.

    Args:
        notes: iterable of (pitch, step, duration) rows.
        out_file: path of the MIDI file to write.
        instrument_name: General MIDI instrument name for the track.

    Returns:
        The `out_file` path that was written.
    """
    midi = pretty_midi.PrettyMIDI()
    program = pretty_midi.instrument_name_to_program(instrument_name)
    track = pretty_midi.Instrument(program=program)
    onset = 0
    for pitch_value, step_value, duration_value in notes:
        # Each onset is offset from the previous note's onset, not its end.
        onset = onset + step_value
        track.notes.append(
            pretty_midi.Note(
                velocity=100,
                pitch=int(pitch_value),
                start=onset,
                end=onset + duration_value,
            )
        )
    midi.instruments.append(track)
    midi.write(out_file)
    return out_file
def generate_music(input_text, instrument_name="Acoustic Grand Piano"):
    """Generate a MIDI file by autoregressively extending a seed sequence.

    Args:
        input_text: comma-separated flat list of (pitch, step, duration)
            triples, e.g. "60,0.5,0.5,62,0.5,0.5,64,0.5,0.5".
        instrument_name: General MIDI instrument name for the output track.

    Returns:
        The path of the written MIDI file ("generated_music.mid").
    """
    temperature = 2.0
    num_predictions = 120
    seq_length = 25  # length of the conditioning window the model expects

    # Parse the seed text into an (n, 3) array of (pitch, step, duration).
    # (The previous np.fromstring parse was deprecated and its result unused.)
    input_values = [float(val) for val in input_text.split(",")]
    num_notes = len(input_values) // 3
    # Left-pad with zero rows so at least seq_length rows are available
    # for the first model call.
    input_data = np.zeros((max(num_notes, seq_length), 3))
    input_data[-num_notes:] = np.array(input_values).reshape(-1, 3)

    # Autoregressive loop: sample a note, append it, and condition the
    # next prediction on the most recent seq_length notes.
    generated_notes = []
    for _ in range(num_predictions):
        pitch, step, duration = predict_next_note(input_data[-seq_length:], model, temperature)
        generated_notes.append((pitch, step, duration))
        input_data = np.vstack([input_data, [[pitch, step, duration]]])

    # Render only the newly generated notes to MIDI.
    output_file_name = "generated_music.mid"
    notes_to_midi(np.array(generated_notes), output_file_name, instrument_name)
    return output_file_name
# Smoke test at import time: generate a track from a three-note seed
# (MIDI pitches 60/62/64 = C4, D4, E4, each with step and duration 0.5 s).
generate_music("60,0.5,0.5,62,0.5,0.5,64,0.5,0.5")