import gradio as gr
import numpy as np
import pretty_midi
import tensorflow as tf
import soundfile as sf
# import fluidsynth   # only needed for the commented-out fluidsynth converter below
# import subprocess   # likewise, only used by the fluidsynth converter
from huggingface_hub import from_pretrained_keras


def mse_with_positive_pressure(y_true: tf.Tensor, y_pred: tf.Tensor):
    """MSE with an extra penalty that pushes predictions toward positive values.

    This custom loss was used at training time, so it must be passed via
    custom_objects when the model is loaded.
    """
    mse = (y_true - y_pred) ** 2
    positive_pressure = 10 * tf.maximum(-y_pred, 0.0)
    return tf.reduce_mean(mse + positive_pressure)


model = from_pretrained_keras(
    "yiwv/music-gen-kit-model",
    custom_objects={"mse_with_positive_pressure": mse_with_positive_pressure},
)


def predict_next_note(notes, keras_model, temperature=1.0):
    """Sample one (pitch, step, duration) triple from the model."""
    assert temperature > 0

    # Add a batch dimension: (seq_length, 3) -> (1, seq_length, 3).
    inputs = tf.expand_dims(notes, 0)
    predictions = keras_model.predict(inputs)
    pitch_logits = predictions["pitch"]
    step = predictions["step"]
    duration = predictions["duration"]

    # Temperature-scale the pitch logits, then sample a pitch.
    pitch_logits /= temperature
    pitch = tf.random.categorical(pitch_logits, num_samples=1)
    pitch = tf.squeeze(pitch, axis=-1)
    duration = tf.squeeze(duration, axis=-1)
    step = tf.squeeze(step, axis=-1)

    # Clamp step and duration so time never moves backwards.
    step = tf.maximum(0.0, step)
    duration = tf.maximum(0.0, duration)

    return int(pitch.numpy()), float(step.numpy()), float(duration.numpy())


# Alternative converter using the fluidsynth CLI and a SoundFont; kept for
# reference but disabled because no SoundFont is bundled with this app.
# def convert_midi_to_wav(midi_path, wav_path, soundfont_path):
#     cmd = ["fluidsynth", "-ni", soundfont_path, midi_path, "-F", wav_path, "-r", "44100"]
#     subprocess.run(cmd)


def notes_to_midi(notes, out_file, instrument_name):
    """Write (pitch, step, duration) rows to a single-instrument MIDI file."""
    pm = pretty_midi.PrettyMIDI()
    instrument = pretty_midi.Instrument(
        program=pretty_midi.instrument_name_to_program(instrument_name)
    )
    prev_start = 0
    for note_data in notes:
        pitch = int(note_data[0])
        step = note_data[1]      # offset from the previous note's start time
        duration = note_data[2]  # how long the note sounds
        start = prev_start + step
        end = start + duration
        note = pretty_midi.Note(velocity=100, pitch=pitch, start=start, end=end)
        instrument.notes.append(note)
        prev_start = start
    pm.instruments.append(instrument)
    pm.write(out_file)
    return out_file


def convert_midi_to_wav_with_pretty_midi(midi_path, wav_path):
    # Load the MIDI file.
    midi_data = pretty_midi.PrettyMIDI(midi_path)
    # Synthesize the audio waveform (pretty_midi's built-in sine synthesis, mono).
    audio_data = midi_data.synthesize(fs=44100)
    # Save it as a WAV file.
    sf.write(wav_path, audio_data, 44100)


def generate_music(input_text, instrument_name="Acoustic Grand Piano"):
    """
    input_text is a comma-separated sequence of (pitch, step, duration)
    triples, e.g.: 60,0.5,0.5,62,0.5,0.5,64,0.5,0.5
    """
    temperature = 2.0
    num_predictions = 120
    seq_length = 25

    # Convert the input text into a list of numbers.
    input_values = [float(val) for val in input_text.split(",")]

    # Adjust the input shape dynamically: left-pad with zeros when the seed
    # is shorter than the model's seq_length window.
    num_notes = len(input_values) // 3
    if num_notes > seq_length:
        input_data = np.zeros((num_notes, 3))
    else:
        input_data = np.zeros((seq_length, 3))
    input_data[-num_notes:] = np.array(input_values).reshape(-1, 3)

    # Generation loop: feed the last seq_length notes back into the model.
    generated_notes = []
    for _ in range(num_predictions):
        pitch, step, duration = predict_next_note(input_data[-seq_length:], model, temperature)
        generated_notes.append((pitch, step, duration))
        new_note = np.array([[pitch, step, duration]])
        input_data = np.vstack([input_data, new_note])

    # Convert the generated notes to a MIDI file.
    generated_notes_array = np.array(generated_notes)
    output_file_name = "generated_music.mid"
    notes_to_midi(generated_notes_array, output_file_name, instrument_name)

    # try:
    #     convert_midi_to_wav(output_file_name, "output.wav", "GeneralUserGS.sf2")
    # except Exception:
    convert_midi_to_wav_with_pretty_midi(output_file_name, "output.wav")

    return "output.wav"


# Define the Gradio interface.
iface = gr.Interface(fn=generate_music, inputs="text", outputs="audio")
# iface.launch()
iface.launch(debug=True, share=True)
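
# --- Optional local smoke test (sketch) ---
# A minimal, hedged example of exercising generate_music directly, without the
# Gradio UI, using the C-D-E seed from the docstring. It is commented out
# because iface.launch() above blocks; to try it, comment out the launch call
# instead. It assumes the Hub download succeeds and that the working directory
# is writable for generated_music.mid / output.wav.
#
# if __name__ == "__main__":
#     wav_path = generate_music("60,0.5,0.5,62,0.5,0.5,64,0.5,0.5")
#     print(f"Wrote {wav_path}")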