Spaces:

yiwv
/

music-gen-kit

Runtime error

App Files Community

yiwv committed on Oct 10, 2023

Commit

fef5319

•

1 Parent(s): f6a5806

fix: errors

Browse files

Files changed (6) hide show

.gitattributes +1 -0
.gitignore +3 -0
GeneralUserGS.sf2 +3 -0
app.py +65 -37
requirements.txt +2 -0
script.py +103 -0

.gitattributes CHANGED Viewed

@@ -40,3 +40,4 @@ model/saved_model.pb filter=lfs diff=lfs merge=lfs -text
 model/variables filter=lfs diff=lfs merge=lfs -text
 model/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
 model/variables/variables.index filter=lfs diff=lfs merge=lfs -text

 model/variables filter=lfs diff=lfs merge=lfs -text
 model/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
 model/variables/variables.index filter=lfs diff=lfs merge=lfs -text
+GeneralUserGS.sf2 filter=lfs diff=lfs merge=lfs -text

.gitignore ADDED Viewed

	@@ -0,0 +1,3 @@


1	+
2	+ generated_music.mid
3	+ output.wav

GeneralUserGS.sf2 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f45b6b4a68b6bf3d792fcbb6d7de24dc701a0f89c5900a21ef3aaece993b839a
+size 31281186

app.py CHANGED Viewed

@@ -3,31 +3,9 @@ from tensorflow.keras.models import load_model
 import numpy as np
 import pretty_midi
 import tensorflow as tf
-def mse_with_positive_pressure(y_true, y_pred):
-    # 通常のMSE
-    mse = tf.reduce_mean(tf.square(y_true - y_pred))
-    return mse
-    # # 予測が真の値よりも大きい場合のペナルティ
-    # positive_pressure = tf.reduce_mean(tf.maximum(0.0, y_pred - y_true))
-    # # 通常のMSEにペナルティを加える
-    # return mse + positive_pressure
-# model = load_model('model/', custom_objects={'mse_with_positive_pressure': mse_with_positive_pressure})
-# model = load_model('model/')
-# model = load_model('yiwv/music-gen-kit-model/')
-# from huggingface_hub import hf_hub_download
-# model_dir = hf_hub_download(repo_id="yiwv/music-gen-kit-model", revision="main") #, subfolder="model")
-# model = tf.keras.models.load_model(model_dir)
 from huggingface_hub import from_pretrained_keras
@@ -60,38 +38,88 @@ def predict_next_note(notes, keras_model, temperature=1.0):
     return int(pitch.numpy()), float(step.numpy()), float(duration.numpy())
-def notes_to_midi(notes, out_file, instrument_name="Acoustic Grand Piano"):
     pm = pretty_midi.PrettyMIDI()
     instrument = pretty_midi.Instrument(program=pretty_midi.instrument_name_to_program(instrument_name))
     for note_data in notes:
-        note = pretty_midi.Note(velocity=100, pitch=int(note_data[0]), start=note_data[3], end=note_data[4])
         instrument.notes.append(note)
     pm.instruments.append(instrument)
     pm.write(out_file)
     return out_file
-def generate_music(input_text):
     input_sequence = np.fromstring(input_text, sep=',')
     temperature = 2.0
     num_predictions = 120
     generated_notes = []
     prev_start = 0
     for _ in range(num_predictions):
-        pitch, step, duration = predict_next_note(input_sequence[-3:], model, temperature)
-        start = prev_start + step
-        end = start + duration
-        generated_notes.append([pitch, step, duration, start, end])
-        input_sequence = np.append(input_sequence, [pitch, step, duration])
-        prev_start = start
-    output_file_name = 'output.mid'
-    notes_to_midi(generated_notes, output_file_name)
-    return output_file_name
 # Gradioインターフェースの定義

 import numpy as np
 import pretty_midi
 import tensorflow as tf
+import soundfile as sf
+import fluidsynth
+import subprocess
 from huggingface_hub import from_pretrained_keras
     return int(pitch.numpy()), float(step.numpy()), float(duration.numpy())
+def convert_midi_to_wav(midi_path, wav_path, soundfont_path):
+    cmd = ["fluidsynth", "-ni", soundfont_path, midi_path, "-F", wav_path, "-r", "44100"]
+    subprocess.run(cmd)
+def notes_to_midi(notes, out_file, instrument_name):
     pm = pretty_midi.PrettyMIDI()
     instrument = pretty_midi.Instrument(program=pretty_midi.instrument_name_to_program(instrument_name))
+    prev_start = 0
     for note_data in notes:
+        pitch = int(note_data[0])
+        step = note_data[1]
+        duration = note_data[2]
+        start = prev_start + step
+        end = start + duration
+        note = pretty_midi.Note(velocity=100, pitch=pitch, start=start, end=end)
         instrument.notes.append(note)
+        prev_start = start
     pm.instruments.append(instrument)
     pm.write(out_file)
     return out_file
+def convert_midi_to_wav_with_pretty_midi(midi_path, wav_path):
+    # MIDIファイルを読み込む
+    midi_data = pretty_midi.PrettyMIDI(midi_path)
+    # オーディオ波形を取得する
+    audio_data = midi_data.synthesize()
+    # WAVファイルとして保存する
+    sf.write(wav_path, audio_data.T, 44100)
+def generate_music(input_text, instrument_name="Acoustic Grand Piano"):
+    """
+    input_text example: 60,0.5,0.5,62,0.5,0.5,64,0.5,0.5
+    """
     input_sequence = np.fromstring(input_text, sep=',')
     temperature = 2.0
     num_predictions = 120
+    seq_length = 25
+    vocab_size = 128
     generated_notes = []
     prev_start = 0
+    # 入力テキストを数値のリストに変換
+    input_values = [float(val) for val in input_text.split(",")]
+    # 入力データの形状を動的に調整
+    num_notes = len(input_values) // 3
+    if num_notes > 25:
+        input_data = np.zeros((num_notes, 3))
+    else:
+        input_data = np.zeros((25, 3))
+    input_data[-num_notes:] = np.array(input_values).reshape(-1, 3)
+    # 音楽生成のループ
+    generated_notes = []
     for _ in range(num_predictions):
+        pitch, step, duration = predict_next_note(input_data[-25:], model, temperature)
+        generated_notes.append((pitch, step, duration))
+        new_note = np.array([[pitch, step, duration]])
+        input_data = np.vstack([input_data, new_note])
+    # 生成されたノートをMIDIファイルに変換
+    generated_notes_array = np.array(generated_notes)
+    output_file_name = "generated_music.mid"
+    notes_to_midi(generated_notes_array, output_file_name, instrument_name)
+    try:
+        convert_midi_to_wav(output_file_name, 'output.wav', "GeneralUserGS.sf2")
+    except:
+        convert_midi_to_wav_with_pretty_midi(output_file_name, 'output.wav')
+    return 'output.wav'
 # Gradioインターフェースの定義

requirements.txt CHANGED Viewed

@@ -3,3 +3,5 @@ pyFluidSynth==1.3.2
 pretty-midi==0.2.10
 tensorflow==2.13.0
 keras==2.13.1

 pretty-midi==0.2.10
 tensorflow==2.13.0
 keras==2.13.1
+huggingface_hub
+soundfile

script.py ADDED Viewed

	@@ -0,0 +1,103 @@

+from tensorflow.keras.models import load_model
+import numpy as np
+import pretty_midi
+import tensorflow as tf
+from huggingface_hub import from_pretrained_keras
+def mse_with_positive_pressure(y_true: tf.Tensor, y_pred: tf.Tensor):
+    mse = (y_true - y_pred) ** 2
+    positive_pressure = 10 * tf.maximum(-y_pred, 0.0)
+    return tf.reduce_mean(mse + positive_pressure)
+model = from_pretrained_keras("yiwv/music-gen-kit-model", custom_objects={'mse_with_positive_pressure': mse_with_positive_pressure})
+def predict_next_note(notes, keras_model, temperature=1.0):
+    assert temperature > 0
+    inputs = tf.expand_dims(notes, 0)
+    predictions = model.predict(inputs)
+    pitch_logits = predictions['pitch']
+    step = predictions['step']
+    duration = predictions['duration']
+    pitch_logits /= temperature
+    pitch = tf.random.categorical(pitch_logits, num_samples=1)
+    pitch = tf.squeeze(pitch, axis=-1)
+    duration = tf.squeeze(duration, axis=-1)
+    step = tf.squeeze(step, axis=-1)
+    step = tf.maximum(0, step)
+    duration = tf.maximum(0, duration)
+    return int(pitch.numpy()), float(step.numpy()), float(duration.numpy())
+def notes_to_midi(notes, out_file, instrument_name):
+    pm = pretty_midi.PrettyMIDI()
+    instrument = pretty_midi.Instrument(program=pretty_midi.instrument_name_to_program(instrument_name))
+    prev_start = 0
+    for note_data in notes:
+        pitch = int(note_data[0])
+        step = note_data[1]
+        duration = note_data[2]
+        start = prev_start + step
+        end = start + duration
+        note = pretty_midi.Note(velocity=100, pitch=pitch, start=start, end=end)
+        instrument.notes.append(note)
+        prev_start = start
+    pm.instruments.append(instrument)
+    pm.write(out_file)
+    return out_file
+def generate_music(input_text, instrument_name="Acoustic Grand Piano"):
+    """
+    input_text example: 60,0.5,0.5,62,0.5,0.5,64,0.5,0.5
+    """
+    input_sequence = np.fromstring(input_text, sep=',')
+    temperature = 2.0
+    num_predictions = 120
+    seq_length = 25
+    vocab_size = 128
+    generated_notes = []
+    prev_start = 0
+    # 入力テキストを数値のリストに変換
+    input_values = [float(val) for val in input_text.split(",")]
+    # 入力データの形状を動的に調整
+    num_notes = len(input_values) // 3
+    if num_notes > 25:
+        input_data = np.zeros((num_notes, 3))
+    else:
+        input_data = np.zeros((25, 3))
+    input_data[-num_notes:] = np.array(input_values).reshape(-1, 3)
+    # 音楽生成のループ
+    generated_notes = []
+    for _ in range(num_predictions):
+        pitch, step, duration = predict_next_note(input_data[-25:], model, temperature)
+        generated_notes.append((pitch, step, duration))
+        new_note = np.array([[pitch, step, duration]])
+        input_data = np.vstack([input_data, new_note])
+    # 生成されたノートをMIDIファイルに変換
+    generated_notes_array = np.array(generated_notes)
+    output_file_name = "generated_music.mid"
+    notes_to_midi(generated_notes_array, output_file_name, instrument_name)
+    return output_file_name
+generate_music("60,0.5,0.5,62,0.5,0.5,64,0.5,0.5")