yiwv committed on
Commit
fef5319
1 Parent(s): f6a5806

fix: errors

Browse files
Files changed (6) hide show
  1. .gitattributes +1 -0
  2. .gitignore +3 -0
  3. GeneralUserGS.sf2 +3 -0
  4. app.py +65 -37
  5. requirements.txt +2 -0
  6. script.py +103 -0
.gitattributes CHANGED
@@ -40,3 +40,4 @@ model/saved_model.pb filter=lfs diff=lfs merge=lfs -text
40
  model/variables filter=lfs diff=lfs merge=lfs -text
41
  model/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
42
  model/variables/variables.index filter=lfs diff=lfs merge=lfs -text
 
 
40
  model/variables filter=lfs diff=lfs merge=lfs -text
41
  model/variables/variables.data-00000-of-00001 filter=lfs diff=lfs merge=lfs -text
42
  model/variables/variables.index filter=lfs diff=lfs merge=lfs -text
43
+ GeneralUserGS.sf2 filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+
2
+ generated_music.mid
3
+ output.wav
GeneralUserGS.sf2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f45b6b4a68b6bf3d792fcbb6d7de24dc701a0f89c5900a21ef3aaece993b839a
3
+ size 31281186
app.py CHANGED
@@ -3,31 +3,9 @@ from tensorflow.keras.models import load_model
3
  import numpy as np
4
  import pretty_midi
5
  import tensorflow as tf
6
-
7
-
8
-
9
def mse_with_positive_pressure(y_true, y_pred):
    """Return the plain mean squared error between ``y_true`` and ``y_pred``."""
    # Plain MSE only; no extra penalty term is applied in this version.
    squared_error = tf.square(y_true - y_pred)
    return tf.reduce_mean(squared_error)
14
- # # 予測が真の値よりも大きい場合のペナルティ
15
- # positive_pressure = tf.reduce_mean(tf.maximum(0.0, y_pred - y_true))
16
-
17
- # # 通常のMSEにペナルティを加える
18
- # return mse + positive_pressure
19
-
20
- # model = load_model('model/', custom_objects={'mse_with_positive_pressure': mse_with_positive_pressure})
21
-
22
- # model = load_model('model/')
23
-
24
- # model = load_model('yiwv/music-gen-kit-model/')
25
-
26
- # from huggingface_hub import hf_hub_download
27
-
28
- # model_dir = hf_hub_download(repo_id="yiwv/music-gen-kit-model", revision="main") #, subfolder="model")
29
- # model = tf.keras.models.load_model(model_dir)
30
-
31
  from huggingface_hub import from_pretrained_keras
32
 
33
 
@@ -60,38 +38,88 @@ def predict_next_note(notes, keras_model, temperature=1.0):
60
  return int(pitch.numpy()), float(step.numpy()), float(duration.numpy())
61
 
62
 
63
- def notes_to_midi(notes, out_file, instrument_name="Acoustic Grand Piano"):
 
 
 
 
 
 
64
  pm = pretty_midi.PrettyMIDI()
65
  instrument = pretty_midi.Instrument(program=pretty_midi.instrument_name_to_program(instrument_name))
66
 
 
67
  for note_data in notes:
68
- note = pretty_midi.Note(velocity=100, pitch=int(note_data[0]), start=note_data[3], end=note_data[4])
 
 
 
 
 
69
  instrument.notes.append(note)
 
70
 
71
  pm.instruments.append(instrument)
72
  pm.write(out_file)
73
  return out_file
74
 
75
 
76
- def generate_music(input_text):
 
 
 
 
 
 
 
 
 
 
 
 
 
77
  input_sequence = np.fromstring(input_text, sep=',')
78
  temperature = 2.0
79
  num_predictions = 120
 
 
 
 
80
  generated_notes = []
81
  prev_start = 0
82
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
83
  for _ in range(num_predictions):
84
- pitch, step, duration = predict_next_note(input_sequence[-3:], model, temperature)
85
- start = prev_start + step
86
- end = start + duration
87
- generated_notes.append([pitch, step, duration, start, end])
88
- input_sequence = np.append(input_sequence, [pitch, step, duration])
89
- prev_start = start
 
 
 
 
 
 
 
 
90
 
91
- output_file_name = 'output.mid'
92
- notes_to_midi(generated_notes, output_file_name)
93
 
94
- return output_file_name
95
 
96
 
97
  # Gradioインターフェースの定義
 
3
  import numpy as np
4
  import pretty_midi
5
  import tensorflow as tf
6
+ import soundfile as sf
7
+ import fluidsynth
8
+ import subprocess
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
  from huggingface_hub import from_pretrained_keras
10
 
11
 
 
38
  return int(pitch.numpy()), float(step.numpy()), float(duration.numpy())
39
 
40
 
41
+
42
def convert_midi_to_wav(midi_path, wav_path, soundfont_path):
    """Render a MIDI file to a WAV file by invoking the ``fluidsynth`` CLI.

    Args:
        midi_path: Path of the MIDI file to render.
        wav_path: Path of the WAV file to create.
        soundfont_path: Path of the SoundFont (.sf2) file used for synthesis.

    Raises:
        OSError: If the ``fluidsynth`` executable cannot be launched
            (for example, it is not installed).
        subprocess.CalledProcessError: If fluidsynth exits with a non-zero
            status.
    """
    # Arguments are passed as a list (no shell), so paths are never
    # interpreted by a shell.
    cmd = ["fluidsynth", "-ni", soundfont_path, midi_path, "-F", wav_path, "-r", "44100"]
    # check=True surfaces a failed render as an exception instead of silently
    # returning, so callers can react (e.g. fall back to another renderer).
    subprocess.run(cmd, check=True)
45
+
46
+
47
def notes_to_midi(notes, out_file, instrument_name="Acoustic Grand Piano"):
    """Write a sequence of notes to a single-instrument MIDI file.

    Args:
        notes: Iterable of note rows whose first three entries are
            ``(pitch, step, duration)``. ``step`` is the offset of the note's
            start from the previous note's start; ``duration`` is the note
            length. Both are used directly as pretty_midi times.
        out_file: Path of the MIDI file to write.
        instrument_name: Instrument name resolved through
            ``pretty_midi.instrument_name_to_program``. The default keeps
            two-argument callers of the earlier signature working.

    Returns:
        ``out_file``, unchanged.
    """
    pm = pretty_midi.PrettyMIDI()
    program = pretty_midi.instrument_name_to_program(instrument_name)
    instrument = pretty_midi.Instrument(program=program)

    # Start times are accumulated from the relative steps; the first note is
    # placed `step` after time 0.
    prev_start = 0
    for note_data in notes:
        pitch, step, duration = note_data[0], note_data[1], note_data[2]
        start = prev_start + step
        end = start + duration
        instrument.notes.append(
            pretty_midi.Note(velocity=100, pitch=int(pitch), start=start, end=end)
        )
        prev_start = start

    pm.instruments.append(instrument)
    pm.write(out_file)
    return out_file
65
 
66
 
67
+
68
def convert_midi_to_wav_with_pretty_midi(midi_path, wav_path):
    """Render a MIDI file to WAV using pretty_midi's built-in synthesizer.

    Args:
        midi_path: Path of the MIDI file to load.
        wav_path: Path of the WAV file to write (written at 44100 Hz).
    """
    # Load the MIDI file and synthesize it to an audio waveform.
    waveform = pretty_midi.PrettyMIDI(midi_path).synthesize()
    # Save the waveform as a WAV file.
    sf.write(wav_path, waveform.T, 44100)
75
+
76
+
77
def generate_music(input_text, instrument_name="Acoustic Grand Piano"):
    """Continue a seed melody with the model and render the result to WAV.

    Args:
        input_text: Comma-separated seed notes given as flat
            ``pitch,step,duration`` triples, for example
            ``"60,0.5,0.5,62,0.5,0.5,64,0.5,0.5"``.
        instrument_name: Instrument name used for the generated MIDI track.

    Returns:
        The path of the rendered audio file, ``'output.wav'``.

    Raises:
        ValueError: If a field is not numeric or the number of values is not
            a multiple of three.
    """
    temperature = 2.0
    num_predictions = 120
    seq_length = 25  # number of most recent notes passed to the model per step

    # Parse the input text into numbers and validate it before any model work.
    input_values = [float(val) for val in input_text.split(",")]
    if len(input_values) % 3 != 0:
        raise ValueError(
            "input_text must contain pitch,step,duration triples; "
            f"got {len(input_values)} values"
        )
    seed_notes = np.array(input_values).reshape(-1, 3)
    num_notes = len(seed_notes)

    # Left-pad short seeds with zero rows so the window always has at least
    # seq_length notes; longer seeds are kept whole and windowed below.
    input_data = np.zeros((max(num_notes, seq_length), 3))
    input_data[-num_notes:] = seed_notes

    # Generation loop: keep only a rolling window of the latest seq_length
    # notes instead of growing the full history on every step.
    window = input_data[-seq_length:]
    generated_notes = []
    for _ in range(num_predictions):
        pitch, step, duration = predict_next_note(window, model, temperature)
        generated_notes.append((pitch, step, duration))
        window = np.vstack([window[1:], [[pitch, step, duration]]])

    # Convert the generated notes to a MIDI file.
    output_file_name = "generated_music.mid"
    notes_to_midi(np.array(generated_notes), output_file_name, instrument_name)

    try:
        convert_midi_to_wav(output_file_name, 'output.wav', "GeneralUserGS.sf2")
    except (OSError, subprocess.SubprocessError):
        # The fluidsynth CLI could not be launched or reported an error:
        # fall back to pretty_midi's own synthesizer. Unlike a bare
        # `except:`, this does not swallow KeyboardInterrupt or programming
        # errors.
        convert_midi_to_wav_with_pretty_midi(output_file_name, 'output.wav')

    return 'output.wav'
 
122
 
 
123
 
124
 
125
  # Gradioインターフェースの定義
requirements.txt CHANGED
@@ -3,3 +3,5 @@ pyFluidSynth==1.3.2
3
  pretty-midi==0.2.10
4
  tensorflow==2.13.0
5
  keras==2.13.1
 
 
 
3
  pretty-midi==0.2.10
4
  tensorflow==2.13.0
5
  keras==2.13.1
6
+ huggingface_hub
7
+ soundfile
script.py ADDED
@@ -0,0 +1,103 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ from tensorflow.keras.models import load_model
3
+ import numpy as np
4
+ import pretty_midi
5
+ import tensorflow as tf
6
+
7
+ from huggingface_hub import from_pretrained_keras
8
+
9
+
10
def mse_with_positive_pressure(y_true: tf.Tensor, y_pred: tf.Tensor):
    """Mean squared error plus a penalty that discourages negative predictions.

    For every element, ten times the magnitude of a negative prediction is
    added to the squared error; non-negative predictions add nothing extra.
    The mean over all elements is returned.
    """
    squared_error = (y_true - y_pred) ** 2
    # -y_pred is positive only where the prediction is below zero.
    negative_penalty = 10 * tf.maximum(-y_pred, 0.0)
    total = squared_error + negative_penalty
    return tf.reduce_mean(total)
14
+
15
+
16
# Load the pretrained Keras model from the Hugging Face Hub at import time.
# The custom loss is registered by name so the saved model can be
# deserialized (presumably it was compiled with this loss - verify).
model = from_pretrained_keras(
    "yiwv/music-gen-kit-model",
    custom_objects={'mse_with_positive_pressure': mse_with_positive_pressure},
)
17
+
18
+
19
def predict_next_note(notes, keras_model, temperature=1.0):
    """Sample the next note from ``keras_model`` given the preceding notes.

    Args:
        notes: The preceding notes; a leading batch axis is added before the
            data is passed to the model.
        keras_model: Model whose ``predict`` returns a mapping with the keys
            ``'pitch'`` (logits), ``'step'`` and ``'duration'``.
        temperature: Divisor applied to the pitch logits before sampling;
            must be greater than zero.

    Returns:
        A ``(pitch, step, duration)`` tuple of ``(int, float, float)``.
        ``step`` and ``duration`` are clamped to be non-negative.

    Raises:
        ValueError: If ``temperature`` is not greater than zero.
    """
    # Explicit check instead of `assert`, which is stripped under `python -O`.
    # `not (x > 0)` also rejects NaN.
    if not temperature > 0:
        raise ValueError(f"temperature must be > 0, got {temperature!r}")

    inputs = tf.expand_dims(notes, 0)
    # Use the model that was passed in rather than a module-level global.
    predictions = keras_model.predict(inputs)
    pitch_logits = predictions['pitch'] / temperature
    step = predictions['step']
    duration = predictions['duration']

    pitch = tf.random.categorical(pitch_logits, num_samples=1)
    pitch = tf.squeeze(pitch, axis=-1)
    duration = tf.squeeze(duration, axis=-1)
    step = tf.squeeze(step, axis=-1)

    # The regression heads may emit negative values; times must be >= 0.
    step = tf.maximum(0, step)
    duration = tf.maximum(0, duration)

    return int(pitch.numpy()), float(step.numpy()), float(duration.numpy())
37
+
38
+
39
def notes_to_midi(notes, out_file, instrument_name="Acoustic Grand Piano"):
    """Write a sequence of notes to a single-instrument MIDI file.

    Args:
        notes: Iterable of note rows whose first three entries are
            ``(pitch, step, duration)``. ``step`` is the offset of the note's
            start from the previous note's start; ``duration`` is the note
            length. Both are used directly as pretty_midi times.
        out_file: Path of the MIDI file to write.
        instrument_name: Instrument name resolved through
            ``pretty_midi.instrument_name_to_program``. Defaults to
            "Acoustic Grand Piano" so the argument may be omitted.

    Returns:
        ``out_file``, unchanged.
    """
    pm = pretty_midi.PrettyMIDI()
    program = pretty_midi.instrument_name_to_program(instrument_name)
    instrument = pretty_midi.Instrument(program=program)

    # Start times are accumulated from the relative steps; the first note is
    # placed `step` after time 0.
    prev_start = 0
    for note_data in notes:
        pitch, step, duration = note_data[0], note_data[1], note_data[2]
        start = prev_start + step
        end = start + duration
        instrument.notes.append(
            pretty_midi.Note(velocity=100, pitch=int(pitch), start=start, end=end)
        )
        prev_start = start

    pm.instruments.append(instrument)
    pm.write(out_file)
    return out_file
57
+
58
+
59
+
60
def generate_music(input_text, instrument_name="Acoustic Grand Piano"):
    """Continue a seed melody with the model and write the result as MIDI.

    Args:
        input_text: Comma-separated seed notes given as flat
            ``pitch,step,duration`` triples, for example
            ``"60,0.5,0.5,62,0.5,0.5,64,0.5,0.5"``.
        instrument_name: Instrument name used for the generated MIDI track.

    Returns:
        The path of the written MIDI file, ``"generated_music.mid"``.

    Raises:
        ValueError: If a field is not numeric or the number of values is not
            a multiple of three.
    """
    temperature = 2.0
    num_predictions = 120
    seq_length = 25  # number of most recent notes passed to the model per step

    # Parse the input text into numbers and validate it before any model work.
    input_values = [float(val) for val in input_text.split(",")]
    if len(input_values) % 3 != 0:
        raise ValueError(
            "input_text must contain pitch,step,duration triples; "
            f"got {len(input_values)} values"
        )
    seed_notes = np.array(input_values).reshape(-1, 3)
    num_notes = len(seed_notes)

    # Left-pad short seeds with zero rows so the window always has at least
    # seq_length notes; longer seeds are kept whole and windowed below.
    input_data = np.zeros((max(num_notes, seq_length), 3))
    input_data[-num_notes:] = seed_notes

    # Generation loop: keep only a rolling window of the latest seq_length
    # notes instead of growing the full history on every step.
    window = input_data[-seq_length:]
    generated_notes = []
    for _ in range(num_predictions):
        pitch, step, duration = predict_next_note(window, model, temperature)
        generated_notes.append((pitch, step, duration))
        window = np.vstack([window[1:], [[pitch, step, duration]]])

    # Convert the generated notes to a MIDI file.
    output_file_name = "generated_music.mid"
    notes_to_midi(np.array(generated_notes), output_file_name, instrument_name)

    return output_file_name
101
+
102
+
103
if __name__ == "__main__":
    # Run a sample generation only when executed as a script, so importing
    # this module does not trigger 120 model predictions and a file write.
    generate_music("60,0.5,0.5,62,0.5,0.5,64,0.5,0.5")