Spaces: Running on Zero

asigalov61 committed
Commit: 65d99ea
Parent(s): 28c7a7a

Update app.py

app.py CHANGED
@@ -23,7 +23,7 @@ in_space = os.getenv("SYSTEM") == "spaces"
 # =================================================================================================
 
 @spaces.GPU
-def GenerateAccompaniment(input_midi, input_num_tokens, input_conditioning_type, input_strip_notes):
+def InpaintPitches(input_midi, input_num_of_notes, input_patch_number):
     print('=' * 70)
     print('Req start time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(PDT)))
     start_time = reqtime.time()
@@ -31,7 +31,7 @@ def GenerateAccompaniment(input_midi, input_num_tokens, input_conditioning_type, input_strip_notes):
     print('Loading model...')
 
     SEQ_LEN = 8192 # Models seq len
-    PAD_IDX =
+    PAD_IDX = 19463 # Models pad index
     DEVICE = 'cuda' # 'cuda'
 
     # instantiate the model
@@ -39,7 +39,7 @@ def GenerateAccompaniment(input_midi, input_num_tokens, input_conditioning_type, input_strip_notes):
     model = TransformerWrapper(
         num_tokens = PAD_IDX+1,
         max_seq_len = SEQ_LEN,
-        attn_layers = Decoder(dim =
+        attn_layers = Decoder(dim = 1024, depth = 32, heads = 32, attn_flash = True)
     )
 
     model = AutoregressiveWrapper(model, ignore_index = PAD_IDX)
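For context, the new hyperparameters build a large decoder with the x-transformers library. Below is a minimal standalone sketch of how such an AutoregressiveWrapper model is constructed and driven, based on the public x-transformers API; the dummy seed prompt and the 16-token generation call are illustrative and not part of app.py:

    import torch
    from x_transformers import TransformerWrapper, Decoder, AutoregressiveWrapper

    PAD_IDX, SEQ_LEN = 19463, 8192  # values from this commit

    model = AutoregressiveWrapper(
        TransformerWrapper(num_tokens=PAD_IDX+1,
                           max_seq_len=SEQ_LEN,
                           attn_layers=Decoder(dim=1024, depth=32, heads=32, attn_flash=True)),
        ignore_index=PAD_IDX)  # loss/generation ignores the pad token

    seed = torch.zeros((1, 1), dtype=torch.long)  # dummy single-token prompt
    out = model.generate(seed, 16)                # sample 16 continuation tokens
    print(out.shape)                              # torch.Size([1, 16])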
@@ -50,7 +50,7 @@ def GenerateAccompaniment(input_midi, input_num_tokens, input_conditioning_type, input_strip_notes):
     print('Loading model checkpoint...')
 
     model.load_state_dict(
-        torch.load('
+        torch.load('Giant_Music_Transformer_Large_Trained_Model_36074_steps_0.3067_loss_0.927_acc.pth',
                    map_location=DEVICE))
     print('=' * 70)
 
@@ -59,7 +59,7 @@ def GenerateAccompaniment(input_midi, input_num_tokens, input_conditioning_type, input_strip_notes):
     if DEVICE == 'cpu':
         dtype = torch.bfloat16
     else:
-        dtype = torch.
+        dtype = torch.bfloat16
 
     ctx = torch.amp.autocast(device_type=DEVICE, dtype=dtype)
 
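Note that after this change both branches assign torch.bfloat16, so the if/else is effectively a no-op. A minimal sketch of the autocast pattern the code relies on, using standard PyTorch only:

    import torch

    DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
    dtype = torch.bfloat16

    # Ops inside the context run in bfloat16 where autocast supports them
    with torch.amp.autocast(device_type=DEVICE, dtype=dtype):
        a = torch.randn(4, 4, device=DEVICE)
        b = torch.randn(4, 4, device=DEVICE)
        y = a @ b

    print(y.dtype)  # torch.bfloat16 on supported hardware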
@@ -69,13 +69,12 @@ def GenerateAccompaniment(input_midi, input_num_tokens, input_conditioning_type, input_strip_notes):
     fn = os.path.basename(input_midi.name)
     fn1 = fn.split('.')[0]
 
-
+    input_num_of_notes = max(8, min(2048, input_num_of_notes))
 
     print('-' * 70)
     print('Input file name:', fn)
-    print('Req num
-    print('
-    print('Strip notes:', input_strip_notes)
+    print('Req num of notes:', input_num_of_notes)
+    print('Req patch number:', input_patch_number)
     print('-' * 70)
 
     #===============================================================================
@@ -84,124 +83,121 @@ def GenerateAccompaniment(input_midi, input_num_tokens, input_conditioning_type, input_strip_notes):
     #===============================================================================
     # Enhanced score notes
 
-
-
-
-    #=======================================================
-    # PRE-PROCESSING
-
-    #===============================================================================
-    # Augmented enhanced score notes
-
-    no_drums_escore_notes = TMIDIX.augment_enhanced_score_notes(no_drums_escore_notes)
-
-    cscore = TMIDIX.chordify_score([1000, no_drums_escore_notes])
-
-    clean_cscore = []
-
-    for c in cscore:
-        pitches = []
-        cho = []
-        for cc in c:
-            if cc[4] not in pitches:
-                cho.append(cc)
-                pitches.append(cc[4])
-
-        clean_cscore.append(cho)
-
-    #=======================================================
-    # FINAL PROCESSING
-
-    melody_chords = []
-    chords = []
-    times = [0]
-    durs = []
-
-    #=======================================================
-    # MAIN PROCESSING CYCLE
-    #=======================================================
-
-    pe = clean_cscore[0][0]
-
-    first_chord = True
-
-    for c in clean_cscore:
-
-        # Chords
-
-        c.sort(key=lambda x: x[4], reverse=True)
-
-        tones_chord = sorted(set([cc[4] % 12 for cc in c]))
-
-        try:
-            chord_token = TMIDIX.ALL_CHORDS_SORTED.index(tones_chord)
-        except:
-            checked_tones_chord = TMIDIX.check_and_fix_tones_chord(tones_chord)
-            chord_token = TMIDIX.ALL_CHORDS_SORTED.index(checked_tones_chord)
-
-        melody_chords.extend([chord_token+384])
-
-        if input_strip_notes:
-            if len(tones_chord) > 1:
-                chords.extend([chord_token+384])
-
-        else:
-            chords.extend([chord_token+384])
-
-        if first_chord:
-            melody_chords.extend([0])
-            first_chord = False
-
-        for e in c:
-
-            #=======================================================
-            # Timings...
-
-            time = e[1]-pe[1]
-
-            dur = e[2]
-
-            if time != 0 and time % 2 != 0:
-                time += 1
-            if dur % 2 != 0:
-                dur += 1
-
-            delta_time = int(max(0, min(255, time)) / 2)
-
-            # Durations
-
-            dur = int(max(0, min(255, dur)) / 2)
-
-            # Pitches
-
-            ptc = max(1, min(127, e[4]))
-
-            #=======================================================
-            # FINAL NOTE SEQ
-
-            # Writing final note asynchronously
-
-            if delta_time != 0:
-                melody_chords.extend([delta_time, dur+128, ptc+256])
-                if input_strip_notes:
-                    if len(c) > 1:
-                        times.append(delta_time)
-                        durs.append(dur+128)
-                else:
-                    times.append(delta_time)
-                    durs.append(dur+128)
-            else:
-                melody_chords.extend([dur+128, ptc+256])
-
-            pe = e
+    events_matrix1 = TMIDIX.advanced_score_processor(raw_score, return_enhanced_score_notes=True)[0]
+
+    #=======================================================
+    # PRE-PROCESSING
+
+    # checking number of instruments in a composition
+    instruments_list_without_drums = list(set([y[3] for y in events_matrix1 if y[3] != 9]))
+    instruments_list = list(set([y[3] for y in events_matrix1]))
+
+    if len(events_matrix1) > 0 and len(instruments_list_without_drums) > 0:
+
+        #======================================
+
+        events_matrix2 = []
+
+        # Recalculating timings
+        for e in events_matrix1:
+
+            # Original timings
+            e[1] = int(e[1] / 16)
+            e[2] = int(e[2] / 16)
+
+        #===================================
+        # ORIGINAL COMPOSITION
+        #===================================
+
+        # Sorting by patch, pitch, then by start-time
+
+        events_matrix1.sort(key=lambda x: x[6])
+        events_matrix1.sort(key=lambda x: x[4], reverse=True)
+        events_matrix1.sort(key=lambda x: x[1])
+
+        #=======================================================
+        # FINAL PROCESSING
+
+        melody_chords = []
+        melody_chords2 = []
+
+        # Break between compositions / Intro seq
+
+        if 9 in instruments_list:
+            drums_present = 19331 # Yes
+        else:
+            drums_present = 19330 # No
+
+        if events_matrix1[0][3] != 9:
+            pat = events_matrix1[0][6]
+        else:
+            pat = 128
+
+        melody_chords.extend([19461, drums_present, 19332+pat]) # Intro seq
+
+        #=======================================================
+        # MAIN PROCESSING CYCLE
+        #=======================================================
+
+        abs_time = 0
+
+        pbar_time = 0
+
+        pe = events_matrix1[0]
+
+        chords_counter = 1
+
+        comp_chords_len = len(list(set([y[1] for y in events_matrix1])))
+
+        for e in events_matrix1:
+
+            #=======================================================
+            # Timings...
+
+            # Cliping all values...
+            delta_time = max(0, min(255, e[1]-pe[1]))
+
+            # Durations and channels
+
+            dur = max(0, min(255, e[2]))
+            cha = max(0, min(15, e[3]))
+
+            # Patches
+            if cha == 9: # Drums patch will be == 128
+                pat = 128
+
+            else:
+                pat = e[6]
+
+            # Pitches
+
+            ptc = max(1, min(127, e[4]))
+
+            # Velocities
+
+            # Calculating octo-velocity
+            vel = max(8, min(127, e[5]))
+            velocity = round(vel / 15)-1
+
+            #=======================================================
+            # FINAL NOTE SEQ
+
+            # Writing final note asynchronously
+
+            dur_vel = (8 * dur) + velocity
+            pat_ptc = (129 * pat) + ptc
+
+            melody_chords.extend([delta_time, dur_vel+256, pat_ptc+2304])
+            melody_chords2.append([delta_time, dur_vel+256, pat_ptc+2304])
+
+            pe = e
 
     #==================================================================
 
     print('=' * 70)
-
+    print('Number of tokens:', len(melody_chords))
+    print('Number of notes:', len(melody_chords2))
     print('Sample output events', melody_chords[:5])
     print('=' * 70)
     print('Generating...')
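The new main processing cycle packs each note into three tokens: a delta-time token (0..255), a duration-plus-velocity token (offset +256), and a patch-plus-pitch token (offset +2304), with velocity first squashed to eight levels ("octo-velocity"). A round-trip sketch of that arithmetic; the encode formulas are copied from the added code, while decode_note simply inverts them and is an illustration, not code from app.py:

    def encode_note(delta_time, dur, vel, pat, ptc):
        velocity = round(max(8, min(127, vel)) / 15) - 1   # octo-velocity: 0..7
        dur_vel = (8 * max(0, min(255, dur))) + velocity   # 0..2047
        pat_ptc = (129 * pat) + max(1, min(127, ptc))      # patch 0..128, pitch 1..127
        return [max(0, min(255, delta_time)), dur_vel + 256, pat_ptc + 2304]

    def decode_note(t_time, t_dur_vel, t_pat_ptc):
        dur_vel = t_dur_vel - 256
        pat_ptc = t_pat_ptc - 2304
        return dict(delta_time=t_time,
                    dur=dur_vel // 8, velocity=dur_vel % 8,
                    pat=pat_ptc // 129, ptc=pat_ptc % 129)

    toks = encode_note(delta_time=10, dur=100, vel=90, pat=0, ptc=60)
    print(toks)                # [10, 1061, 2364]
    print(decode_note(*toks))  # delta_time 10, dur 100, velocity 5, pat 0, ptc 60

With this layout, note tokens end at 2304 + 129*128 + 127 = 18943, which is consistent with the control tokens seen elsewhere in the diff (19330/19331 drums flag, 19461 intro marker) and with PAD_IDX = 19463.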
@@ -226,7 +222,7 @@ def GenerateAccompaniment(input_midi, input_num_tokens, input_conditioning_type, input_strip_notes):
     if input_conditioning_type == 'Chords-Times-Durations':
         output.append(durs[idx])
 
-    x = torch.tensor([output] * 1, dtype=torch.long, device=
+    x = torch.tensor([output] * 1, dtype=torch.long, device=DEVICE)
 
     o = 0
 
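A side note on the seed tensor: [output] * 1 just wraps the Python token list in a batch dimension of one, giving a (batch, seq_len) LongTensor. A tiny sketch with an illustrative token list:

    import torch

    output = [19461, 19330, 19332]                     # example seed tokens
    x = torch.tensor([output] * 1, dtype=torch.long)   # shape (1, 3): (batch, seq)
    print(x.shape)                                     # torch.Size([1, 3])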
@@ -376,9 +372,8 @@ if __name__ == "__main__":
     gr.Markdown("## Upload your MIDI or select a sample example MIDI")
 
     input_midi = gr.File(label="Input MIDI", file_types=[".midi", ".mid", ".kar"])
-
-
-    input_strip_notes = gr.Checkbox(label="Strip notes from the composition")
+    input_num_of_notes = gr.Slider(8, 2048, value=128, step=8, label="Number of composition notes to inpaint")
+    input_patch_number = gr.Slider(0, 127, value=0, step=1, label="Composition MIDI patch to inpaint")
 
     run_btn = gr.Button("generate", variant="primary")
 
@@ -391,7 +386,7 @@
     output_midi = gr.File(label="Output MIDI file", file_types=[".mid"])
 
 
-    run_event = run_btn.click(
+    run_event = run_btn.click(InpaintPitches, [input_midi, input_num_of_notes, input_patch_number],
                               [output_midi_title, output_midi_summary, output_midi, output_audio, output_plot])
 
     gr.Examples(
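The rewired click handler follows the standard Gradio Blocks pattern: run_btn.click(fn, inputs, outputs). A minimal self-contained sketch with a stub standing in for the real InpaintPitches (which returns a title, summary, MIDI file, audio, and plot); the component names mirror the diff, while the Textbox output is illustrative only:

    import gradio as gr

    def InpaintPitches(input_midi, input_num_of_notes, input_patch_number):
        # stub standing in for the real generation function
        return f'{input_num_of_notes} notes, patch {input_patch_number}'

    with gr.Blocks() as demo:
        input_midi = gr.File(label="Input MIDI", file_types=[".midi", ".mid", ".kar"])
        input_num_of_notes = gr.Slider(8, 2048, value=128, step=8,
                                       label="Number of composition notes to inpaint")
        input_patch_number = gr.Slider(0, 127, value=0, step=1,
                                       label="Composition MIDI patch to inpaint")
        run_btn = gr.Button("generate", variant="primary")
        result = gr.Textbox(label="Result")

        run_btn.click(InpaintPitches,
                      [input_midi, input_num_of_notes, input_patch_number],
                      [result])

    # demo.launch()  # uncomment to serve locally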