asigalov61 committed
Commit df2f0e4 · 1 parent: 4d766d5

Update app.py

Files changed (1):
  app.py +155 -111
app.py CHANGED
@@ -25,82 +25,8 @@ in_space = os.getenv("SYSTEM") == "spaces"
 
# =================================================================================================
 
@spaces.GPU
-def classify_GPU(input_data):
-
-    print('Loading model...')
-
-    SEQ_LEN = 1026
-    PAD_IDX = 940
-    DEVICE = 'cuda' # 'cuda'
-
-    # instantiate the model
-
-    model = TransformerWrapper(
-        num_tokens = PAD_IDX+1,
-        max_seq_len = SEQ_LEN,
-        attn_layers = Decoder(dim = 1024, depth = 24, heads = 32, attn_flash = True)
-        )
-
-    model = AutoregressiveWrapper(model, ignore_index=PAD_IDX, pad_value=PAD_IDX)
-
-    model = torch.nn.DataParallel(model)
-
-    model.to(DEVICE)
-
-    print('=' * 70)
-
-    print('Loading model checkpoint...')
-
-    model.load_state_dict(
-        torch.load('Ultimate_MIDI_Classifier_Trained_Model_29886_steps_0.556_loss_0.8339_acc.pth',
-                   map_location=DEVICE))
-    print('=' * 70)
-
-    model.eval()
-
-    if DEVICE == 'cpu':
-        dtype = torch.bfloat16
-    else:
-        dtype = torch.bfloat16
-
-    ctx = torch.amp.autocast(device_type=DEVICE, dtype=dtype)
-
-    print('Done!')
-    print('=' * 70)
-
-    #==================================================================
-
-    print('=' * 70)
-    print('Ultimate MIDI Classifier')
-    print('=' * 70)
-    print('Classifying...')
-
-    torch.cuda.empty_cache()
-
-    model.eval()
-
-    x = torch.tensor(input_data[:1022], dtype=torch.long, device=DEVICE)
-
-    with ctx:
-        out = model.module.generate(x,
-                                    2,
-                                    filter_logits_fn=top_k,
-                                    filter_kwargs={'k': 1},
-                                    temperature=0.9,
-                                    return_prime=False,
-                                    verbose=False)
-
-    result = tuple(out[0].tolist())
-
-    return result
-
-# =================================================================================================
-
def ClassifyMIDI(input_midi):
 
-    SEQ_LEN = 1024
-    PAD_IDX = 14627
-
    print('=' * 70)
    print('Req start time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(PDT)))
    start_time = reqtime.time()
@@ -122,60 +48,108 @@ def ClassifyMIDI(input_midi):
 
    escore_notes = TMIDIX.advanced_score_processor(raw_score, return_enhanced_score_notes=True)[0]
 
-    escore = [e for e in TMIDIX.augment_enhanced_score_notes(escore_notes, timings_divider=32) if e[6] < 80]
-
-    cscore = TMIDIX.chordify_score([1000, escore])
-
+    #===============================================================================
+    # Augmented enhanced score notes
+
+    escore_notes = TMIDIX.augment_enhanced_score_notes(escore_notes, timings_divider=32)
+
+    escore_notes = [e for e in escore_notes if e[6] < 80 or e[6] == 128]
+
    #=======================================================
-    # MAIN PROCESSING CYCLE
+    # Augmentation
+
    #=======================================================
-
+    # FINAL PROCESSING
+
    melody_chords = []
-
-    pe = cscore[0][0]
-
-    for c in cscore:
-
-      pitches = []
-
-      for e in c:
-
-        if e[4] not in pitches:
-
-            dtime = max(0, min(127, e[1]-pe[1]))
-
-            dur = max(1, min(127, e[2]))
+
+    #=======================================================
+    # MAIN PROCESSING CYCLE
+    #=======================================================
+
+    pe = escore_notes[0]
+
+    pitches = []
+
+    notes_counter = 0
+
+    for e in escore_notes:
+
+        #=======================================================
+        # Timings...
+
+        delta_time = max(0, min(127, e[1]-pe[1]))
+
+        if delta_time != 0:
+            pitches = []
+
+        # Durations and channels
+
+        dur = max(1, min(127, e[2]))
+
+        # Patches
+        pat = max(0, min(128, e[6]))
+
+        # Pitches
+
+        if pat == 128:
+            ptc = max(1, min(127, e[4]))+128
+        else:
            ptc = max(1, min(127, e[4]))
-
-            melody_chords.append([dtime, dur, ptc])
-
-            pitches.append(ptc)
-
-      pe = e
-
-    #==============================================================
-
-    seq = []
+
+        #=======================================================
+        # FINAL NOTE SEQ
+
+        # Writing final note synchronously
+
+        if ptc not in pitches:
+            melody_chords.extend([delta_time, dur+128, ptc+256])
+            pitches.append(ptc)
+            notes_counter += 1
+
+        pe = e
+
+    #==============================================================
+
+    print('Done!')
+    print('=' * 70)
+    print('Composition has', notes_counter, 'notes')
+    print('=' * 70)
+
+    print('=' * 70)
+    print('Ultimate MIDI Classifier')
+    print('=' * 70)
+
+    print('Input MIDI file name:', midi_name)
+    print('=' * 70)
+    print('Sampling score...')
+
+    chunk_size = 1020
+
+    score = melody_chords
+
    input_data = []
 
-    notes_counter = 0
-
-    for mm in melody_chords:
-
-      time = mm[0]
-      dur = mm[1]
-      ptc = mm[2]
-
-      seq.extend([time, dur+128, ptc+256])
-      notes_counter += 1
-
-    for i in range(0, len(seq)-SEQ_LEN-4, (SEQ_LEN-4) // 4):
-      schunk = seq[i:i+SEQ_LEN-4]
-      input_data.append([14624] + schunk + [14625])
-
+    for i in range(0, len(score)-chunk_size, chunk_size // classification_sampling_resolution):
+        schunk = score[i:i+chunk_size]
+
+        if len(schunk) == chunk_size:
+
+            td = [937]
+
+            td.extend(schunk)
+
+            td.extend([938])
+
+            input_data.append(td)
+
    print('Done!')
    print('=' * 70)
-
+    print('Composition was split into' , len(input_data), 'samples', 'of 340 notes each with', 340 - chunk_size // classification_sampling_resolution // 3, 'notes overlap')
+    print('=' * 70)
+    print('Number of notes in all composition samples:', len(input_data) * 340)
+    print('=' * 70)
+
    #==============================================================
 
    classification_summary_string = '=' * 70
@@ -194,7 +168,77 @@ def ClassifyMIDI(input_midi):
    classification_summary_string += '=' * 70
    classification_summary_string += '\n'
 
-    output, results = classify_GPU(input_data)
+    print('Loading model...')
+
+    SEQ_LEN = 1026
+    PAD_IDX = 940
+    DEVICE = 'cuda' # 'cuda'
+
+    # instantiate the model
+
+    model = TransformerWrapper(
+        num_tokens = PAD_IDX+1,
+        max_seq_len = SEQ_LEN,
+        attn_layers = Decoder(dim = 1024, depth = 24, heads = 32, attn_flash = True)
+        )
+
+    model = AutoregressiveWrapper(model, ignore_index=PAD_IDX, pad_value=PAD_IDX)
+
+    model = torch.nn.DataParallel(model)
+
+    model.to(DEVICE)
+
+    print('=' * 70)
+
+    print('Loading model checkpoint...')
+
+    model.load_state_dict(
+        torch.load('Ultimate_MIDI_Classifier_Trained_Model_29886_steps_0.556_loss_0.8339_acc.pth',
+                   map_location=DEVICE))
+    print('=' * 70)
+
+    if DEVICE == 'cpu':
+        dtype = torch.bfloat16
+    else:
+        dtype = torch.bfloat16
+
+    ctx = torch.amp.autocast(device_type=DEVICE, dtype=dtype)
+
+    print('Done!')
+    print('=' * 70)
+
+    #==================================================================
+
+    print('=' * 70)
+    print('Ultimate MIDI Classifier')
+    print('=' * 70)
+    print('Classifying...')
+
+    torch.cuda.empty_cache()
+
+    model.eval()
+
+    artist_results = []
+    song_results = []
+
+    results = []
+
+    for input in input_data:
+
+        x = torch.tensor(input[:1022], dtype=torch.long, device='cuda')
+
+        with ctx:
+            out = model.module.generate(x,
+                                        2,
+                                        filter_logits_fn=top_k,
+                                        filter_kwargs={'k': 1},
+                                        temperature=0.9,
+                                        return_prime=False,
+                                        verbose=False)
+
+        result = tuple(out[0].tolist())
+
+        results.append(result)
 
    all_results_labels = [classifier_labels[0][r-384] for r in results]
    final_result = mode(results)
 
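With classify_GPU() gone, the preprocessing now lives inline in ClassifyMIDI() and encodes each note as three tokens: a delta-time in [0, 127], a duration shifted by +128, and a pitch shifted by +256, with drum notes (patch 128) pushed up a further +128 so they occupy their own pitch range. A minimal, self-contained sketch of that encoding (not part of the commit), assuming TMIDIX enhanced-score events where e[1] is start time, e[2] duration, e[4] pitch and e[6] patch:

def encode_notes(escore_notes):

    # One [delta_time, dur+128, ptc+256] triplet per unique pitch per time step
    tokens = []
    pitches = []            # pitches already written at the current time step
    pe = escore_notes[0]    # previous event, for delta-time computation

    for e in escore_notes:

        delta_time = max(0, min(127, e[1] - pe[1]))

        if delta_time != 0:
            pitches = []    # new time step -> reset the de-duplication list

        dur = max(1, min(127, e[2]))
        pat = max(0, min(128, e[6]))

        # Drums (patch 128) get an extra +128 pitch offset
        ptc = max(1, min(127, e[4])) + (128 if pat == 128 else 0)

        if ptc not in pitches:
            tokens.extend([delta_time, dur + 128, ptc + 256])
            pitches.append(ptc)

        pe = e

    return tokens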
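The sampling stage then slides a 1020-token window over the encoded score; every full window holds 340 notes (3 tokens each) and is framed by the start/end tokens 937 and 938, which yields the 1022-token model inputs. A sketch of the arithmetic, with the global classification_sampling_resolution (defined elsewhere in app.py) assumed to be 4 for illustration:

classification_sampling_resolution = 4                     # assumed value
chunk_size = 1020                                          # 340 notes * 3 tokens
step = chunk_size // classification_sampling_resolution    # 255 tokens

def sample_score(score):

    input_data = []

    for i in range(0, len(score) - chunk_size, step):
        schunk = score[i:i + chunk_size]
        if len(schunk) == chunk_size:
            input_data.append([937] + schunk + [938])      # 1022 tokens

    return input_data

# Under these assumptions consecutive samples share 340 - step // 3 = 255 notes,
# which is the "notes overlap" figure the script prints.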
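The model-construction block that moved into ClassifyMIDI() hinges on two PyTorch details: torch.nn.DataParallel prefixes every parameter name with "module.", so a checkpoint saved from a DataParallel model only loads cleanly after wrapping, and the wrapped AutoregressiveWrapper has to be reached through model.module. A sketch of the load path; the import is an assumption standing in for the X Transformer module app.py actually bundles:

import torch
from x_transformers import TransformerWrapper, Decoder, AutoregressiveWrapper

def load_classifier(ckpt_path, device='cuda'):

    SEQ_LEN = 1026   # model's maximum sequence length
    PAD_IDX = 940

    model = TransformerWrapper(
        num_tokens=PAD_IDX + 1,
        max_seq_len=SEQ_LEN,
        attn_layers=Decoder(dim=1024, depth=24, heads=32, attn_flash=True)
    )

    model = AutoregressiveWrapper(model, ignore_index=PAD_IDX, pad_value=PAD_IDX)

    # Wrap BEFORE load_state_dict: the checkpoint keys carry the "module." prefix
    model = torch.nn.DataParallel(model)
    model.to(device)

    model.load_state_dict(torch.load(ckpt_path, map_location=device))
    model.eval()

    return model   # reach generation via model.module.generate(...)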
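Classification itself is framed as generation: each 1022-token sample is extended by exactly two tokens, and top_k with k=1 makes the decoding greedy despite the 0.9 temperature, so the per-chunk predictions can be reduced to one verdict with statistics.mode(). A sketch of that loop; the top_k import path mirrors the pip x_transformers package (the app bundles its own copy, whose generate() also accepts return_prime and verbose), and indexing classifier_labels with the first generated token minus 384 is an assumption based on the surrounding code:

import torch
from statistics import mode
from x_transformers.autoregressive_wrapper import top_k   # assumed import path

def classify_chunks(model, ctx, input_data, classifier_labels, device='cuda'):

    results = []

    for chunk in input_data:

        x = torch.tensor(chunk[:1022], dtype=torch.long, device=device)

        with ctx:
            # Ask the decoder for exactly 2 continuation tokens; k=1 makes
            # the sampling deterministic regardless of temperature
            out = model.module.generate(x, 2,
                                        filter_logits_fn=top_k,
                                        filter_kwargs={'k': 1},
                                        temperature=0.9,
                                        return_prime=False,
                                        verbose=False)

        results.append(tuple(out[0].tolist()))

    final_result = mode(results)                                # most frequent 2-token prediction
    final_label = classifier_labels[0][final_result[0] - 384]   # assumed label mapping

    return final_label, results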