import os.path
import copy
import time as reqtime
import datetime
from pytz import timezone

import torch

import spaces
import gradio as gr

from x_transformer_1_23_2 import *
import random
import tqdm

from midi_to_colab_audio import midi_to_colab_audio
import TMIDIX

import matplotlib.pyplot as plt

# =================================================================================================

@spaces.GPU
def GenerateAccompaniment(input_midi, input_num_tokens, input_acc_type):

    print('=' * 70)
    print('Req start time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(PDT)))
    start_time = reqtime.time()

    print('Loading model...')

    SEQ_LEN = 8192  # Models seq len
    PAD_IDX = 767   # Models pad index
    DEVICE = 'cuda' # 'cpu'

    # instantiate the model

    model = TransformerWrapper(
        num_tokens=PAD_IDX + 1,
        max_seq_len=SEQ_LEN,
        attn_layers=Decoder(dim=2048, depth=4, heads=16, attn_flash=True)
    )

    model = AutoregressiveWrapper(model, ignore_index=PAD_IDX)

    model.to(DEVICE)
    print('=' * 70)

    print('Loading model checkpoint...')

    model.load_state_dict(
        torch.load('Ultimate_Accompaniment_Transformer_Small_Improved_Trained_Model_13649_steps_0.3229_loss_0.898_acc.pth',
                   map_location=DEVICE))
    print('=' * 70)

    model.eval()

    if DEVICE == 'cpu':
        dtype = torch.bfloat16
    else:
        dtype = torch.float16

    ctx = torch.amp.autocast(device_type=DEVICE, dtype=dtype)

    print('Done!')
    print('=' * 70)

    fn = os.path.basename(input_midi.name)
    fn1 = fn.split('.')[0]

    input_num_tokens = max(4, min(128, input_num_tokens))

    print('-' * 70)
    print('Input file name:', fn)
    print('Req num toks:', input_num_tokens)
    print('Force acc:', input_acc_type)
    print('-' * 70)

    #===============================================================================

    raw_score = TMIDIX.midi2single_track_ms_score(input_midi.name)

    #===============================================================================
    # Enhanced score notes

    escore_notes = TMIDIX.advanced_score_processor(raw_score, return_enhanced_score_notes=True)[0]

    escore_notes = [e for e in escore_notes if e[3] != 9]

    if len(escore_notes) > 0:

        #=======================================================
        # PRE-PROCESSING

        #===============================================================================
        # Augmented enhanced score notes

        escore_notes = TMIDIX.augment_enhanced_score_notes(escore_notes, timings_divider=32)

        cscore = TMIDIX.chordify_score([1000, escore_notes])

        melody = TMIDIX.fix_monophonic_score_durations([sorted(e, key=lambda x: x[4], reverse=True)[0] for e in cscore])

        #=======================================================
        # FINAL PROCESSING

        melody_chords = []

        #=======================================================
        # MAIN PROCESSING CYCLE
        #=======================================================

        pe = cscore[0][0]

        mpe = melody[0]

        midx = 1

        for i, c in enumerate(cscore):

            c.sort(key=lambda x: (x[3], x[4]), reverse=True)

            # Next melody note

            if midx < len(melody):

                # Time
                mtime = melody[midx][1] - mpe[1]

                mdur = melody[midx][2]

                mdelta_time = max(0, min(127, mtime))

                # Durations
                mdur = max(0, min(127, mdur))

                # Pitch
                mptc = melody[midx][4]

            else:
                mtime = 127 - mpe[1]

                mdur = mpe[2]

                mdelta_time = max(0, min(127, mtime))

                # Durations
                mdur = max(0, min(127, mdur))

                # Pitch
                mptc = mpe[4]

            e = melody[i]

            #=======================================================
            # Timings...
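            # Token layout assembled below (derived from the offsets used in this
            # loop and in the decoding code further down):
            #   0..127   -> delta time (also accompaniment delta time at generation time)
            #   128..255 -> duration (value + 128)
            #   256..383 -> generated accompaniment pitch (value + 256)
            #   384..511 -> seed melody/chord pitch (value + 384)
            #   512..639 -> next melody note delta time (value + 512)
            #   640..767 -> next melody note pitch (value + 640); PAD_IDX = 767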
            time = e[1] - pe[1]

            dur = e[2]

            delta_time = max(0, min(127, time))

            # Durations
            dur = max(0, min(127, dur))

            # Pitches
            ptc = max(1, min(127, e[4]))

            # Transpose low melody pitches up into the octave starting at C4
            if ptc < 60:
                ptc = 60 + (ptc % 12)

            cha = e[3]

            #=======================================================
            # FINAL NOTE SEQ

            if midx < len(melody):
                melody_chords.append([delta_time, dur+128, ptc+384, mdelta_time+512, mptc+640])
                mpe = melody[midx]
                midx += 1

            else:
                melody_chords.append([delta_time, dur+128, ptc+384, mdelta_time+512, mptc+640])

            pe = e

        #===============================================================================

        print('=' * 70)
        print('Sample output events', melody_chords[:5])
        print('=' * 70)
        print('Generating...')

        output = []

        force_acc = input_acc_type

        num_toks_per_note = 32
        temperature = 0.9
        max_drums_limit = 4
        num_memory_tokens = 4096

        output1 = []
        output2 = []

        ctime = 0

        for m in melody_chords[:input_num_tokens]:

            mel = copy.deepcopy(m)
            mel[0] = mel[0] - ctime
            output1.extend(mel)

            input_seq = output1

            if force_acc:
                x = torch.LongTensor([input_seq + [0]]).to(DEVICE)
            else:
                x = torch.LongTensor([input_seq]).to(DEVICE)

            time = input_seq[-2] - 512

            cur_time = 0
            ctime = 0

            for _ in range(num_toks_per_note):

                with ctx:
                    # Limit the context to the last num_memory_tokens tokens of the sequence
                    out = model.generate(x[:, -num_memory_tokens:],
                                         1,
                                         temperature=temperature,
                                         return_prime=False,
                                         verbose=False)

                o = out.tolist()[0][0]

                if 0 <= o < 128:
                    cur_time += o

                if cur_time < time and o < 384:
                    ctime = cur_time
                    out = torch.LongTensor([[o]]).to(DEVICE)
                    x = torch.cat((x, out), 1)

                else:
                    break

            outy = x.tolist()[0][len(input_seq):]

            output1.extend(outy)
            output2.append(outy)

        print('=' * 70)
        print('Done!')
        print('=' * 70)

        #===============================================================================

        print('Rendering results...')

        print('=' * 70)
        print('Sample INTs', output1[:12])
        print('=' * 70)

        out1 = output2

        accompaniment_MIDI_patch_number = 0
        melody_MIDI_patch_number = 40

        if len(out1) != 0:

            song = out1
            song_f = []

            time = 0
            ntime = 0
            ndur = 0
            vel = 90
            npitch = 0
            channel = 0

            patches = [0] * 16
            patches[0] = accompaniment_MIDI_patch_number
            patches[3] = melody_MIDI_patch_number

            for i, ss in enumerate(song):

                # Melody note (channel 3)
                ntime += melody_chords[i][0] * 32
                ndur = (melody_chords[i][1] - 128) * 32
                nchannel = 1
                npitch = (melody_chords[i][2] - 256) % 128
                vel = max(40, npitch) + 20

                song_f.append(['note', ntime, ndur, 3, npitch, vel, melody_MIDI_patch_number])

                time = ntime

                # Generated accompaniment notes (channel 0)
                for s in ss:

                    if 0 <= s < 128:
                        time += s * 32

                    if 128 <= s < 256:
                        dur = (s - 128) * 32

                    if 256 <= s < 384:
                        pitch = s - 256
                        vel = max(40, pitch)

                        song_f.append(['note', time, dur, 0, pitch, vel, accompaniment_MIDI_patch_number])

        fn1 = "Ultimate-Accompaniment-Transformer-Composition"

        detailed_stats = TMIDIX.Tegridy_ms_SONG_to_MIDI_Converter(song_f,
                                                                  output_signature='Ultimate Accompaniment Transformer',
                                                                  output_file_name=fn1,
                                                                  track_name='Project Los Angeles',
                                                                  list_of_MIDI_patches=patches
                                                                  )

        new_fn = fn1 + '.mid'

        audio = midi_to_colab_audio(new_fn,
                                    soundfont_path=soundfont,
                                    sample_rate=16000,
                                    volume_scale=10,
                                    output_for_gradio=True
                                    )

        print('Done!')
        print('=' * 70)

        #========================================================

        output_midi_title = str(fn1)
        output_midi_summary = str(song_f[:3])
        output_midi = str(new_fn)
        output_audio = (16000, audio)

        output_plot = TMIDIX.plot_ms_SONG(song_f, plot_title=output_midi, return_plt=True)

        print('Output MIDI file name:', output_midi)
        print('Output MIDI title:', output_midi_title)
        print('Output MIDI summary:', '')
        print('=' * 70)

        #========================================================

        print('-' * 70)
        print('Req end time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(PDT)))
        print('-' * 70)
        print('Req execution time:', (reqtime.time() - start_time), 'sec')

        return output_midi_title, output_midi_summary, output_midi, output_audio, output_plot

# =================================================================================================

if __name__ == "__main__":

    PDT = timezone('US/Pacific')

    print('=' * 70)
    print('App start time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(PDT)))
    print('=' * 70)

    soundfont = "SGM-v2.01-YamahaGrand-Guit-Bass-v2.7.sf2"

    app = gr.Blocks()

    with app:

        gr.Markdown("# Ultimate Accompaniment Transformer")
        gr.Markdown("## Generate unique accompaniment for any melody")

        gr.Markdown(
            "![Visitors](https://api.visitorbadge.io/api/visitors?path=asigalov61.Ultimate-Accompaniment-Transformer&style=flat)\n\n"
            "Accompaniment generation for any monophonic melody\n\n"
            "Check out [Ultimate Accompaniment Transformer](https://github.com/asigalov61/Ultimate-Accompaniment-Transformer) on GitHub!\n\n"
            "[Open In Colab]"
            "(https://colab.research.google.com/github/asigalov61/Ultimate-Accompaniment-Transformer/blob/main/Ultimate_Accompaniment_Transformer.ipynb)"
            " for faster execution and endless generation"
        )

        gr.Markdown("## Upload your MIDI or select a sample example MIDI")

        input_midi = gr.File(label="Input MIDI", file_types=[".midi", ".mid", ".kar"])
        input_num_tokens = gr.Slider(4, 128, value=32, step=1, label="Number of composition chords to generate accompaniment for")
        input_acc_type = gr.Checkbox(label='Force accompaniment generation for each melody note')

        run_btn = gr.Button("generate", variant="primary")

        gr.Markdown("## Generation results")

        output_midi_title = gr.Textbox(label="Output MIDI title")
        output_midi_summary = gr.Textbox(label="Output MIDI summary")
        output_audio = gr.Audio(label="Output MIDI audio", format="wav", elem_id="midi_audio")
        output_plot = gr.Plot(label="Output MIDI score plot")
        output_midi = gr.File(label="Output MIDI file", file_types=[".mid"])

        run_event = run_btn.click(GenerateAccompaniment,
                                  [input_midi, input_num_tokens, input_acc_type],
                                  [output_midi_title, output_midi_summary, output_midi, output_audio, output_plot])

        gr.Examples(
            [["Ultimate-Accompaniment-Transformer-Melody-Seed-1.mid", 128, True],
             ["Ultimate-Accompaniment-Transformer-Melody-Seed-2.mid", 128, False],
             ["Ultimate-Accompaniment-Transformer-Melody-Seed-3.mid", 128, True],
             ["Ultimate-Accompaniment-Transformer-Melody-Seed-4.mid", 128, False],
             ["Ultimate-Accompaniment-Transformer-Melody-Seed-5.mid", 128, True],
             ["Ultimate-Accompaniment-Transformer-Melody-Seed-6.mid", 128, False],
             ["Ultimate-Accompaniment-Transformer-Melody-Seed-7.mid", 128, True]],
            [input_midi, input_num_tokens, input_acc_type],
            [output_midi_title, output_midi_summary, output_midi, output_audio, output_plot],
            GenerateAccompaniment,
            cache_examples=True,
        )

    app.queue().launch()