import os.path import time as reqtime import datetime from pytz import timezone import torch import spaces import gradio as gr import random from midi_to_colab_audio import midi_to_colab_audio import TMIDIX import matplotlib.pyplot as plt from inference import PianoTranscription from config import sample_rate from utilities import load_audio # ================================================================================================= @spaces.GPU def TranscribePianoAudio(input_file): print('=' * 70) print('Req start time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(PDT))) start_time = reqtime.time() print('=' * 70) fn = os.path.basename(input_file) fn1 = fn.split('.')[0] out_mid = fn1+'.mid' print('-' * 70) print('Input file name:', fn) print('-' * 70) print('Loading audio...') # Load audio (audio, _) = load_audio(input_file, sr=sample_rate, mono=True) print('Done!') print('-' * 70) print('Loading transcriptor..') # Transcriptor transcriptor = PianoTranscription(device='cuda') # 'cuda' | 'cpu' print('Done!') print('-' * 70) print('Transcribing...') transcribed_dict = transcriptor.transcribe(audio, out_mid) print('Done!') print('-' * 70) #=============================================================================== raw_score = TMIDIX.midi2single_track_ms_score(out_mid) #=============================================================================== # Enhanced score notes print(raw_score) escore = TMIDIX.advanced_score_processor(raw_score, return_enhanced_score_notes=True)[0] #================================================================== print('=' * 70) print('Number of transcribed notes:', len(escore)) print('Sample trascribed MIDI events', escore[:5]) print('=' * 70) print('Done!') print('=' * 70) #=============================================================================== print('Rendering results...') print('=' * 70) audio = midi_to_colab_audio(out_mid, soundfont_path=soundfont, sample_rate=16000, volume_scale=10, output_for_gradio=True ) print('Done!') print('=' * 70) #======================================================== output_midi_title = str(fn1) output_midi_summary = str(escore[:3]) output_midi = str(out_mid) output_audio = (16000, audio) output_plot = TMIDIX.plot_ms_SONG(escore, plot_title=output_midi_title, return_plt=True) print('Output MIDI file name:', output_midi) print('Output MIDI title:', output_midi_title) print('Output MIDI summary:', output_midi_summary) print('=' * 70) #======================================================== print('-' * 70) print('Req end time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(PDT))) print('-' * 70) print('Req execution time:', (reqtime.time() - start_time), 'sec') return output_midi_title, output_midi_summary, output_midi, output_audio, output_plot # ================================================================================================= if __name__ == "__main__": PDT = timezone('US/Pacific') print('=' * 70) print('App start time: {:%Y-%m-%d %H:%M:%S}'.format(datetime.datetime.now(PDT))) print('=' * 70) soundfont = "SGM-v2.01-YamahaGrand-Guit-Bass-v2.7.sf2" app = gr.Blocks() with app: gr.Markdown("