"""Gradio app: continue a user-uploaded MIDI file with a fine-tuned
MusicAutobot model and return the result as audio."""
import os
from pathlib import Path

import gradio as gr
from huggingface_hub import hf_hub_download
from midi2audio import FluidSynth
from music21.midi.translate import streamToMidiFile

from musicautobot.config import default_config
from musicautobot.music_transformer import *
from musicautobot.utils.midifile import *

# Log the current working directory for debugging.
print(os.getcwd())

# Load the stored dataset. This is needed to rebuild the vocabulary the
# model was trained with.
print('Loading data to build vocabulary.')
data_dir = Path('.')
data = load_data(data_dir, 'data.pkl')

print('Downloading model.')
model_cache_path = hf_hub_download(
    repo_id="psistolar/musicautobot-fine1", filename="model.pth"
)

# Default config options, with positional encoding enabled to match training.
config = default_config()
config['encode_position'] = True

print("Building model.")
# Load our fine-tuned model.
learner = music_model_learner(
    data,
    config=config.copy(),
    pretrained_path=model_cache_path
)
print("Ready to use.")


def process_midi(midi_file):
    name = Path(midi_file.name)

    # Create the model input object from the uploaded MIDI file.
    item = MusicItem.from_file(name, data.vocab)

    # `full` is the prediction appended to the input.
    pred, full = learner.predict(item, n_words=100)

    # Convert the prediction to a music21 stream, then to a MIDI file.
    stream = full.to_stream()
    out = streamToMidiFile(stream)

    # Save the MIDI file.
    out.open('result.midi', 'wb')
    out.write()
    out.close()

    # Use FluidSynth to render the MIDI to WAV so the user can hear the output.
    sound_font = "/usr/share/sounds/sf2/FluidR3_GM.sf2"
    FluidSynth(sound_font).midi_to_audio('result.midi', 'result.wav')
    return 'result.wav'


iface = gr.Interface(
    fn=process_midi,
    inputs="file",
    outputs="audio"
)
iface.launch()