import gradio as gr
import numpy as np
import pretty_midi
import tensorflow as tf
import soundfile as sf
# import fluidsynth   # only needed for the commented-out fluidsynth converter below
# import subprocess   # likewise, only used by the fluidsynth converter
from huggingface_hub import from_pretrained_keras


def mse_with_positive_pressure(y_true: tf.Tensor, y_pred: tf.Tensor):
    """MSE with an extra penalty that pushes predictions toward positive values.

    This custom loss was used at training time, so it must be passed via
    custom_objects when the model is loaded.
    """
    mse = (y_true - y_pred) ** 2
    positive_pressure = 10 * tf.maximum(-y_pred, 0.0)
    return tf.reduce_mean(mse + positive_pressure)


model = from_pretrained_keras(
    "yiwv/music-gen-kit-model",
    custom_objects={"mse_with_positive_pressure": mse_with_positive_pressure},
)


def predict_next_note(notes, keras_model, temperature=1.0):
    """Sample one (pitch, step, duration) triple from the model."""
    assert temperature > 0

    # Add a batch dimension: (seq_length, 3) -> (1, seq_length, 3).
    inputs = tf.expand_dims(notes, 0)
    predictions = keras_model.predict(inputs)
    pitch_logits = predictions["pitch"]
    step = predictions["step"]
    duration = predictions["duration"]

    # Temperature-scale the pitch logits, then sample a pitch.
    pitch_logits /= temperature
    pitch = tf.random.categorical(pitch_logits, num_samples=1)
    pitch = tf.squeeze(pitch, axis=-1)
    duration = tf.squeeze(duration, axis=-1)
    step = tf.squeeze(step, axis=-1)

    # Clamp step and duration so time never moves backwards.
    step = tf.maximum(0.0, step)
    duration = tf.maximum(0.0, duration)

    return int(pitch.numpy()), float(step.numpy()), float(duration.numpy())


# Alternative converter using the fluidsynth CLI and a SoundFont; kept for
# reference but disabled because no SoundFont is bundled with this app.
# def convert_midi_to_wav(midi_path, wav_path, soundfont_path):
#     cmd = ["fluidsynth", "-ni", soundfont_path, midi_path, "-F", wav_path, "-r", "44100"]
#     subprocess.run(cmd)


def notes_to_midi(notes, out_file, instrument_name):
    """Write (pitch, step, duration) rows to a single-instrument MIDI file."""
    pm = pretty_midi.PrettyMIDI()
    instrument = pretty_midi.Instrument(
        program=pretty_midi.instrument_name_to_program(instrument_name)
    )
    prev_start = 0
    for note_data in notes:
        pitch = int(note_data[0])
        step = note_data[1]      # offset from the previous note's start time
        duration = note_data[2]  # how long the note sounds
        start = prev_start + step
        end = start + duration
        note = pretty_midi.Note(velocity=100, pitch=pitch, start=start, end=end)
        instrument.notes.append(note)
        prev_start = start
    pm.instruments.append(instrument)
    pm.write(out_file)
    return out_file


def convert_midi_to_wav_with_pretty_midi(midi_path, wav_path):
    # Load the MIDI file.
    midi_data = pretty_midi.PrettyMIDI(midi_path)
    # Synthesize the audio waveform (pretty_midi's built-in sine synthesis, mono).
    audio_data = midi_data.synthesize(fs=44100)
    # Save it as a WAV file.
    sf.write(wav_path, audio_data, 44100)


def generate_music(input_text, instrument_name="Acoustic Grand Piano"):
    """
    input_text is a comma-separated sequence of (pitch, step, duration)
    triples, e.g.: 60,0.5,0.5,62,0.5,0.5,64,0.5,0.5
    """
    temperature = 2.0
    num_predictions = 120
    seq_length = 25

    # Convert the input text into a list of numbers.
    input_values = [float(val) for val in input_text.split(",")]

    # Adjust the input shape dynamically: left-pad with zeros when the seed
    # is shorter than the model's seq_length window.
    num_notes = len(input_values) // 3
    if num_notes > seq_length:
        input_data = np.zeros((num_notes, 3))
    else:
        input_data = np.zeros((seq_length, 3))
    input_data[-num_notes:] = np.array(input_values).reshape(-1, 3)

    # Generation loop: feed the last seq_length notes back into the model.
    generated_notes = []
    for _ in range(num_predictions):
        pitch, step, duration = predict_next_note(input_data[-seq_length:], model, temperature)
        generated_notes.append((pitch, step, duration))
        new_note = np.array([[pitch, step, duration]])
        input_data = np.vstack([input_data, new_note])

    # Convert the generated notes to a MIDI file.
    generated_notes_array = np.array(generated_notes)
    output_file_name = "generated_music.mid"
    notes_to_midi(generated_notes_array, output_file_name, instrument_name)

    # try:
    #     convert_midi_to_wav(output_file_name, "output.wav", "GeneralUserGS.sf2")
    # except Exception:
    convert_midi_to_wav_with_pretty_midi(output_file_name, "output.wav")

    return "output.wav"


# Define the Gradio interface.
iface = gr.Interface(fn=generate_music, inputs="text", outputs="audio")
# iface.launch()
iface.launch(debug=True, share=True)
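
# --- Optional local smoke test (sketch) ---
# A minimal, hedged example of exercising generate_music directly, without the
# Gradio UI, using the C-D-E seed from the docstring. It is commented out
# because iface.launch() above blocks; to try it, comment out the launch call
# instead. It assumes the Hub download succeeds and that the working directory
# is writable for generated_music.mid / output.wav.
#
# if __name__ == "__main__":
#     wav_path = generate_music("60,0.5,0.5,62,0.5,0.5,64,0.5,0.5")
#     print(f"Wrote {wav_path}")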