import time
import json
import os
import openl3
import librosa
import pickle
import gradio as gr
import numpy as np
import faiss
import yt_dlp

# The two halves of a YouTube <iframe> embed snippet: the 11-character video
# id is inserted between them (prefix + video id + suffix).
embed_html1 = (
    '<iframe width="560" height="315" '
    'src="https://www.youtube.com/embed/'
)
embed_html2 = (
    '" title="YouTube video player" frameborder="0" '
    'allow="accelerometer; autoplay; clipboard-write; encrypted-media; '
    'gyroscope; picture-in-picture" allowfullscreen></iframe>'
)

def download_audio_(link):
    """Download the audio of *link* with yt_dlp and return the video title.

    The stream selected by the ``bestaudio`` format is written to the current
    working directory using the output template ``%(title)s.mp3``.

    Args:
        link: URL of the video, passed unchanged to ``extract_info``.

    Returns:
        The ``title`` entry of the metadata dictionary returned by yt_dlp.
    """
    # NOTE(review): 'extract_audio' does not appear among the documented
    # YoutubeDL options (audio extraction is normally configured through
    # 'postprocessors'), so the file presumably keeps its original container
    # despite the ".mp3" name -- confirm before relying on the extension.
    options = {'extract_audio': True, 'format': 'bestaudio', 'outtmpl': '%(title)s.mp3'}
    with yt_dlp.YoutubeDL(options) as video:
        # extract_info(download=True) already fetches the file. The extra
        # video.download(link) call that used to follow repeated the whole
        # extraction for the same URL and has been removed.
        info_dict = video.extract_info(link, download=True)
    # NOTE(review): yt_dlp may sanitise the title when expanding the output
    # template, so title + ".mp3" is not guaranteed to equal the name on disk.
    return info_dict['title']

def download_audio(id_video):
    """Download a video's audio and build the matching embed snippet.

    Args:
        id_video: URL of the video. The 11-character video id is looked up
            after ``?v=`` / ``&v=``, ``youtu.be/``, ``/embed/`` or
            ``/shorts/``.

    Returns:
        A 3-tuple ``(audio_file, audio_file, embed_html)``. The audio file
        name (video title + ".mp3") is deliberately returned twice because
        callers unpack three values. ``embed_html`` is the iframe markup
        for the video id.
    """
    import re  # local import: `re` is not imported at module level

    id_match = re.search(
        r"(?:[?&]v=|youtu\.be/|/embed/|/shorts/)([0-9A-Za-z_-]{11})",
        id_video,
    )
    if id_match:
        video_id = id_match.group(1)
    else:
        # Unknown URL shape: keep the historical behaviour as a fallback.
        video_id = id_video.split("?v=")[-1][:11]

    audio_file = download_audio_(id_video) + '.mp3'
    embed_html_all = embed_html1 + video_id + embed_html2
    return audio_file, audio_file, embed_html_all

def process_url(input_path):
    """Run the similarity search on the audio behind a video URL.

    Args:
        input_path: URL of the video, forwarded unchanged to download_audio.

    Returns:
        Whatever process returns for the downloaded audio file.
    """
    # download_audio hands back the audio path twice; the second copy is used.
    _, local_audio, embed_markup = download_audio(input_path)
    return process(local_audio, embed_markup)

def process_file(input_path):
    """Run the similarity search on an audio file that is already on disk.

    Args:
        input_path: Path of the audio file, passed straight to process.

    Returns:
        The result of process for that file, called with an empty embed
        snippet.
    """
    no_embed = ''
    return process(input_path, no_embed)

def process(audio_file, embed_html_all):
    """Embed an audio file with OpenL3 and return its five nearest index hits.

    The clip is loaded as mono audio and embedded with the 512-dimensional
    "linear"/"music" OpenL3 model. The embeddings are averaged over axis 0
    into one vector, which is searched in the FAISS index stored in
    ``index.index``.

    Args:
        audio_file: Path of the audio file to analyse.
        embed_html_all: Not used by this function; kept so existing callers
            keep working.

    Returns:
        A 5-tuple of ``dict_bmg`` values for the five hits, in the order
        FAISS returns them.
    """
    embedding_size = 512
    top_k = 5

    # NOTE(review): the model, the index and both pickles are reloaded on
    # every call; caching them would probably speed up repeated searches.
    model = openl3.models.load_audio_embedding_model(
        input_repr="linear", content_type="music", embedding_size=embedding_size)
    audio, sr = librosa.load(audio_file, mono=True)
    emb, _ = openl3.get_audio_embedding(audio, sr, model=model)
    emb = np.mean(emb, axis=0)
    print("SHAPE ", emb.shape)

    ind = faiss.read_index("index.index")
    # Context managers close the files; the bare open() calls used before
    # leaked both handles. These pickles must only ever be trusted project
    # artefacts -- pickle.load executes code from the file.
    with open('index.filenames', 'rb') as filenames_file:
        ind_filenames = pickle.load(filenames_file)
    with open('dict_bmg.pickle', 'rb') as mapping_file:
        dict_bmg = pickle.load(mapping_file)  # filename to url

    _, neighbours = ind.search(emb.reshape((1, embedding_size)), top_k)

    # NOTE(review): FAISS reports -1 for missing neighbours when the index
    # holds fewer than top_k vectors; -1 would silently select the last
    # filename here -- confirm the index is always large enough.
    return tuple(dict_bmg[ind_filenames[hit]] for hit in neighbours[0])
            
 
# Gradio UI: one column holding an HTML placeholder, a URL search form, a
# file search form and five audio players for the results.
with gr.Blocks() as demo:

    with gr.Row():

        with gr.Column():

            with gr.Row():
                # NOTE(review): no event handler below lists this component
                # as an output, so it stays empty -- confirm whether the
                # embed snippet was meant to be shown here.
                html = gr.HTML()

            with gr.Row():
                with gr.Column():
                    audio_url_input = gr.Textbox(placeholder='YouTube video URL', label='YouTube video URL')
                    analyze_url_btn = gr.Button('Search from URL')

            with gr.Row():
                with gr.Column():
                    audio_input_file = gr.Audio(type="filepath", label='Audio Input')
                    analyze_file_btn = gr.Button('Search from file')

            with gr.Row():
                with gr.Column():
                    # One player per search hit; both buttons fill all five.
                    top1 = gr.Audio(label="top1", show_label=True)
                    top2 = gr.Audio(label="top2", show_label=True)
                    top3 = gr.Audio(label="top3", show_label=True)
                    top4 = gr.Audio(label="top4", show_label=True)
                    top5 = gr.Audio(label="top5", show_label=True)

    # Both handlers write to the same five players, in this order.
    analyze_url_btn.click(process_url, inputs=[audio_url_input],
                          outputs=[top1, top2, top3, top4, top5])

    analyze_file_btn.click(process_file, inputs=[audio_input_file],
                           outputs=[top1, top2, top3, top4, top5])


# The app starts as soon as this module is executed; there is no
# `if __name__ == "__main__"` guard.
demo.launch(debug=True)