from simbals_apis_public_clients.clients.services import SimbalsAPIClient
import time
import json
import os
import gradio as gr
import yt_dlp

# SIMBALS database identifiers (values defined by the SIMBALS API).
SIMBALS_GLOBAL_DB = 1
SIMBALS_MAIN_DATABASE = 2

# HTML fragments wrapped around a YouTube video id to build the embed player
# shown in the UI: embed_html1 + <video id> + embed_html2.
# NOTE(review): embed_html2 was referenced but never defined in the original
# source, which made download_audio() raise NameError on first use. It is
# defined here as an empty string; fill both fragments in to render a real
# <iframe> embed.
embed_html1 = ''
embed_html2 = ''


def download_audio_(link):
    """Download the audio track of a YouTube video to the fixed file 'audio.mp3'.

    Parameters:
        link: YouTube video URL (or id) understood by yt-dlp.

    Returns:
        The video title reported by yt-dlp.
    """
    ydl_opts = {'extract_audio': True, 'format': 'bestaudio', 'outtmpl': 'audio.mp3'}
    with yt_dlp.YoutubeDL(ydl_opts) as video:
        # extract_info(download=True) already fetches the file; the original's
        # extra video.download(link) call was redundant (and passed a str where
        # yt-dlp expects a list of URLs), so it is removed.
        info_dict = video.extract_info(link, download=True)
        return info_dict['title']


def download_audio(id_video):
    """Download audio for a YouTube URL and build the matching embed HTML.

    Parameters:
        id_video: full YouTube URL, e.g. 'https://www.youtube.com/watch?v=XXXXXXXXXXX'.

    Returns:
        (audio_path, audio_path, embed_html) — the path is returned twice to
        match the two call-site uses in the original code.
    """
    # The 11 characters after '?v=' are the YouTube video id.
    video_id = id_video.split("?v=")[-1][:11]
    download_audio_(id_video)
    # download_audio_ always writes to this fixed name (see its outtmpl).
    audio_file = 'audio.mp3'
    embed_html_all = embed_html1 + video_id + embed_html2
    return audio_file, audio_file, embed_html_all


def process_url(input_path):
    """Fetch audio from a YouTube URL, then run the SIMBALS analysis on it."""
    audio_file, _, embed_html_all = download_audio(input_path)
    return process(audio_file, embed_html_all)


def process_file(input_path):
    """Run the SIMBALS analysis on a local audio file (no embed HTML)."""
    return process(input_path, '')


def process(audio_file, embed_html_all):
    """Send an audio file to the SIMBALS tagging API and unpack the results.

    Parameters:
        audio_file: path of the audio file to analyze.
        embed_html_all: HTML snippet to pass through to the UI's HTML output.

    Returns:
        A tuple matching the Gradio outputs list: (embed_html, moods, genres,
        instruments, vocal_gender, timbres, themes, audio_quality, then the
        music descriptors as strings, and finally music_ratio).

    Raises:
        KeyError if the TOKEN environment variable is unset, plus whatever the
        SIMBALS client raises on network/API failure.
    """
    client = SimbalsAPIClient(os.environ['TOKEN'], debug=True)
    parameters = client.prepare_request("31415", audio_file=audio_file)
    # Request the full tagging service on the main database.
    parameters = client.add_tagging_all(parameters, SIMBALS_MAIN_DATABASE)
    ok, code, job_id = client.launch_request(parameters)

    # Poll for results with the obtained job_id. A status code of 4 means the
    # job is not finished yet; any other code means it is done.
    results = None
    for _ in range(1000):
        results = client.get_results(job_id)
        if results[0] != 4:
            break
        time.sleep(1)
    if results is None or results[0] == 4:
        # Original code would crash on an unbound/unfinished result; fail
        # explicitly instead so the UI shows a meaningful error.
        raise RuntimeError("SIMBALS job %s did not finish in time" % job_id)

    # Remove the temporary download so repeated analyses start clean.
    os.system('rm -f audio.mp3')

    res = results[1]["response"]["tagging"]["2"]

    dict_moods = {m['name']: m['probability'] for m in res['moods']}

    dict_desc = {}
    for d in res['music_descriptors']:
        if d['name'] in ('BPM', 'Key', 'Mode'):
            dict_desc[d['name']] = d['value']
        else:
            # Round continuous descriptors to 2 decimal places.
            dict_desc[d['name']] = int(round(d['value'] * 100)) / 100.0

    genres = {d['name']: d['probability'] for d in res['genres']}
    themes = {d['name']: d['probability'] for d in res['themes']}
    instruments = {d['name']: d['probability'] for d in res['instruments']}
    timbres = {d['name']: d['probability'] for d in res['timbres']}

    # A probability of -1 marks "not applicable" (e.g. no vocals); show 0.
    vg = res['vocal_gender'][0]
    vocalgender = {vg['name']: vg['probability'] if vg['probability'] != -1 else 0}

    musicratio = res['music_ratio']
    aq = res['audio_quality'][0]
    audioquality = {aq['name']: aq['probability']}

    return (embed_html_all, dict_moods, genres, instruments, vocalgender,
            timbres, themes, audioquality,
            str(dict_desc['Electric/Acoustic']), str(dict_desc['Danceability']),
            str(dict_desc['Arousal']), str(dict_desc['Vocal/Instrumental']),
            str(dict_desc['Studio/Live']), str(dict_desc['Music/Speech']),
            str(dict_desc['Valence']), str(dict_desc['Melodic']),
            str(dict_desc['Articulation']), str(dict_desc['RhythmicStability']),
            str(dict_desc['Dissonance']), str(dict_desc['BPM']),
            str(dict_desc['Binary']), str(dict_desc['Key']),
            str(dict_desc['Mode']), str(dict_desc['TexturalStability']),
            musicratio)
# ---------------------------------------------------------------------------
# Gradio UI: one HTML embed + two input paths (YouTube URL, local file), with
# labels/sliders for every value returned by process().
# NOTE(review): the original section headings were multi-line gr.HTML strings
# whose markup was lost in the source; plain <h4> headings are used here —
# confirm against the deployed app's styling.
# ---------------------------------------------------------------------------
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            with gr.Row():
                # Filled with the YouTube embed HTML after a URL analysis.
                html = gr.HTML()
            with gr.Row():
                with gr.Column():
                    audio_url_input = gr.Textbox(placeholder='YouTube video URL',
                                                 label='YouTube video URL')
                    analyze_url_btn = gr.Button('Analyze URL')
            with gr.Row():
                with gr.Column():
                    audio_input_file = gr.Audio(type="filepath", label='Audio Input')
                    analyze_file_btn = gr.Button('Analyze File')

    with gr.Row():
        with gr.Column():
            gr.HTML("<h4>Moods</h4>")
            dict_moods = gr.Label(label="Moods", show_label=False)
            gr.HTML("<h4>Themes</h4>")
            themes = gr.Label(label="Themes", show_label=False)
            gr.HTML("<h4>BPM</h4>")
            bpm = gr.Textbox(label="BPM", show_label=False)
            gr.HTML("<h4>Key</h4>")
            key = gr.Textbox(label="Key", show_label=False)
            mode = gr.Textbox(label="Mode", show_label=False)
        with gr.Column():
            gr.HTML("<h4>Genres</h4>")
            genres = gr.Label(label="Genres", show_label=False)
            gr.HTML("<h4>Instruments</h4>")
            instruments = gr.Label(label="Instruments", show_label=False)
            gr.HTML("<h4>Vocal Gender</h4>")
            vocalgender = gr.Label(label="Vocal Gender", show_label=False)
            gr.HTML("<h4>Textures</h4>")
            timbres = gr.Label(label="Texture", show_label=False)
            gr.HTML("<h4>AudioQuality</h4>")
            audioquality = gr.Label(label="Audio Quality", show_label=False)
        with gr.Column():
            gr.HTML("<h4>Descriptors</h4>")
            musicratio = gr.Slider(label="Music Ratio", minimum=0, maximum=1.0)
            acousticness = gr.Slider(label="Electric/Acoustic", minimum=0, maximum=1.0)
            danceability = gr.Slider(label="Danceability", minimum=0, maximum=1.0)
            arousal = gr.Slider(label="Arousal", minimum=0, maximum=1.0)
            instrumentalness = gr.Slider(label="Vocal/Instrumental", minimum=0, maximum=1.0)
            liveness = gr.Slider(label="Studio/Live", minimum=0, maximum=1.0)
            speechiness = gr.Slider(label="Music/Speech", minimum=0, maximum=1.0)
            valence = gr.Slider(label="Valence", minimum=0, maximum=1.0)
            melodic = gr.Slider(label="Melodic", minimum=0, maximum=1.0)
            articulation = gr.Slider(label="Articulation", minimum=0, maximum=1.0)
            rhythmicstability = gr.Slider(label="Rhythmic Stability", minimum=0, maximum=1.0)
            dissonance = gr.Slider(label="Dissonance", minimum=0, maximum=1.0)
            binary = gr.Slider(label="Binary", minimum=0, maximum=1.0)
            texturalstability = gr.Slider(label="Textural Stability", minimum=0, maximum=1.0)

    # One shared outputs list: the original repeated this 25-element list
    # verbatim in three places, which invited drift between handlers. The
    # order must match the tuple returned by process().
    all_outputs = [html, dict_moods, genres, instruments, vocalgender, timbres,
                   themes, audioquality, acousticness, danceability, arousal,
                   instrumentalness, liveness, speechiness, valence, melodic,
                   articulation, rhythmicstability, dissonance, bpm, binary,
                   key, mode, texturalstability, musicratio]

    analyze_url_btn.click(process_url, inputs=[audio_url_input], outputs=all_outputs)

    gr.Examples(
        examples=[["https://www.youtube.com/watch?v=v2AC41dglnM"],
                  ["https://www.youtube.com/watch?v=We4-lJe0JRY"],
                  ["https://www.youtube.com/watch?v=HavnIXPxnjk"],
                  ["https://www.youtube.com/watch?v=LuKm4L9ryB0"],
                  ["https://www.youtube.com/watch?v=-s5-VXG7Qio"],
                  ["https://www.youtube.com/watch?v=n4CXVuODVhY"],
                  ["https://www.youtube.com/watch?v=Nj2U6rhnucI"],
                  ["https://www.youtube.com/watch?v=RlrNPBfOowg"],
                  ["https://www.youtube.com/watch?v=6BOHpjIZyx0"],
                  ["https://www.youtube.com/watch?v=QMbvpftTEUs"],
                  ["https://www.youtube.com/watch?v=irVDVag2br4"]],
        inputs=[audio_url_input],
        outputs=all_outputs,
        fn=process_url,
        examples_per_page=15,
        cache_examples=True,  # pre-computes results for the examples at startup
    )

    analyze_file_btn.click(process_file, inputs=[audio_input_file], outputs=all_outputs)

demo.launch(debug=True)