# Source: Hugging Face Space "Simbals/SimbalsAPI_Tagging"
# Space status when captured: Runtime error
# File size: 8,998 bytes

from simbals_apis_public_clients.clients.services import SimbalsAPIClient
import time
import json
import os
import gradio as gr

# Database selectors for the Simbals API client.
# SIMBALS_GLOBAL_DB is not referenced by any code visible in this file.
SIMBALS_GLOBAL_DB = 1
# Passed as the second argument to client.add_tags() in process().
SIMBALS_MAIN_DATABASE = 2

from pytube import YouTube

# Complete embed markup for one fixed video; only referenced from commented-out UI code.
embed_html = '<iframe width="560" height="315" src="https://www.youtube.com/embed/EngW7tLk6R8" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'

# Prefix and suffix of the same <iframe> markup; download_audio() concatenates
# them around a video id to build the embed for the requested video.
embed_html1 = '<iframe width="560" height="315" src="https://www.youtube.com/embed/'
embed_html2 = '" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'


def download_audio(id_video):
    """Download a YouTube video's audio track and build its embed markup.

    Args:
        id_video: URL of the YouTube video; passed unchanged to
            ``pytube.YouTube``.

    Returns:
        A 3-tuple ``(audio_file, audio_file, embed_html_all)``. The first two
        items are the same value, the path returned by the stream's
        ``download()`` call (kept duplicated because callers unpack three
        values). The third is the ``<iframe>`` markup embedding the video.

    Raises:
        ValueError: if the video exposes no audio-only stream.
    """
    video = YouTube(id_video)

    # Use the id pytube already parsed from the URL instead of slicing the
    # text after "?v=": that slicing yields garbage for short "youtu.be/<id>"
    # links and for URLs where "v" is not the first query parameter.
    video_id = video.video_id

    # first() returns None on an empty query, which lets us report the
    # problem explicitly instead of failing with a bare IndexError.
    audio_stream = video.streams.filter(only_audio=True).first()
    if audio_stream is None:
        raise ValueError(f"No audio-only stream available for {id_video!r}")

    audio_file = audio_stream.download()
    embed_html_all = embed_html1 + video_id + embed_html2
    return audio_file, audio_file, embed_html_all

# Status code returned by client.get_results() while the job is still running.
_JOB_PENDING_CODE = 4
# Polling budget: at most _MAX_POLLS requests, _POLL_INTERVAL_SECONDS apart.
_MAX_POLLS = 1000
_POLL_INTERVAL_SECONDS = 1

# Music descriptors returned by process(), in the exact order the UI's
# output components expect them (after the HTML and the seven label dicts).
_DESCRIPTOR_OUTPUT_ORDER = (
    'Electric/Acoustic',
    'Danceability',
    'Arousal',
    'Vocal/Instrumental',
    'Studio/Live',
    'Music/Speech',
    'Valence',
    'Melodic',
    'Articulation',
    'RhythmicStability',
    'Dissonance',
    'BPM',
    'Binary',
    'Key',
    'Mode',
    'TexturalStability',
)


def _probabilities_by_name(entries):
    """Map each entry's ``name`` to its ``probability``."""
    return {entry['name']: entry['probability'] for entry in entries}


def process(input_path):
    """Tag the audio of a YouTube video with the Simbals API.

    Downloads the audio via ``download_audio``, submits a tagging request,
    polls until the job leaves the pending state, and reshapes the response
    into the values shown by the UI.

    Args:
        input_path: YouTube video URL, forwarded to ``download_audio``.

    Returns:
        A 24-tuple: the embed HTML; seven ``{name: probability}`` dicts
        (moods, genres, instruments, vocal gender, timbres, themes, audio
        quality); then one string per descriptor in
        ``_DESCRIPTOR_OUTPUT_ORDER``.

    Raises:
        KeyError: if the ``TOKEN`` environment variable is unset or an
            expected tag/descriptor is missing from the response.
        TimeoutError: if the job is still pending after ``_MAX_POLLS`` polls.
        RuntimeError: if the finished job carries no tagging payload.
    """
    # NOTE(review): the downloaded file is never removed; cleanup is left out
    # here because concurrent requests for the same video may share the path.
    audio_file, _, embed_html_all = download_audio(input_path)

    # Set up the client and a tagging request for the downloaded audio.
    client = SimbalsAPIClient(os.environ['TOKEN'], debug=True)
    parameters = client.prepare_request("31415", audio_file=audio_file)
    parameters = client.add_tags(parameters, SIMBALS_MAIN_DATABASE)

    # NOTE(review): the ok/code values are not validated because their
    # semantics are not visible in this file; confirm against the client docs.
    _ok, _code, job_id = client.launch_request(parameters)

    # Poll for results: the pending code means "not finished"; any other code
    # means the job is done.
    for _ in range(_MAX_POLLS):
        results = client.get_results(job_id)
        if results[0] != _JOB_PENDING_CODE:
            break
        time.sleep(_POLL_INTERVAL_SECONDS)
    else:
        # Previously the code fell through and tried to parse a pending job.
        raise TimeoutError(
            f"Simbals job {job_id} still pending after {_MAX_POLLS} polls"
        )

    try:
        # "2" presumably mirrors SIMBALS_MAIN_DATABASE; verify against the API.
        res = results[1]["response"]["tagging"]["2"]
    except (KeyError, TypeError) as err:
        raise RuntimeError(
            f"Simbals job {job_id} finished with status {results[0]} "
            "but returned no tagging data"
        ) from err

    dict_moods = _probabilities_by_name(res['moods'])
    genres = _probabilities_by_name(res['genres'])
    themes = _probabilities_by_name(res['themes'])
    instruments = _probabilities_by_name(res['instruments'])
    timbres = _probabilities_by_name(res['timbres'])

    # Only the first vocal-gender / audio-quality entry is reported.
    top_gender = res['vocal_gender'][0]
    vocalgender = {top_gender['name']: top_gender['probability']}
    top_quality = res['audio_quality'][0]
    audioquality = {top_quality['name']: top_quality['probability']}

    # Every descriptor goes through the same x100 rounding.
    # NOTE(review): this includes BPM, Key and Mode; confirm that is intended.
    dict_desc = {
        d['name']: int(round(d['value'] * 100))
        for d in res['music_descriptors']
    }
    descriptor_values = [str(dict_desc[name]) for name in _DESCRIPTOR_OUTPUT_ORDER]

    return (
        embed_html_all,
        dict_moods,
        genres,
        instruments,
        vocalgender,
        timbres,
        themes,
        audioquality,
        *descriptor_values,
    )
            

 
def _descriptor_slider(label):
    """Build the output slider that displays one music descriptor."""
    # process() emits descriptors as int(round(value * 100)), so the scale
    # must reach 100; a 0-1.0 range cannot display those values.
    # Assumes raw descriptor values lie in [0, 1] -- TODO confirm.
    return gr.Slider(label=label, minimum=0, maximum=100)


# Single-page UI: a YouTube URL goes in, process() fills every output widget.
with gr.Blocks() as demo:

    with gr.Row():

        with gr.Column():

            with gr.Row():
                # Receives the <iframe> embed markup returned by process().
                html = gr.HTML()

            with gr.Row():
                audio_input = gr.Textbox(placeholder='YouTube video URL', label='YouTube video URL')

            with gr.Row():
                analyze_btn = gr.Button('Analyze File')

            with gr.Row():
                with gr.Column():
                    gr.HTML("<h3>Moods</h3>")
                    dict_moods = gr.Label(label="Moods", show_label=False)

                    gr.HTML("<h3>Themes</h3>")
                    themes = gr.Label(label="Themes", show_label=False)

                with gr.Column():
                    gr.HTML("<h3>Genres</h3>")
                    genres = gr.Label(label="Genres", show_label=False)

                    gr.HTML("<h3>BPM</h3>")
                    bpm = gr.Textbox(label="BPM", show_label=False)

                    gr.HTML("<h3>Key</h3>")
                    key = gr.Textbox(label="Key", show_label=False)
                    mode = gr.Textbox(label="Mode", show_label=False)

                with gr.Column():
                    gr.HTML("<h3>Instruments</h3>")
                    instruments = gr.Label(label="Instruments", show_label=False)

                    # Heading previously read "Key" (copy-paste slip) although
                    # the widget below shows the vocal gender.
                    gr.HTML("<h3>Vocal Gender</h3>")
                    vocalgender = gr.Label(label="Vocal Gender", show_label=False)

                    gr.HTML("<h3>Textures</h3>")
                    timbres = gr.Label(label="Texture", show_label=False)

                    gr.HTML("<h3> AudioQuality</h3>")
                    audioquality = gr.Label(label="Audio Quality", show_label=False)

                with gr.Column():
                    gr.HTML("<h3> Descriptors</h3>")
                    acousticness = _descriptor_slider("Electric/Acoustic")
                    danceability = _descriptor_slider("Danceability")
                    arousal = _descriptor_slider("Arousal")
                    instrumentalness = _descriptor_slider("Vocal/Instrumental")
                    liveness = _descriptor_slider("Studio/Live")
                    speechiness = _descriptor_slider("Music/Speech")
                    valence = _descriptor_slider("Valence")
                    melodic = _descriptor_slider("Melodic")
                    articulation = _descriptor_slider("Articulation")
                    rhythmicstability = _descriptor_slider("Rhythmic Stability")
                    dissonance = _descriptor_slider("Dissonance")
                    binary = _descriptor_slider("Binary")
                    texturalstability = _descriptor_slider("Textural Stability")

    # The outputs must stay in the same order as the tuple returned by
    # process(): embed HTML, seven label dicts, then the sixteen descriptors.
    analyze_btn.click(
        process,
        inputs=[audio_input],
        outputs=[
            html,
            dict_moods,
            genres,
            instruments,
            vocalgender,
            timbres,
            themes,
            audioquality,
            acousticness,
            danceability,
            arousal,
            instrumentalness,
            liveness,
            speechiness,
            valence,
            melodic,
            articulation,
            rhythmicstability,
            dissonance,
            bpm,
            binary,
            key,
            mode,
            texturalstability,
        ],
    )

demo.launch(debug=True)