Spaces:
Runtime error
Runtime error
File size: 8,880 Bytes
5558062 f1a824b 5558062 27ecaed 5558062 27ecaed 5558062 27ecaed 5558062 8844de9 5558062 830f8b5 99236eb 812d7ec 830f8b5 812d7ec 830f8b5 798481c 830f8b5 812d7ec 830f8b5 812d7ec 9099754 830f8b5 9099754 830f8b5 9099754 830f8b5 6946728 830f8b5 6946728 701d5f1 6946728 830f8b5 701d5f1 6946728 830f8b5 27ecaed baf9728 dd3cb6b 01baa28 356b9b0 27ecaed 356b9b0 f4f5f6a ba15b51 f4f5f6a 558bd35 ba15b51 baf9728 31c0c8e baf9728 31c0c8e baf9728 ba15b51 434a807 de5ecfe 6946728 434a807 6946728 434a807 6946728 4d0049a 7513630 dd3cb6b baf9728 aac7846 baf9728 dd3cb6b 558bd35 456a03f 558bd35 baf9728 27ecaed baf9728 6946728 baf9728 6946728 baf9728 356b9b0 e204a2f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 |
from simbals_apis_public_clients.clients.services import SimbalsAPIClient
import time
import json
import os
import gradio as gr
from pytube import YouTube
# Simbals database identifiers, passed to client.add_tags() / add_audio_features().
SIMBALS_GLOBAL_DB = 1
SIMBALS_MAIN_DATABASE = 2
# YouTube embed iframe HTML, split around the 11-char video id:
# full markup is embed_html1 + <video id> + embed_html2.
embed_html1 = '<iframe width="560" height="315" src="https://www.youtube.com/embed/'
embed_html2 = '" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'
def download_audio(id_video):
    """Download the audio track of a YouTube video via pytube.

    Parameters
    ----------
    id_video : str
        YouTube video URL (e.g. ``https://www.youtube.com/watch?v=XXXXXXXXXXX``).

    Returns
    -------
    tuple
        ``(audio_file, audio_file, embed_html_all)`` — the local path of the
        downloaded audio (duplicated to match the caller's unpacking) and the
        embeddable <iframe> HTML for the video.
    """
    video = YouTube(id_video)
    # 11-character video id extracted from the URL, used only for the embed HTML.
    # (Avoids shadowing the builtin `id`.)
    video_id = id_video.split("?v=")[-1][:11]
    # First audio-only stream; pytube saves the file into the current directory.
    audio_stream = video.streams.filter(only_audio=True)[0]
    audio_file = audio_stream.download()
    embed_html_all = embed_html1 + video_id + embed_html2
    # audio_file is returned twice on purpose: process() unpacks three values.
    return audio_file, audio_file, embed_html_all
def process(input_path):
    """Analyze a YouTube URL with the Simbals tagging API and return UI values.

    Parameters
    ----------
    input_path : str
        YouTube video URL entered in the Gradio textbox.

    Returns
    -------
    tuple
        (embed html, moods, genres, instruments, vocal gender, timbres,
        themes, audio quality, then 15 stringified descriptor values) —
        ordered to match the ``outputs`` list wired to the Analyze button.

    Raises
    ------
    TimeoutError
        If the Simbals job is still unfinished after ~1000 polling attempts.
    """
    # Download the audio locally; the Simbals client uploads the file.
    audio_file, _, embed_html_all = download_audio(input_path)
    client = SimbalsAPIClient(os.environ['TOKEN'], debug=True)
    parameters = client.prepare_request("31415", audio_file=audio_file)
    # Request tagging against the main database.
    parameters = client.add_tags(parameters, SIMBALS_MAIN_DATABASE)
    ok, code, job_id = client.launch_request(parameters)

    # Poll for results: a status of 4 means the job is not finished yet;
    # any other status means the job is done.
    results = None
    for _ in range(1000):
        results = client.get_results(job_id)
        if results[0] != 4:
            break
        time.sleep(1)
    else:
        # Fail loudly instead of crashing later on an incomplete payload.
        raise TimeoutError("Simbals job %s did not finish in time" % job_id)

    res = results[1]["response"]["tagging"]["2"]

    dict_moods = {m['name']: m['probability'] for m in res['moods']}
    genres = {d['name']: d['probability'] for d in res['genres']}
    themes = {d['name']: d['probability'] for d in res['themes']}
    instruments = {d['name']: d['probability'] for d in res['instruments']}
    timbres = {d['name']: d['probability'] for d in res['timbres']}

    # Descriptors: BPM/Key/Mode are reported verbatim; numeric descriptors
    # are rounded to 2 decimal places.
    dict_desc = {}
    for d in res['music_descriptors']:
        if d['name'] in ('BPM', 'Key', 'Mode'):
            dict_desc[d['name']] = d['value']
        else:
            dict_desc[d['name']] = int(round(d['value'] * 100)) / 100.0

    # Single top prediction for vocal gender and audio quality.
    vocalgender = {res['vocal_gender'][0]['name']: res['vocal_gender'][0]['probability']}
    audioquality = {res['audio_quality'][0]['name']: res['audio_quality'][0]['probability']}

    return (embed_html_all, dict_moods, genres, instruments, vocalgender,
            timbres, themes, audioquality,
            str(dict_desc['Electric/Acoustic']), str(dict_desc['Danceability']),
            str(dict_desc['Arousal']), str(dict_desc['Vocal/Instrumental']),
            str(dict_desc['Studio/Live']), str(dict_desc['Music/Speech']),
            str(dict_desc['Valence']), str(dict_desc['Melodic']),
            str(dict_desc['Articulation']), str(dict_desc['RhythmicStability']),
            str(dict_desc['Dissonance']), str(dict_desc['BPM']),
            str(dict_desc['Binary']), str(dict_desc['Key']),
            str(dict_desc['Mode']), str(dict_desc['TexturalStability']))
# ---- Gradio UI: layout, output components, and wiring ----------------------
with gr.Blocks() as demo:
    # Top row: embedded player, URL input, and the analyze trigger.
    with gr.Row():
        with gr.Column():
            with gr.Row():
                # Embedded YouTube player; filled in by process().
                html = gr.HTML()
            with gr.Row():
                audio_input = gr.Textbox(placeholder='YouTube video URL', label='YouTube video URL')
            with gr.Row():
                analyze_btn = gr.Button('Analyze File')

    # Results row: one column per group of predictions.
    with gr.Row():
        with gr.Column():
            gr.HTML("<h3>Moods</h3>")
            dict_moods = gr.Label(label="Moods", show_label=False)
            gr.HTML("<h3>Themes</h3>")
            themes = gr.Label(label="Themes", show_label=False)
        with gr.Column():
            gr.HTML("<h3>Genres</h3>")
            genres = gr.Label(label="Genres", show_label=False)
            gr.HTML("<h3>BPM</h3>")
            bpm = gr.Textbox(label="BPM", show_label=False)
            gr.HTML("<h3>Key</h3>")
            key = gr.Textbox(label="Key", show_label=False)
            mode = gr.Textbox(label="Mode", show_label=False)
        with gr.Column():
            gr.HTML("<h3>Instruments</h3>")
            instruments = gr.Label(label="Instruments", show_label=False)
            gr.HTML("<h3> Vocal Gender</h3>")
            vocalgender = gr.Label(label="Vocal Gender", show_label=False)
            gr.HTML("<h3>Textures</h3>")
            timbres = gr.Label(label="Texture", show_label=False)
            gr.HTML("<h3> AudioQuality</h3>")
            audioquality = gr.Label(label="Audio Quality", show_label=False)
        with gr.Column():
            gr.HTML("<h3> Descriptors</h3>")
            # Continuous descriptors shown as read-only 0..1 sliders.
            acousticness = gr.Slider(label="Electric/Acoustic", minimum=0, maximum=1.0)
            danceability = gr.Slider(label="Danceability", minimum=0, maximum=1.0)
            arousal = gr.Slider(label="Arousal", minimum=0, maximum=1.0)
            instrumentalness = gr.Slider(label="Vocal/Instrumental", minimum=0, maximum=1.0)
            liveness = gr.Slider(label="Studio/Live", minimum=0, maximum=1.0)
            speechiness = gr.Slider(label="Music/Speech", minimum=0, maximum=1.0)
            valence = gr.Slider(label="Valence", minimum=0, maximum=1.0)
            melodic = gr.Slider(label="Melodic", minimum=0, maximum=1.0)
            articulation = gr.Slider(label="Articulation", minimum=0, maximum=1.0)
            rhythmicstability = gr.Slider(label="Rhythmic Stability", minimum=0, maximum=1.0)
            dissonance = gr.Slider(label="Dissonance", minimum=0, maximum=1.0)
            binary = gr.Slider(label="Binary", minimum=0, maximum=1.0)
            texturalstability = gr.Slider(label="Textural Stability", minimum=0, maximum=1.0)

    # Outputs are ordered to match process()'s return tuple exactly.
    analyze_btn.click(process, inputs=[audio_input],
                      outputs=[html,
                               dict_moods,
                               genres,
                               instruments,
                               vocalgender,
                               timbres,
                               themes,
                               audioquality,
                               acousticness,
                               danceability,
                               arousal,
                               instrumentalness,
                               liveness,
                               speechiness,
                               valence,
                               melodic,
                               articulation,
                               rhythmicstability,
                               dissonance,
                               bpm,
                               binary,
                               key,
                               mode,
                               texturalstability
                               ])

demo.launch(debug=True)