Spaces:

Simbals
/

SimbalsAPI_Tagging

Runtime error

File size: 10,957 Bytes

5558062
 
 
 
 
3b406ca
5558062
 
 
 
27ecaed
 
 
aafa630
77e0e67
 
aafa630
 
 
 
27ecaed
 
 
aafa630
 
77e0e67
27ecaed
 
 
feb1951
5558062
27ecaed
50c1ac4
feb1951
 
50c1ac4
feb1951
7d037c0
5558062
27ecaed
285ba22
5558062
 
5b1ec57
5558062
 
 
 
 
 
 
 
 
 
 
830f8b5
 
99236eb
812d7ec
830f8b5
812d7ec
 
 
830f8b5
798481c
 
 
 
 
 
830f8b5
812d7ec
830f8b5
812d7ec
 
9099754
830f8b5
9099754
 
 
830f8b5
9099754
830f8b5
6946728
830f8b5
6946728
 
701d5f1
a718c40
 
 
 
 
830f8b5
701d5f1
6946728
830f8b5
50c1ac4
 
baf9728
dd3cb6b
01baa28
356b9b0
 
 
 
 
 
 
27ecaed
 
 
 
1064e64
50c1ac4
1064e64
27ecaed
356b9b0
1064e64
 
 
feb1951
356b9b0
 
f4f5f6a
ba15b51
f4f5f6a
558bd35
 
 
 
baf9728
31c0c8e
baf9728
 
31c0c8e
 
baf9728
ba15b51
1b8f086
 
 
 
ba15b51
 
434a807
de5ecfe
6946728
434a807
 
6946728
434a807
 
6946728
4d0049a
7513630
 
dd3cb6b
baf9728
 
 
 
 
 
 
 
 
aac7846
baf9728
 
 
dd3cb6b
 
558bd35
456a03f
 
 
 
 
558bd35
baf9728
343a53f
27ecaed
 
baf9728
6946728
 
 
baf9728
6946728
baf9728
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
feb1951
50c1ac4
feb1951
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e204a2f

from simbals_apis_public_clients.clients.services import SimbalsAPIClient
import time
import json
import os
import gradio as gr
import yt_dlp

# Database identifiers passed to the Simbals API client.
# SIMBALS_GLOBAL_DB is not referenced in the visible part of this file.
SIMBALS_GLOBAL_DB = 1
# Used by process() when requesting tagging.
# NOTE(review): process() reads results under the key "2", presumably this id — confirm.
SIMBALS_MAIN_DATABASE = 2

# Opening and closing halves of the YouTube <iframe> embed; download_audio()
# concatenates them around the video id to build the player markup.
embed_html1 = '<iframe width="560" height="315" src="https://www.youtube.com/embed/'
embed_html2 = '" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'

def download_audio_(link):
    """Download the audio of ``link`` to ``audio.mp3`` and return the video title.

    Args:
        link: URL (or other identifier accepted by yt-dlp) of the video.

    Returns:
        The ``title`` entry of the metadata yt-dlp extracted for ``link``.
    """
    # The output template is a fixed relative name, so every call overwrites
    # the file produced by the previous one.
    # NOTE(review): 'extract_audio' is kept as in the original options, but it
    # does not look like a documented YoutubeDL parameter — confirm.
    options = {'extract_audio': True, 'format': 'bestaudio', 'outtmpl': 'audio.mp3'}
    with yt_dlp.YoutubeDL(options) as video:
        # extract_info(download=True) already fetches the file. The previous
        # follow-up call video.download(link) repeated that work and passed a
        # bare string where the API expects a list of URLs, so it is dropped.
        info_dict = video.extract_info(link, download=True)
    return info_dict['title']

def download_audio(id_video):
    """Download a YouTube video's audio and build the matching embed markup.

    Args:
        id_video: YouTube URL typed by the user (``watch?v=...`` or
            ``youtu.be/...``); anything else falls back to the legacy
            ``"?v="`` split so previously accepted inputs behave the same.

    Returns:
        A 3-tuple ``(audio_file, audio_file, embed_html_all)``: the local
        audio path (returned twice, as before) and the ``<iframe>`` markup
        embedding the video.
    """
    # Local imports keep this block self-contained without touching the
    # file-level import list.
    from html import escape
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(id_video)
    query_ids = parse_qs(parsed.query).get('v')
    if query_ids:
        # Works wherever "v" sits in the query string, not only first.
        video_id = query_ids[0][:11]
    elif parsed.netloc.endswith('youtu.be'):
        # Short links carry the id as the first path segment.
        video_id = parsed.path.lstrip('/').split('/')[0][:11]
    else:
        # Legacy behaviour: text after "?v=", truncated to 11 characters.
        video_id = id_video.split("?v=")[-1][:11]

    # The returned title is not needed: the downloader always writes to the
    # fixed name below.
    download_audio_(id_video)
    audio_file = 'audio.mp3'

    # The id comes from user input and lands inside an HTML attribute, so it
    # is escaped; this is a no-op for well-formed ids.
    embed_html_all = embed_html1 + escape(video_id, quote=True) + embed_html2
    return audio_file, audio_file, embed_html_all

def process_url(input_path):
    """Analyze the audio behind a video URL.

    Downloads the audio via ``download_audio`` and forwards the resulting
    file path and embed markup to ``process``, returning whatever
    ``process`` returns.
    """
    # download_audio yields the audio path twice; the second copy is the one used.
    _, local_audio, embed_markup = download_audio(input_path)
    return process(local_audio, embed_markup)

def process_file(input_path):
    """Analyze an audio file given by path, with no video embed markup."""
    no_embed = ''
    return process(input_path, no_embed)

def _probabilities_by_name(entries):
    """Map each entry's 'name' to its 'probability'."""
    return {entry['name']: entry['probability'] for entry in entries}


def process(audio_file, embed_html_all):
    """Submit ``audio_file`` for tagging and shape the result for the UI.

    Args:
        audio_file: Path of the audio file handed to the API client.
        embed_html_all: HTML passed through unchanged as the first output.

    Returns:
        A 24-tuple: ``embed_html_all``; seven name->probability dicts
        (moods, genres, instruments, vocal gender, timbres, themes, audio
        quality); then sixteen descriptor values converted with ``str``, in
        the order listed in ``descriptor_order`` below.

    Raises:
        KeyError: If the ``TOKEN`` environment variable is not set.
        TimeoutError: If the job is still unfinished after 1000 polls.
    """
    client = SimbalsAPIClient(os.environ['TOKEN'], debug=True)
    parameters = client.prepare_request("31415", audio_file=audio_file)
    parameters = client.add_tagging_all(parameters, SIMBALS_MAIN_DATABASE)

    # NOTE(review): the ok/code values returned here are not checked, as in
    # the original — confirm their semantics before adding a guard.
    _ok, _code, job_id = client.launch_request(parameters)

    # Poll once per second. A status code of 4 means the job is not finished;
    # any other code means it is.
    for _ in range(1000):
        results = client.get_results(job_id)
        if results[0] != 4:
            break
        time.sleep(1)
    else:
        # Previously the code fell through and indexed an unfinished result,
        # which failed with an unrelated lookup error.
        raise TimeoutError('Tagging job %s did not finish after 1000 polls' % job_id)

    # NOTE(review): "2" presumably matches SIMBALS_MAIN_DATABASE — confirm.
    res = results[1]["response"]["tagging"]["2"]

    dict_moods = _probabilities_by_name(res['moods'])
    genres = _probabilities_by_name(res['genres'])
    themes = _probabilities_by_name(res['themes'])
    instruments = _probabilities_by_name(res['instruments'])
    timbres = _probabilities_by_name(res['timbres'])

    # BPM, Key and Mode are kept verbatim; every other descriptor is rounded
    # to two decimals.
    verbatim = ('BPM', 'Key', 'Mode')
    dict_desc = {}
    for d in res['music_descriptors']:
        if d['name'] in verbatim:
            dict_desc[d['name']] = d['value']
        else:
            dict_desc[d['name']] = int(round(d['value'] * 100)) / 100.0

    # Only the first vocal-gender entry is shown; a probability of -1 is
    # displayed as 0.
    first_gender = res['vocal_gender'][0]
    gender_probability = first_gender['probability']
    vocalgender = {
        first_gender['name']: gender_probability if gender_probability != -1 else 0
    }

    first_quality = res['audio_quality'][0]
    audioquality = {first_quality['name']: first_quality['probability']}

    # Order must match the output components wired to the buttons.
    descriptor_order = (
        'Electric/Acoustic', 'Danceability', 'Arousal', 'Vocal/Instrumental',
        'Studio/Live', 'Music/Speech', 'Valence', 'Melodic', 'Articulation',
        'RhythmicStability', 'Dissonance', 'BPM', 'Binary', 'Key', 'Mode',
        'TexturalStability',
    )
    return (
        embed_html_all, dict_moods, genres, instruments, vocalgender, timbres,
        themes, audioquality,
        *(str(dict_desc[name]) for name in descriptor_order),
    )
            

 
# UI definition: inputs (URL or audio file), then three columns of results.
with gr.Blocks() as demo:

    with gr.Row():

        with gr.Column():

            # Receives the embed markup returned as the first output.
            with gr.Row():
                html = gr.HTML()

            with gr.Row():
                with gr.Column():
                    audio_url_input = gr.Textbox(placeholder='YouTube video URL', label='YouTube video URL')
                    analyze_url_btn = gr.Button('Analyze URL')

            with gr.Row():
                with gr.Column():
                    audio_input_file = gr.Audio(type="filepath", label='Audio Input')
                    analyze_file_btn = gr.Button('Analyze File')

            with gr.Row():
                with gr.Column():
                    gr.HTML("<h3>Moods</h3>")
                    dict_moods = gr.Label(label="Moods", show_label=False)

                    gr.HTML("<h3>Themes</h3>")
                    themes = gr.Label(label="Themes", show_label=False)

                    gr.HTML("<h3>BPM</h3>")
                    bpm = gr.Textbox(label="BPM", show_label=False)

                    gr.HTML("<h3>Key</h3>")
                    key = gr.Textbox(label="Key", show_label=False)
                    mode = gr.Textbox(label="Mode", show_label=False)

                with gr.Column():
                    gr.HTML("<h3>Genres</h3>")
                    genres = gr.Label(label="Genres", show_label=False)

                    gr.HTML("<h3>Instruments</h3>")
                    instruments = gr.Label(label="Instruments", show_label=False)

                    gr.HTML("<h3> Vocal Gender</h3>")
                    vocalgender = gr.Label(label="Vocal Gender", show_label=False)

                    gr.HTML("<h3>Textures</h3>")
                    timbres = gr.Label(label="Texture", show_label=False)

                    gr.HTML("<h3> AudioQuality</h3>")
                    audioquality = gr.Label(label="Audio Quality", show_label=False)

                with gr.Column():
                    gr.HTML("<h3> Descriptors</h3>")
                    acousticness = gr.Slider(label="Electric/Acoustic", minimum=0, maximum=1.0)
                    danceability = gr.Slider(label="Danceability", minimum=0, maximum=1.0)
                    arousal = gr.Slider(label="Arousal", minimum=0, maximum=1.0)
                    instrumentalness = gr.Slider(label="Vocal/Instrumental", minimum=0, maximum=1.0)
                    liveness = gr.Slider(label="Studio/Live", minimum=0, maximum=1.0)
                    speechiness = gr.Slider(label="Music/Speech", minimum=0, maximum=1.0)
                    valence = gr.Slider(label="Valence", minimum=0, maximum=1.0)
                    melodic = gr.Slider(label="Melodic", minimum=0, maximum=1.0)
                    articulation = gr.Slider(label="Articulation", minimum=0, maximum=1.0)
                    rhythmicstability = gr.Slider(label="Rhythmic Stability", minimum=0, maximum=1.0)
                    dissonance = gr.Slider(label="Dissonance", minimum=0, maximum=1.0)
                    binary = gr.Slider(label="Binary", minimum=0, maximum=1.0)
                    texturalstability = gr.Slider(label="Textural Stability", minimum=0, maximum=1.0)

    # Both buttons fill the same components. The order must match the tuple
    # returned by process(), so it is declared once and shared.
    result_outputs = [
        html,
        dict_moods,
        genres,
        instruments,
        vocalgender,
        timbres,
        themes,
        audioquality,
        acousticness,
        danceability,
        arousal,
        instrumentalness,
        liveness,
        speechiness,
        valence,
        melodic,
        articulation,
        rhythmicstability,
        dissonance,
        bpm,
        binary,
        key,
        mode,
        texturalstability,
    ]

    analyze_url_btn.click(process_url, inputs=[audio_url_input], outputs=result_outputs)
    analyze_file_btn.click(process_file, inputs=[audio_input_file], outputs=result_outputs)


demo.launch(debug=True)