"""Gradio demo: download a YouTube video's audio and tag it with the Simbals API.

Flow: the user pastes a YouTube URL, ``download_audio`` fetches the audio
track via pytube, ``process`` submits it to the Simbals tagging service and
polls for the result, then the moods / genres / instruments / descriptors
are rendered in a Gradio Blocks layout.
"""

import json
import os
import time

import gradio as gr
from pytube import YouTube

from simbals_apis_public_clients.clients.services import SimbalsAPIClient

# Simbals database identifiers.
SIMBALS_GLOBAL_DB = 1
SIMBALS_MAIN_DATABASE = 2

# HTML fragments wrapped around the 11-char video id to build an embed snippet.
# NOTE(review): the original referenced ``embed_html2`` without ever defining
# it (a guaranteed NameError); it is defined empty here, mirroring embed_html1.
embed_html1 = ''
embed_html2 = ''

# Polling parameters for the Simbals job queue.
_MAX_POLLS = 1000
_POLL_DELAY_S = 1

# Order of the music-descriptor values in ``process``'s return tuple; must
# match the order of the slider/textbox outputs wired to ``analyze_btn``.
_DESCRIPTOR_ORDER = (
    'Electric/Acoustic', 'Danceability', 'Arousal', 'Vocal/Instrumental',
    'Studio/Live', 'Music/Speech', 'Valence', 'Melodic', 'Articulation',
    'RhythmicStability', 'Dissonance', 'BPM', 'Binary', 'Key', 'Mode',
    'TexturalStability',
)


def download_audio(id_video):
    """Download the audio-only stream of a YouTube video.

    Parameters
    ----------
    id_video : str
        Full YouTube URL; must contain ``?v=<11-char id>``.

    Returns
    -------
    tuple[str, str, str]
        ``(audio_path, audio_path, embed_html)`` — the path is returned twice
        to preserve the original 3-tuple interface; ``embed_html`` is the
        embed snippet built around the video id.
    """
    video = YouTube(id_video)
    # The 11-character video id immediately after "?v=".
    video_id = id_video.split("?v=")[-1][:11]
    # First audio-only stream is what the original picked; keep that choice.
    audio_file = video.streams.filter(only_audio=True)[0].download()
    embed_html_all = embed_html1 + video_id + embed_html2
    return audio_file, audio_file, embed_html_all


def process(input_path):
    """Tag a YouTube URL's audio with Simbals and shape the results for the UI.

    Parameters
    ----------
    input_path : str
        YouTube URL typed into the textbox.

    Returns
    -------
    tuple
        ``(embed_html, moods, genres, instruments, vocal_gender, timbres,
        themes, audio_quality, *descriptor_strings)`` where the descriptor
        strings follow ``_DESCRIPTOR_ORDER`` — 24 values total, matching the
        ``outputs=`` list of ``analyze_btn.click``.

    Raises
    ------
    TimeoutError
        If the Simbals job is still unfinished after the polling budget.
    """
    audio_file, _, embed_html_all = download_audio(input_path)

    # Set up the client and request the tagging service on the main database.
    client = SimbalsAPIClient(os.environ['TOKEN'], debug=True)
    parameters = client.prepare_request("31415", audio_file=audio_file)
    parameters = client.add_tags(parameters, SIMBALS_MAIN_DATABASE)

    ok, code, job_id = client.launch_request(parameters)

    # Poll for the job: a status code of 4 means "not finished yet"; any
    # other code means the job is done.
    for _ in range(_MAX_POLLS):
        results = client.get_results(job_id)
        if results[0] != 4:
            break
        time.sleep(_POLL_DELAY_S)
    else:
        # Original silently fell through with a stale code-4 payload and
        # crashed later with an opaque KeyError; fail loudly instead.
        raise TimeoutError("Simbals tagging job did not finish in time")

    res = results[1]["response"]["tagging"]["2"]

    dict_moods = {m['name']: m['probability'] for m in res['moods']}
    genres = {g['name']: g['probability'] for g in res['genres']}
    themes = {t['name']: t['probability'] for t in res['themes']}
    instruments = {i['name']: i['probability'] for i in res['instruments']}
    timbres = {t['name']: t['probability'] for t in res['timbres']}

    # BPM/Key/Mode are categorical and passed through as-is; the remaining
    # descriptors are numeric and rounded to two decimals (same arithmetic
    # as the original int(round(v*100))/100.0).
    dict_desc = {}
    for d in res['music_descriptors']:
        if d['name'] in ('BPM', 'Key', 'Mode'):
            dict_desc[d['name']] = d['value']
        else:
            dict_desc[d['name']] = int(round(d['value'] * 100)) / 100.0

    # A probability of -1 flags "not applicable"; report it as 0.
    gender = res['vocal_gender'][0]
    vocalgender = {
        gender['name']: gender['probability'] if gender['probability'] != -1 else 0
    }

    quality = res['audio_quality'][0]
    audioquality = {quality['name']: quality['probability']}

    return (
        embed_html_all, dict_moods, genres, instruments, vocalgender,
        timbres, themes, audioquality,
        *(str(dict_desc[name]) for name in _DESCRIPTOR_ORDER),
    )


with gr.Blocks() as demo:
    # Input area: embed preview, URL textbox, analyze button.
    with gr.Row():
        with gr.Column():
            with gr.Row():
                html = gr.HTML()
            with gr.Row():
                audio_input = gr.Textbox(
                    placeholder='YouTube video URL', label='YouTube video URL'
                )
            with gr.Row():
                analyze_btn = gr.Button('Analyze File')

    # Results area: four columns of labels, textboxes and sliders.
    with gr.Row():
        with gr.Column():
            gr.HTML("Moods")
            dict_moods = gr.Label(label="Moods", show_label=False)
            gr.HTML("Themes")
            themes = gr.Label(label="Themes", show_label=False)
        with gr.Column():
            gr.HTML("Genres")
            genres = gr.Label(label="Genres", show_label=False)
            gr.HTML("BPM")
            bpm = gr.Textbox(label="BPM", show_label=False)
            gr.HTML("Key")
            key = gr.Textbox(label="Key", show_label=False)
            mode = gr.Textbox(label="Mode", show_label=False)
        with gr.Column():
            gr.HTML("Instruments")
            instruments = gr.Label(label="Instruments", show_label=False)
            gr.HTML("Vocal Gender")
            vocalgender = gr.Label(label="Vocal Gender", show_label=False)
            gr.HTML("Textures")
            timbres = gr.Label(label="Texture", show_label=False)
            gr.HTML("AudioQuality")
            audioquality = gr.Label(label="Audio Quality", show_label=False)
        with gr.Column():
            gr.HTML("Descriptors")
            acousticness = gr.Slider(label="Electric/Acoustic", minimum=0, maximum=1.0)
            danceability = gr.Slider(label="Danceability", minimum=0, maximum=1.0)
            arousal = gr.Slider(label="Arousal", minimum=0, maximum=1.0)
            instrumentalness = gr.Slider(label="Vocal/Instrumental", minimum=0, maximum=1.0)
            liveness = gr.Slider(label="Studio/Live", minimum=0, maximum=1.0)
            speechiness = gr.Slider(label="Music/Speech", minimum=0, maximum=1.0)
            valence = gr.Slider(label="Valence", minimum=0, maximum=1.0)
            melodic = gr.Slider(label="Melodic", minimum=0, maximum=1.0)
            articulation = gr.Slider(label="Articulation", minimum=0, maximum=1.0)
            rhythmicstability = gr.Slider(label="Rhythmic Stability", minimum=0, maximum=1.0)
            dissonance = gr.Slider(label="Dissonance", minimum=0, maximum=1.0)
            binary = gr.Slider(label="Binary", minimum=0, maximum=1.0)
            texturalstability = gr.Slider(label="Textural Stability", minimum=0, maximum=1.0)

    # Output order here must match the return order of ``process``.
    analyze_btn.click(
        process,
        inputs=[audio_input],
        outputs=[
            html, dict_moods, genres, instruments, vocalgender, timbres,
            themes, audioquality, acousticness, danceability, arousal,
            instrumentalness, liveness, speechiness, valence, melodic,
            articulation, rhythmicstability, dissonance, bpm, binary, key,
            mode, texturalstability,
        ],
    )

demo.launch(debug=True)