from simbals_apis_public_clients.clients.services import SimbalsAPIClient
import time
import json
import os
import gradio as gr
import yt_dlp

# SIMBALS database identifiers (values defined by the SIMBALS API).
SIMBALS_GLOBAL_DB = 1
SIMBALS_MAIN_DATABASE = 2

# HTML fragments wrapped around a YouTube video id to build the embed player
# shown in the UI: embed_html1 + <video id> + embed_html2.
# NOTE(review): embed_html2 was referenced but never defined in the original
# source, which made download_audio() raise NameError on first use. It is
# defined here as an empty string; fill both fragments in to render a real
# <iframe> embed.
embed_html1 = ''
embed_html2 = ''


def download_audio_(link):
    """Download the audio track of a YouTube video to the fixed file 'audio.mp3'.

    Parameters:
        link: YouTube video URL (or id) understood by yt-dlp.

    Returns:
        The video title reported by yt-dlp.
    """
    ydl_opts = {'extract_audio': True, 'format': 'bestaudio', 'outtmpl': 'audio.mp3'}
    with yt_dlp.YoutubeDL(ydl_opts) as video:
        # extract_info(download=True) already fetches the file; the original's
        # extra video.download(link) call was redundant (and passed a str where
        # yt-dlp expects a list of URLs), so it is removed.
        info_dict = video.extract_info(link, download=True)
        return info_dict['title']


def download_audio(id_video):
    """Download audio for a YouTube URL and build the matching embed HTML.

    Parameters:
        id_video: full YouTube URL, e.g. 'https://www.youtube.com/watch?v=XXXXXXXXXXX'.

    Returns:
        (audio_path, audio_path, embed_html) — the path is returned twice to
        match the two call-site uses in the original code.
    """
    # The 11 characters after '?v=' are the YouTube video id.
    video_id = id_video.split("?v=")[-1][:11]
    download_audio_(id_video)
    # download_audio_ always writes to this fixed name (see its outtmpl).
    audio_file = 'audio.mp3'
    embed_html_all = embed_html1 + video_id + embed_html2
    return audio_file, audio_file, embed_html_all


def process_url(input_path):
    """Fetch audio from a YouTube URL, then run the SIMBALS analysis on it."""
    audio_file, _, embed_html_all = download_audio(input_path)
    return process(audio_file, embed_html_all)


def process_file(input_path):
    """Run the SIMBALS analysis on a local audio file (no embed HTML)."""
    return process(input_path, '')


def process(audio_file, embed_html_all):
    """Send an audio file to the SIMBALS tagging API and unpack the results.

    Parameters:
        audio_file: path of the audio file to analyze.
        embed_html_all: HTML snippet to pass through to the UI's HTML output.

    Returns:
        A tuple matching the Gradio outputs list: (embed_html, moods, genres,
        instruments, vocal_gender, timbres, themes, audio_quality, then the
        music descriptors as strings, and finally music_ratio).

    Raises:
        KeyError if the TOKEN environment variable is unset, plus whatever the
        SIMBALS client raises on network/API failure.
    """
    client = SimbalsAPIClient(os.environ['TOKEN'], debug=True)
    parameters = client.prepare_request("31415", audio_file=audio_file)
    # Request the full tagging service on the main database.
    parameters = client.add_tagging_all(parameters, SIMBALS_MAIN_DATABASE)
    ok, code, job_id = client.launch_request(parameters)

    # Poll for results with the obtained job_id. A status code of 4 means the
    # job is not finished yet; any other code means it is done.
    results = None
    for _ in range(1000):
        results = client.get_results(job_id)
        if results[0] != 4:
            break
        time.sleep(1)
    if results is None or results[0] == 4:
        # Original code would crash on an unbound/unfinished result; fail
        # explicitly instead so the UI shows a meaningful error.
        raise RuntimeError("SIMBALS job %s did not finish in time" % job_id)

    # Remove the temporary download so repeated analyses start clean.
    os.system('rm -f audio.mp3')

    res = results[1]["response"]["tagging"]["2"]

    dict_moods = {m['name']: m['probability'] for m in res['moods']}

    dict_desc = {}
    for d in res['music_descriptors']:
        if d['name'] in ('BPM', 'Key', 'Mode'):
            dict_desc[d['name']] = d['value']
        else:
            # Round continuous descriptors to 2 decimal places.
            dict_desc[d['name']] = int(round(d['value'] * 100)) / 100.0

    genres = {d['name']: d['probability'] for d in res['genres']}
    themes = {d['name']: d['probability'] for d in res['themes']}
    instruments = {d['name']: d['probability'] for d in res['instruments']}
    timbres = {d['name']: d['probability'] for d in res['timbres']}

    # A probability of -1 marks "not applicable" (e.g. no vocals); show 0.
    vg = res['vocal_gender'][0]
    vocalgender = {vg['name']: vg['probability'] if vg['probability'] != -1 else 0}

    musicratio = res['music_ratio']
    aq = res['audio_quality'][0]
    audioquality = {aq['name']: aq['probability']}

    return (embed_html_all, dict_moods, genres, instruments, vocalgender,
            timbres, themes, audioquality,
            str(dict_desc['Electric/Acoustic']), str(dict_desc['Danceability']),
            str(dict_desc['Arousal']), str(dict_desc['Vocal/Instrumental']),
            str(dict_desc['Studio/Live']), str(dict_desc['Music/Speech']),
            str(dict_desc['Valence']), str(dict_desc['Melodic']),
            str(dict_desc['Articulation']), str(dict_desc['RhythmicStability']),
            str(dict_desc['Dissonance']), str(dict_desc['BPM']),
            str(dict_desc['Binary']), str(dict_desc['Key']),
            str(dict_desc['Mode']), str(dict_desc['TexturalStability']),
            musicratio)
# ---------------------------------------------------------------------------
# Gradio UI: one HTML embed + two input paths (YouTube URL, local file), with
# labels/sliders for every value returned by process().
# NOTE(review): the original section headings were multi-line gr.HTML strings
# whose markup was lost in the source; plain <h4> headings are used here —
# confirm against the deployed app's styling.
# ---------------------------------------------------------------------------
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            with gr.Row():
                # Filled with the YouTube embed HTML after a URL analysis.
                html = gr.HTML()
            with gr.Row():
                with gr.Column():
                    audio_url_input = gr.Textbox(placeholder='YouTube video URL',
                                                 label='YouTube video URL')
                    analyze_url_btn = gr.Button('Analyze URL')
            with gr.Row():
                with gr.Column():
                    audio_input_file = gr.Audio(type="filepath", label='Audio Input')
                    analyze_file_btn = gr.Button('Analyze File')

    with gr.Row():
        with gr.Column():
            gr.HTML("<h4>Moods</h4>")
            dict_moods = gr.Label(label="Moods", show_label=False)
            gr.HTML("<h4>Themes</h4>")
            themes = gr.Label(label="Themes", show_label=False)
            gr.HTML("<h4>BPM</h4>")
            bpm = gr.Textbox(label="BPM", show_label=False)
            gr.HTML("<h4>Key</h4>")
            key = gr.Textbox(label="Key", show_label=False)
            mode = gr.Textbox(label="Mode", show_label=False)
        with gr.Column():
            gr.HTML("<h4>Genres</h4>")
            genres = gr.Label(label="Genres", show_label=False)
            gr.HTML("<h4>Instruments</h4>")
            instruments = gr.Label(label="Instruments", show_label=False)
            gr.HTML("<h4>Vocal Gender</h4>")
            vocalgender = gr.Label(label="Vocal Gender", show_label=False)
            gr.HTML("<h4>Textures</h4>")
            timbres = gr.Label(label="Texture", show_label=False)
            gr.HTML("<h4>AudioQuality</h4>")
            audioquality = gr.Label(label="Audio Quality", show_label=False)
        with gr.Column():
            gr.HTML("<h4>Descriptors</h4>")
            musicratio = gr.Slider(label="Music Ratio", minimum=0, maximum=1.0)
            acousticness = gr.Slider(label="Electric/Acoustic", minimum=0, maximum=1.0)
            danceability = gr.Slider(label="Danceability", minimum=0, maximum=1.0)
            arousal = gr.Slider(label="Arousal", minimum=0, maximum=1.0)
            instrumentalness = gr.Slider(label="Vocal/Instrumental", minimum=0, maximum=1.0)
            liveness = gr.Slider(label="Studio/Live", minimum=0, maximum=1.0)
            speechiness = gr.Slider(label="Music/Speech", minimum=0, maximum=1.0)
            valence = gr.Slider(label="Valence", minimum=0, maximum=1.0)
            melodic = gr.Slider(label="Melodic", minimum=0, maximum=1.0)
            articulation = gr.Slider(label="Articulation", minimum=0, maximum=1.0)
            rhythmicstability = gr.Slider(label="Rhythmic Stability", minimum=0, maximum=1.0)
            dissonance = gr.Slider(label="Dissonance", minimum=0, maximum=1.0)
            binary = gr.Slider(label="Binary", minimum=0, maximum=1.0)
            texturalstability = gr.Slider(label="Textural Stability", minimum=0, maximum=1.0)

    # One shared outputs list: the original repeated this 25-element list
    # verbatim in three places, which invited drift between handlers. The
    # order must match the tuple returned by process().
    all_outputs = [html, dict_moods, genres, instruments, vocalgender, timbres,
                   themes, audioquality, acousticness, danceability, arousal,
                   instrumentalness, liveness, speechiness, valence, melodic,
                   articulation, rhythmicstability, dissonance, bpm, binary,
                   key, mode, texturalstability, musicratio]

    analyze_url_btn.click(process_url, inputs=[audio_url_input], outputs=all_outputs)

    gr.Examples(
        examples=[["https://www.youtube.com/watch?v=v2AC41dglnM"],
                  ["https://www.youtube.com/watch?v=We4-lJe0JRY"],
                  ["https://www.youtube.com/watch?v=HavnIXPxnjk"],
                  ["https://www.youtube.com/watch?v=LuKm4L9ryB0"],
                  ["https://www.youtube.com/watch?v=-s5-VXG7Qio"],
                  ["https://www.youtube.com/watch?v=n4CXVuODVhY"],
                  ["https://www.youtube.com/watch?v=Nj2U6rhnucI"],
                  ["https://www.youtube.com/watch?v=RlrNPBfOowg"],
                  ["https://www.youtube.com/watch?v=6BOHpjIZyx0"],
                  ["https://www.youtube.com/watch?v=QMbvpftTEUs"],
                  ["https://www.youtube.com/watch?v=irVDVag2br4"]],
        inputs=[audio_url_input],
        outputs=all_outputs,
        fn=process_url,
        examples_per_page=15,
        cache_examples=True,  # pre-computes results for the examples at startup
    )

    analyze_file_btn.click(process_file, inputs=[audio_input_file], outputs=all_outputs)

demo.launch(debug=True)