# Page header captured with this file (not part of the program):
# "Update app.py" by PierreHanna, commit 7d037c0, 11 kB.
from simbals_apis_public_clients.clients.services import SimbalsAPIClient
import time
import json
import os
import gradio as gr
from pytube import YouTube
# Database identifiers handed to the Simbals client when adding a service to a
# request. Only SIMBALS_MAIN_DATABASE is referenced by the code in this file.
SIMBALS_GLOBAL_DB, SIMBALS_MAIN_DATABASE = 1, 2

# The two halves of the YouTube <iframe> embed markup; the video id is
# concatenated between them to form the complete player snippet.
embed_html1 = (
    '<iframe width="560" height="315" '
    'src="https://www.youtube.com/embed/'
)
embed_html2 = (
    '" title="YouTube video player" frameborder="0" '
    'allow="accelerometer; autoplay; clipboard-write; encrypted-media; '
    'gyroscope; picture-in-picture" allowfullscreen></iframe>'
)
def download_audio(id_video):
    """Download the audio of a YouTube video and build its embed markup.

    Args:
        id_video: The YouTube video URL. Despite the parameter name, the whole
            URL is expected, since it is passed straight to ``YouTube``.

    Returns:
        A 3-tuple ``(audio_file, audio_file, embed_html_all)``: the value
        returned by the stream download (presumably the local file path),
        repeated twice for backward compatibility, followed by the
        ``<iframe>`` HTML that embeds the video.

    Raises:
        IndexError: If the video exposes no audio-only stream.
    """
    video = YouTube(id_video)
    # Use the id pytube already parsed from the URL. Slicing the text after
    # "?v=" only works for one URL shape; for "youtu.be/<id>" or "...&v=<id>"
    # it produced an arbitrary 11-character prefix of the URL, which was then
    # injected into the iframe markup.
    video_id = video.video_id
    print("ID youtube ", video_id)
    # Take the first audio-only stream offered for this video.
    audio_stream = video.streams.filter(only_audio=True)[0]
    audio_file = audio_stream.download()
    embed_html_all = embed_html1 + video_id + embed_html2
    return audio_file, audio_file, embed_html_all
def process_url(input_path):
    """Analyze the audio of a YouTube video given its URL.

    The audio is fetched with ``download_audio`` and then handed to
    ``process`` together with the HTML snippet that embeds the video.

    Args:
        input_path: The YouTube video URL entered by the user.

    Returns:
        Whatever ``process`` returns for the downloaded audio.
    """
    # download_audio returns the audio file twice; only one copy is needed.
    _same_file, audio_file, embed_html_all = download_audio(input_path)
    return process(audio_file=audio_file, embed_html_all=embed_html_all)
def process_file(input_path):
    """Analyze a local audio file; no video embed accompanies the result.

    Args:
        input_path: Path of the audio file to analyze.

    Returns:
        Whatever ``process`` returns, with an empty string as the embed HTML.
    """
    no_embed_html = ''
    return process(audio_file=input_path, embed_html_all=no_embed_html)
# Order in which the music descriptors are appended to the tuple returned by
# process(); the UI output components are listed in this same order.
_DESCRIPTOR_ORDER = (
    'Electric/Acoustic',
    'Danceability',
    'Arousal',
    'Vocal/Instrumental',
    'Studio/Live',
    'Music/Speech',
    'Valence',
    'Melodic',
    'Articulation',
    'RhythmicStability',
    'Dissonance',
    'BPM',
    'Binary',
    'Key',
    'Mode',
    'TexturalStability',
)

# Descriptors whose value is passed through unchanged (no 2-decimal rounding).
_RAW_DESCRIPTORS = ('BPM', 'Key', 'Mode')


def _probabilities_by_name(entries):
    """Map each entry's ``name`` to its ``probability``."""
    return {entry['name']: entry['probability'] for entry in entries}


def _wait_for_results(client, job_id, max_polls=1000, poll_interval=1):
    """Poll ``client.get_results`` until the job is no longer pending.

    A result code of 4 means the job is not finished yet; any other code means
    it is finished. Raises ``TimeoutError`` if the job is still pending after
    ``max_polls`` attempts spaced ``poll_interval`` seconds apart.
    """
    for _ in range(max_polls):
        results = client.get_results(job_id)
        if results[0] != 4:
            return results
        time.sleep(poll_interval)
    raise TimeoutError(
        'Simbals job %s did not finish after %d polls' % (job_id, max_polls)
    )


def process(audio_file, embed_html_all):
    """Run the Simbals tagging service on an audio file and unpack the result.

    Args:
        audio_file: Path of the audio file sent with the request.
        embed_html_all: HTML snippet returned unchanged as the first element
            of the result (empty string when there is no video to embed).

    Returns:
        A 24-tuple: ``embed_html_all``; then the name -> probability dicts for
        moods, genres, instruments, vocal gender, timbres, themes and audio
        quality; then the music descriptors as strings, in the order of
        ``_DESCRIPTOR_ORDER``.

    Raises:
        KeyError: If ``TOKEN`` is missing from the environment, or an expected
            field is absent from the service response.
        TimeoutError: If the job is still pending after the polling budget.
    """
    client = SimbalsAPIClient(os.environ['TOKEN'], debug=True)
    parameters = client.prepare_request("31415", audio_file=audio_file)
    parameters = client.add_tags(parameters, SIMBALS_MAIN_DATABASE)

    # NOTE(review): the ok/code values of launch_request are not inspected
    # (as before); confirm their semantics before adding a check here.
    _ok, _code, job_id = client.launch_request(parameters)

    # Previously a job still pending after the last poll fell through and was
    # indexed as if finished; now it fails with an explicit timeout instead.
    results = _wait_for_results(client, job_id)

    # NOTE(review): "2" presumably mirrors SIMBALS_MAIN_DATABASE - confirm.
    tagging = results[1]["response"]["tagging"]["2"]

    moods = _probabilities_by_name(tagging['moods'])

    descriptors = {}
    for descriptor in tagging['music_descriptors']:
        value = descriptor['value']
        if descriptor['name'] not in _RAW_DESCRIPTORS:
            # Keep two decimals for the remaining descriptors.
            value = int(round(value * 100)) / 100.0
        descriptors[descriptor['name']] = value

    genres = _probabilities_by_name(tagging['genres'])
    themes = _probabilities_by_name(tagging['themes'])
    instruments = _probabilities_by_name(tagging['instruments'])
    timbres = _probabilities_by_name(tagging['timbres'])

    # Only the first vocal-gender entry is reported; a probability of -1 is
    # shown as 0 (presumably -1 marks "not available" - confirm with the API).
    top_gender = tagging['vocal_gender'][0]
    gender_probability = top_gender['probability']
    vocal_gender = {
        top_gender['name']: 0 if gender_probability == -1 else gender_probability
    }

    # Only the first audio-quality entry is reported.
    top_quality = tagging['audio_quality'][0]
    audio_quality = {top_quality['name']: top_quality['probability']}

    return (
        embed_html_all,
        moods,
        genres,
        instruments,
        vocal_gender,
        timbres,
        themes,
        audio_quality,
        *(str(descriptors[name]) for name in _DESCRIPTOR_ORDER),
    )
# Gradio UI: a YouTube-URL input and an audio-file input, each with its own
# "Analyze" button, plus the components that display the analysis results.
# NOTE(review): the source had lost its indentation, so the Row/Column nesting
# below is a reconstruction - confirm it against the intended page layout.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            with gr.Row():
                # Receives the YouTube embed markup (empty for file uploads).
                html = gr.HTML()
            with gr.Row():
                with gr.Column():
                    audio_url_input = gr.Textbox(placeholder='YouTube video URL', label='YouTube video URL')
                    analyze_url_btn = gr.Button('Analyze URL')
            with gr.Row():
                with gr.Column():
                    audio_input_file = gr.Audio(type="filepath", label='Audio Input')
                    analyze_file_btn = gr.Button('Analyze File')
    with gr.Row():
        with gr.Column():
            gr.HTML("<h3>Moods</h3>")
            dict_moods = gr.Label(label="Moods", show_label=False)
            gr.HTML("<h3>Themes</h3>")
            themes = gr.Label(label="Themes", show_label=False)
        with gr.Column():
            gr.HTML("<h3>Genres</h3>")
            genres = gr.Label(label="Genres", show_label=False)
            gr.HTML("<h3>BPM</h3>")
            bpm = gr.Textbox(label="BPM", show_label=False)
            gr.HTML("<h3>Key</h3>")
            key = gr.Textbox(label="Key", show_label=False)
            mode = gr.Textbox(label="Mode", show_label=False)
        with gr.Column():
            gr.HTML("<h3>Instruments</h3>")
            instruments = gr.Label(label="Instruments", show_label=False)
            gr.HTML("<h3> Vocal Gender</h3>")
            vocalgender = gr.Label(label="Vocal Gender", show_label=False)
            gr.HTML("<h3>Textures</h3>")
            timbres = gr.Label(label="Texture", show_label=False)
            gr.HTML("<h3> AudioQuality</h3>")
            audioquality = gr.Label(label="Audio Quality", show_label=False)
        with gr.Column():
            gr.HTML("<h3> Descriptors</h3>")
            acousticness = gr.Slider(label="Electric/Acoustic", minimum=0, maximum=1.0)
            danceability = gr.Slider(label="Danceability", minimum=0, maximum=1.0)
            arousal = gr.Slider(label="Arousal", minimum=0, maximum=1.0)
            instrumentalness = gr.Slider(label="Vocal/Instrumental", minimum=0, maximum=1.0)
            liveness = gr.Slider(label="Studio/Live", minimum=0, maximum=1.0)
            speechiness = gr.Slider(label="Music/Speech", minimum=0, maximum=1.0)
            valence = gr.Slider(label="Valence", minimum=0, maximum=1.0)
            melodic = gr.Slider(label="Melodic", minimum=0, maximum=1.0)
            articulation = gr.Slider(label="Articulation", minimum=0, maximum=1.0)
            rhythmicstability = gr.Slider(label="Rhythmic Stability", minimum=0, maximum=1.0)
            dissonance = gr.Slider(label="Dissonance", minimum=0, maximum=1.0)
            binary = gr.Slider(label="Binary", minimum=0, maximum=1.0)
            texturalstability = gr.Slider(label="Textural Stability", minimum=0, maximum=1.0)

    # Output components in exactly the order of the tuple returned by
    # process(). Both buttons share this one list so the two handlers cannot
    # drift out of sync with each other.
    analysis_outputs = [
        html,
        dict_moods,
        genres,
        instruments,
        vocalgender,
        timbres,
        themes,
        audioquality,
        acousticness,
        danceability,
        arousal,
        instrumentalness,
        liveness,
        speechiness,
        valence,
        melodic,
        articulation,
        rhythmicstability,
        dissonance,
        bpm,
        binary,
        key,
        mode,
        texturalstability,
    ]

    analyze_url_btn.click(process_url, inputs=[audio_url_input], outputs=analysis_outputs)
    analyze_file_btn.click(process_file, inputs=[audio_input_file], outputs=analysis_outputs)

demo.launch(debug=True)