PierreHanna's picture
Update app.py
70561f0
raw
history blame
11.9 kB
from simbals_apis_public_clients.clients.services import SimbalsAPIClient
import time
import json
import os
import gradio as gr
import yt_dlp
# Simbals database identifiers.
# NOTE(review): SIMBALS_GLOBAL_DB is not referenced in the visible part of this file.
SIMBALS_GLOBAL_DB = 1
# Database identifier passed to client.add_tagging_all() in process().
SIMBALS_MAIN_DATABASE = 2
# Opening and closing halves of the YouTube <iframe> player markup;
# download_audio() concatenates the video id between them.
embed_html1 = '<iframe width="560" height="315" src="https://www.youtube.com/embed/'
embed_html2 = '" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'
def download_audio_(link):
    """Download the best available audio stream of ``link`` to ``audio.mp3``.

    Args:
        link: URL of the video to fetch, handed to yt-dlp unchanged.

    Returns:
        The ``title`` entry of the metadata dictionary returned by yt-dlp.
    """
    # NOTE(review): 'extract_audio' mirrors the command-line flag name; confirm
    # that the YoutubeDL Python API actually honours it.  No conversion is
    # requested here, so the file named audio.mp3 presumably holds whatever
    # container the 'bestaudio' format selects.
    options = {'extract_audio': True, 'format': 'bestaudio', 'outtmpl': 'audio.mp3'}
    with yt_dlp.YoutubeDL(options) as downloader:
        # extract_info(download=True) already fetches the media, so a separate
        # download() call would only repeat the work.
        info = downloader.extract_info(link, download=True)
    return info['title']
def download_audio(id_video):
    """Download the audio of a YouTube video and build its embed markup.

    The video id is taken from the ``v`` query parameter when present, from
    the path for ``youtu.be`` short links, and otherwise from the text that
    follows ``?v=`` (the historical heuristic).  At most 11 characters are
    kept in every case.

    Args:
        id_video: URL of the YouTube video.

    Returns:
        A 3-tuple ``(audio_file, audio_file, embed_html)`` where ``audio_file``
        is always ``'audio.mp3'`` (the path written by ``download_audio_``) and
        ``embed_html`` is the ``<iframe>`` markup for the video player.
    """
    # Function-scope imports keep this block self-contained.
    import html
    from urllib.parse import parse_qs, urlparse

    parsed = urlparse(id_video)
    query_ids = parse_qs(parsed.query).get('v')
    if query_ids:
        # Handles "?v=<id>" as well as "...&v=<id>" anywhere in the query.
        video_id = query_ids[0][:11]
    elif parsed.hostname in ('youtu.be', 'www.youtu.be'):
        video_id = parsed.path.lstrip('/')[:11]
    else:
        video_id = id_video.split("?v=")[-1][:11]

    # The returned title is not needed: the output path is fixed.
    download_audio_(id_video)
    audio_file = 'audio.mp3'

    # The id comes from user input; escape it before splicing it into HTML.
    embed_html_all = embed_html1 + html.escape(video_id, quote=True) + embed_html2
    return audio_file, audio_file, embed_html_all
def process_url(input_path):
    """Download the audio behind a video URL and analyze it.

    Args:
        input_path: video URL, passed unchanged to ``download_audio``.

    Returns:
        The result of ``process`` for the downloaded file and the embed HTML.
    """
    # download_audio returns (audio path, audio path, embed HTML).
    _, local_audio, player_html = download_audio(input_path)
    return process(local_audio, player_html)
def process_file(input_path):
    """Analyze a local audio file; an empty string is used as the embed HTML.

    Args:
        input_path: path of the audio file to analyze.

    Returns:
        The result of ``process`` for that file.
    """
    no_player_html = ''
    return process(audio_file=input_path, embed_html_all=no_player_html)
# Order in which the descriptor values are appended to process()'s result;
# it must match the order of the output components wired up in the UI.
_DESCRIPTOR_OUTPUT_ORDER = (
    'Electric/Acoustic',
    'Danceability',
    'Arousal',
    'Vocal/Instrumental',
    'Studio/Live',
    'Music/Speech',
    'Valence',
    'Melodic',
    'Articulation',
    'RhythmicStability',
    'Dissonance',
    'BPM',
    'Binary',
    'Key',
    'Mode',
    'TexturalStability',
)


def _probabilities_by_name(entries):
    """Map each entry's ``name`` to its ``probability``."""
    return {entry['name']: entry['probability'] for entry in entries}


def process(audio_file, embed_html_all):
    """Send ``audio_file`` to the Simbals tagging service and unpack the result.

    The API token is read from the ``TOKEN`` environment variable.  After the
    request is launched, the job is polled once per second, up to 1000 times;
    a status code of 4 means the job is not finished yet.  The working file
    ``audio.mp3`` is removed afterwards, whether or not the request succeeded.

    Args:
        audio_file: path of the audio file to analyze.
        embed_html_all: HTML returned unchanged as the first result element.

    Returns:
        A 24-tuple: ``embed_html_all``; the moods, genres, instruments, vocal
        gender, timbres, themes and audio quality dictionaries (name ->
        probability); then the 16 music descriptors, each converted to ``str``,
        in the order given by ``_DESCRIPTOR_OUTPUT_ORDER``.

    Raises:
        KeyError: if ``TOKEN`` is not set or the response lacks an expected key.
        TimeoutError: if the job is still unfinished after the last poll.
    """
    try:
        client = SimbalsAPIClient(os.environ['TOKEN'], debug=True)
        parameters = client.prepare_request("31415", audio_file=audio_file)
        parameters = client.add_tagging_all(parameters, SIMBALS_MAIN_DATABASE)

        # NOTE(review): the ok/code values are not inspected; confirm their
        # semantics with the client documentation and fail early if needed.
        _ok, _code, job_id = client.launch_request(parameters)

        for _ in range(1000):
            results = client.get_results(job_id)
            if results[0] != 4:
                break
            time.sleep(1)
        else:
            raise TimeoutError('Simbals job %s did not finish in time' % job_id)
    finally:
        # Best-effort cleanup of the downloaded audio (replaces `rm -f`).
        try:
            os.remove('audio.mp3')
        except OSError:
            pass

    # NOTE(review): "2" presumably corresponds to SIMBALS_MAIN_DATABASE; confirm.
    res = results[1]["response"]["tagging"]["2"]

    dict_moods = _probabilities_by_name(res['moods'])
    genres = _probabilities_by_name(res['genres'])
    themes = _probabilities_by_name(res['themes'])
    instruments = _probabilities_by_name(res['instruments'])
    timbres = _probabilities_by_name(res['timbres'])

    dict_desc = {}
    for descriptor in res['music_descriptors']:
        value = descriptor['value']
        if descriptor['name'] not in ('BPM', 'Key', 'Mode'):
            # Keep two decimals for every other descriptor.
            value = int(round(value * 100)) / 100.0
        dict_desc[descriptor['name']] = value

    # Only the first vocal-gender entry is used; a probability of -1 is shown as 0.
    first_gender = res['vocal_gender'][0]
    gender_probability = first_gender['probability']
    vocalgender = {
        first_gender['name']: 0 if gender_probability == -1 else gender_probability
    }

    first_quality = res['audio_quality'][0]
    audioquality = {first_quality['name']: first_quality['probability']}

    return (
        embed_html_all,
        dict_moods,
        genres,
        instruments,
        vocalgender,
        timbres,
        themes,
        audioquality,
        *(str(dict_desc[name]) for name in _DESCRIPTOR_OUTPUT_ORDER),
    )
# Gradio user interface: inputs at the top, analysis results below.
# NOTE(review): the nesting of rows/columns was reconstructed from the
# statement order; verify the rendered layout.
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            with gr.Row():
                # Filled with the YouTube player markup after a URL analysis.
                html = gr.HTML()
            with gr.Row():
                with gr.Column():
                    audio_url_input = gr.Textbox(placeholder='YouTube video URL', label='YouTube video URL')
                    analyze_url_btn = gr.Button('Analyze URL')
            with gr.Row():
                with gr.Column():
                    audio_input_file = gr.Audio(type="filepath", label='Audio Input')
                    analyze_file_btn = gr.Button('Analyze File')

    with gr.Row():
        with gr.Column():
            gr.HTML("<h3>Moods</h3>")
            dict_moods = gr.Label(label="Moods", show_label=False)
            gr.HTML("<h3>Themes</h3>")
            themes = gr.Label(label="Themes", show_label=False)
            gr.HTML("<h3>BPM</h3>")
            bpm = gr.Textbox(label="BPM", show_label=False)
            gr.HTML("<h3>Key</h3>")
            key = gr.Textbox(label="Key", show_label=False)
            mode = gr.Textbox(label="Mode", show_label=False)
        with gr.Column():
            gr.HTML("<h3>Genres</h3>")
            genres = gr.Label(label="Genres", show_label=False)
            gr.HTML("<h3>Instruments</h3>")
            instruments = gr.Label(label="Instruments", show_label=False)
            gr.HTML("<h3> Vocal Gender</h3>")
            vocalgender = gr.Label(label="Vocal Gender", show_label=False)
            gr.HTML("<h3>Textures</h3>")
            timbres = gr.Label(label="Texture", show_label=False)
            gr.HTML("<h3> AudioQuality</h3>")
            audioquality = gr.Label(label="Audio Quality", show_label=False)
        with gr.Column():
            gr.HTML("<h3> Descriptors</h3>")
            acousticness = gr.Slider(label="Electric/Acoustic", minimum=0, maximum=1.0)
            danceability = gr.Slider(label="Danceability", minimum=0, maximum=1.0)
            arousal = gr.Slider(label="Arousal", minimum=0, maximum=1.0)
            instrumentalness = gr.Slider(label="Vocal/Instrumental", minimum=0, maximum=1.0)
            liveness = gr.Slider(label="Studio/Live", minimum=0, maximum=1.0)
            speechiness = gr.Slider(label="Music/Speech", minimum=0, maximum=1.0)
            valence = gr.Slider(label="Valence", minimum=0, maximum=1.0)
            melodic = gr.Slider(label="Melodic", minimum=0, maximum=1.0)
            articulation = gr.Slider(label="Articulation", minimum=0, maximum=1.0)
            rhythmicstability = gr.Slider(label="Rhythmic Stability", minimum=0, maximum=1.0)
            dissonance = gr.Slider(label="Dissonance", minimum=0, maximum=1.0)
            binary = gr.Slider(label="Binary", minimum=0, maximum=1.0)
            texturalstability = gr.Slider(label="Textural Stability", minimum=0, maximum=1.0)

    # Single source of truth for the output wiring: the order must match the
    # tuple returned by process() (and therefore by process_url/process_file).
    analysis_outputs = [
        html,
        dict_moods,
        genres,
        instruments,
        vocalgender,
        timbres,
        themes,
        audioquality,
        acousticness,
        danceability,
        arousal,
        instrumentalness,
        liveness,
        speechiness,
        valence,
        melodic,
        articulation,
        rhythmicstability,
        dissonance,
        bpm,
        binary,
        key,
        mode,
        texturalstability,
    ]

    analyze_url_btn.click(process_url, inputs=[audio_url_input], outputs=analysis_outputs)

    gr.Examples(
        examples=[
            ["https://www.youtube.com/watch?v=QLWkl5YpiD0"],
            ["https://www.youtube.com/watch?v=u2INmt0dCD8"],
        ],
        inputs=[audio_url_input],
        outputs=analysis_outputs,
        fn=process_url,
        examples_per_page=15,
        cache_examples=True,
    )

    analyze_file_btn.click(process_file, inputs=[audio_input_file], outputs=analysis_outputs)

demo.launch(debug=True)