# NOTE(review): the original lines here ("Spaces:" / "Runtime error" x2) are
# Hugging Face Spaces page-scrape residue, not Python source; replaced with
# this comment so the file parses.
from simbals_apis_public_clients.clients.services import SimbalsAPIClient | |
import time | |
import json | |
import os | |
import gradio as gr | |
from pytube import YouTube | |
# Simbals database identifiers used when requesting tagging services.
SIMBALS_GLOBAL_DB = 1
SIMBALS_MAIN_DATABASE = 2

# HTML fragments concatenated around a YouTube video id to build the embed
# player shown next to the analysis results.
embed_html1 = '<iframe width="560" height="315" src="https://www.youtube.com/embed/'
embed_html2 = '" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'
def download_audio(id_video):
    """Download the audio track of a YouTube video.

    Parameters
    ----------
    id_video : str
        A YouTube watch URL (e.g. ``https://www.youtube.com/watch?v=XXXX``).

    Returns
    -------
    tuple
        ``(audio_file, audio_file, embed_html)`` — the local path of the
        downloaded audio stream (returned twice: two Gradio outputs consume
        it) and an ``<iframe>`` snippet embedding the video.
    """
    video = YouTube(id_video)
    # The 11-character video id follows the "?v=" query parameter.
    # Renamed from `id`, which shadowed the builtin.
    video_id = id_video.split("?v=")[-1][:11]
    print("ID youtube ", video_id)
    # First audio-only stream is enough; quality is not critical for tagging.
    audio_stream = video.streams.filter(only_audio=True)[0]
    audio_file = audio_stream.download()
    # (The original also derived an unused `audio_title` here — removed.)
    embed_html_all = embed_html1 + video_id + embed_html2
    return audio_file, audio_file, embed_html_all
def process_url(input_path):
    """Analyze a YouTube URL: download its audio and run the Simbals analysis.

    Parameters
    ----------
    input_path : str
        YouTube watch URL typed into the textbox.

    Returns
    -------
    tuple
        A flat 24-tuple ``(embed_html, *analysis_values)`` matching the 24
        Gradio output components wired to the "Analyze URL" button.
    """
    audio_file, _, embed_html_all = download_audio(input_path)
    # BUG FIX: the original returned `(html, process(...))` — a nested
    # 2-tuple — while Gradio expects one value per output component.
    return (embed_html_all,) + process(audio_file)
def process_file(input_path):
    """Analyze an uploaded audio file; the embed-HTML slot is left empty.

    Parameters
    ----------
    input_path : str
        Local filesystem path provided by the Gradio Audio component.

    Returns
    -------
    tuple
        A flat 24-tuple ``("", *analysis_values)`` matching the 24 Gradio
        output components.
    """
    # BUG FIX: the original returned `("", process(...))` — a nested 2-tuple —
    # while Gradio expects one value per output component.
    return ("",) + process(input_path)
def _probability_dict(entries):
    """Map each entry's 'name' to its 'probability' (format used by gr.Label)."""
    return {e['name']: e['probability'] for e in entries}


def process(audio_file):
    """Run Simbals tagging on a local audio file and unpack the results.

    Requires the ``TOKEN`` environment variable (Simbals API token).

    Parameters
    ----------
    audio_file : str
        Path to the audio file to analyze.

    Returns
    -------
    tuple
        23 values, in the order the Gradio outputs expect: the moods, genres,
        instruments, vocal-gender, timbres, themes and audio-quality label
        dicts, followed by 16 stringified music descriptors.

    Raises
    ------
    TimeoutError
        If the Simbals job is still unfinished after ~1000 polls.
    """
    client = SimbalsAPIClient(os.environ['TOKEN'], debug=True)
    parameters = client.prepare_request("31415", audio_file=audio_file)
    # Request the tagging service on the main database.
    parameters = client.add_tags(parameters, SIMBALS_MAIN_DATABASE)

    ok, code, job_id = client.launch_request(parameters)

    # Poll for results; a result code of 4 means "job not finished yet",
    # any other code means the job is done (per the Simbals client contract).
    for _ in range(1000):
        results = client.get_results(job_id)
        if results[0] != 4:
            break
        time.sleep(1)
    else:
        # Original code fell through with stale code-4 results and crashed
        # with a KeyError below; fail explicitly instead.
        raise TimeoutError("Simbals job did not finish in time")

    res = results[1]["response"]["tagging"]["2"]

    dict_moods = _probability_dict(res['moods'])
    genres = _probability_dict(res['genres'])
    themes = _probability_dict(res['themes'])
    instruments = _probability_dict(res['instruments'])
    timbres = _probability_dict(res['timbres'])

    # Descriptors: BPM/Key/Mode are passed through as-is; the other (float)
    # descriptors are rounded to two decimals for display.
    dict_desc = {}
    for d in res['music_descriptors']:
        if d['name'] in ['BPM', 'Key', 'Mode']:
            dict_desc[d['name']] = d['value']
        else:
            dict_desc[d['name']] = int(round(d['value'] * 100)) / 100.0

    # Vocal gender: a probability of -1 marks "not applicable"; show 0.
    vg = res['vocal_gender'][0]
    vocalgender = {vg['name']: vg['probability'] if vg['probability'] != -1 else 0}

    aq = res['audio_quality'][0]
    audioquality = {aq['name']: aq['probability']}

    return (dict_moods, genres, instruments, vocalgender, timbres, themes,
            audioquality,
            str(dict_desc['Electric/Acoustic']), str(dict_desc['Danceability']),
            str(dict_desc['Arousal']), str(dict_desc['Vocal/Instrumental']),
            str(dict_desc['Studio/Live']), str(dict_desc['Music/Speech']),
            str(dict_desc['Valence']), str(dict_desc['Melodic']),
            str(dict_desc['Articulation']), str(dict_desc['RhythmicStability']),
            str(dict_desc['Dissonance']), str(dict_desc['BPM']),
            str(dict_desc['Binary']), str(dict_desc['Key']),
            str(dict_desc['Mode']), str(dict_desc['TexturalStability']))
# ---------------------------------------------------------------------------
# Gradio UI: a URL textbox and an audio-file input on the left, and the
# analysis results (label dicts, descriptors as sliders/textboxes) below.
# ---------------------------------------------------------------------------
with gr.Blocks() as demo:
    with gr.Row():
        with gr.Column():
            with gr.Row():
                # Embedded YouTube player, filled only for URL analyses.
                html = gr.HTML()
            with gr.Row():
                with gr.Column():
                    audio_input = gr.Textbox(placeholder='YouTube video URL', label='YouTube video URL')
                    analyze_url_btn = gr.Button('Analyze URL')
            with gr.Row():
                with gr.Column():
                    audio_input_file = gr.Audio(type="filepath", label='Audio Input')
                    analyze_file_btn = gr.Button('Analyze File')
    with gr.Row():
        with gr.Column():
            gr.HTML("<h3>Moods</h3>")
            dict_moods = gr.Label(label="Moods", show_label=False)
            gr.HTML("<h3>Themes</h3>")
            themes = gr.Label(label="Themes", show_label=False)
        with gr.Column():
            gr.HTML("<h3>Genres</h3>")
            genres = gr.Label(label="Genres", show_label=False)
            gr.HTML("<h3>BPM</h3>")
            bpm = gr.Textbox(label="BPM", show_label=False)
            gr.HTML("<h3>Key</h3>")
            key = gr.Textbox(label="Key", show_label=False)
            mode = gr.Textbox(label="Mode", show_label=False)
        with gr.Column():
            gr.HTML("<h3>Instruments</h3>")
            instruments = gr.Label(label="Instruments", show_label=False)
            gr.HTML("<h3> Vocal Gender</h3>")
            vocalgender = gr.Label(label="Vocal Gender", show_label=False)
            gr.HTML("<h3>Textures</h3>")
            timbres = gr.Label(label="Texture", show_label=False)
            gr.HTML("<h3> AudioQuality</h3>")
            audioquality = gr.Label(label="Audio Quality", show_label=False)
        with gr.Column():
            gr.HTML("<h3> Descriptors</h3>")
            acousticness = gr.Slider(label="Electric/Acoustic", minimum=0, maximum=1.0)
            danceability = gr.Slider(label="Danceability", minimum=0, maximum=1.0)
            arousal = gr.Slider(label="Arousal", minimum=0, maximum=1.0)
            instrumentalness = gr.Slider(label="Vocal/Instrumental", minimum=0, maximum=1.0)
            liveness = gr.Slider(label="Studio/Live", minimum=0, maximum=1.0)
            speechiness = gr.Slider(label="Music/Speech", minimum=0, maximum=1.0)
            valence = gr.Slider(label="Valence", minimum=0, maximum=1.0)
            melodic = gr.Slider(label="Melodic", minimum=0, maximum=1.0)
            articulation = gr.Slider(label="Articulation", minimum=0, maximum=1.0)
            rhythmicstability = gr.Slider(label="Rhythmic Stability", minimum=0, maximum=1.0)
            dissonance = gr.Slider(label="Dissonance", minimum=0, maximum=1.0)
            binary = gr.Slider(label="Binary", minimum=0, maximum=1.0)
            texturalstability = gr.Slider(label="Textural Stability", minimum=0, maximum=1.0)

    # The 24 output components, in the exact order produced by
    # process_url / process_file (html first, then the 23 analysis values).
    analysis_outputs = [html,
                        dict_moods,
                        genres,
                        instruments,
                        vocalgender,
                        timbres,
                        themes,
                        audioquality,
                        acousticness,
                        danceability,
                        arousal,
                        instrumentalness,
                        liveness,
                        speechiness,
                        valence,
                        melodic,
                        articulation,
                        rhythmicstability,
                        dissonance,
                        bpm,
                        binary,
                        key,
                        mode,
                        texturalstability]

    # BUG FIX: the URL button previously called `process` (23 values) against
    # 24 outputs; `process_url` also produces the embed HTML.
    analyze_url_btn.click(process_url, inputs=[audio_input],
                          outputs=analysis_outputs)
    # BUG FIX: the file button previously read the URL textbox instead of the
    # uploaded file component.
    analyze_file_btn.click(process_file, inputs=[audio_input_file],
                           outputs=analysis_outputs)

demo.launch(debug=True)