File size: 10,957 Bytes
5558062
 
 
 
 
3b406ca
5558062
 
 
 
27ecaed
 
 
aafa630
77e0e67
 
aafa630
 
 
 
27ecaed
 
 
aafa630
 
77e0e67
27ecaed
 
 
feb1951
5558062
27ecaed
50c1ac4
feb1951
 
50c1ac4
feb1951
7d037c0
5558062
27ecaed
285ba22
5558062
 
5b1ec57
5558062
 
 
 
 
 
 
 
 
 
 
830f8b5
 
99236eb
812d7ec
830f8b5
812d7ec
 
 
830f8b5
798481c
 
 
 
 
 
830f8b5
812d7ec
830f8b5
812d7ec
 
9099754
830f8b5
9099754
 
 
830f8b5
9099754
830f8b5
6946728
830f8b5
6946728
 
701d5f1
a718c40
 
 
 
 
830f8b5
701d5f1
6946728
830f8b5
50c1ac4
 
baf9728
dd3cb6b
01baa28
356b9b0
 
 
 
 
 
 
27ecaed
 
 
 
1064e64
50c1ac4
1064e64
27ecaed
356b9b0
1064e64
 
 
feb1951
356b9b0
 
f4f5f6a
ba15b51
f4f5f6a
558bd35
 
 
 
baf9728
31c0c8e
baf9728
 
31c0c8e
 
baf9728
ba15b51
1b8f086
 
 
 
ba15b51
 
434a807
de5ecfe
6946728
434a807
 
6946728
434a807
 
6946728
4d0049a
7513630
 
dd3cb6b
baf9728
 
 
 
 
 
 
 
 
aac7846
baf9728
 
 
dd3cb6b
 
558bd35
456a03f
 
 
 
 
558bd35
baf9728
343a53f
27ecaed
 
baf9728
6946728
 
 
baf9728
6946728
baf9728
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
feb1951
50c1ac4
feb1951
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e204a2f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
from simbals_apis_public_clients.clients.services import SimbalsAPIClient
import time
import json
import os
import gradio as gr
import yt_dlp

SIMBALS_GLOBAL_DB = 1
SIMBALS_MAIN_DATABASE = 2

embed_html1 = '<iframe width="560" height="315" src="https://www.youtube.com/embed/'
embed_html2 = '" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'

def download_audio_(link):
  #with yt_dlp.YoutubeDL({'extract_audio': True, 'format': 'bestaudio', 'outtmpl': '%(title)s.mp3'}) as video:
  with yt_dlp.YoutubeDL({'extract_audio': True, 'format': 'bestaudio', 'outtmpl': 'audio.mp3'}) as video:
    info_dict = video.extract_info(link, download = True)
    video_title = info_dict['title']
    video.download(link)    
    return video_title

def download_audio(id_video):
    id = id_video.split("?v=")[-1][:11]
    audio_file = download_audio_(id_video)
    audio_file = audio_file+'.mp3'
    audio_file = 'audio.mp3'
    embed_html_all = embed_html1 + id +embed_html2
    return audio_file, audio_file, embed_html_all

def process_url(input_path):
    # setup the client
    audio_file, audio_file, embed_html_all = download_audio(input_path)
    return process(audio_file, embed_html_all)

def process_file(input_path):
    return process(input_path, '')

def process(audio_file, embed_html_all):
    client = SimbalsAPIClient(os.environ['TOKEN'], debug=True)
    parameters = client.prepare_request("31415", audio_file=audio_file)
        
    # add audio features service with an available database
    #parameters = client.add_audio_features(parameters, SIMBALS_MAIN_DATABASE)
    parameters = client.add_tagging_all(parameters, SIMBALS_MAIN_DATABASE)
    
    # launch the request and test for ok/code values
    ok, code, job_id = client.launch_request(parameters)
    # try to get the results with obtained job_id. If code is 4, job is not finished. In all other cases, job is finished
    for i in range(1000):
        results=client.get_results(job_id)
        if results[0] != 4:
            output = json.dumps(results[1], indent=1)
            break
        time.sleep(1)


    res = results[1]["response"]["tagging"]["2"]

    dict_moods = {}
    for m in res['moods']:
      dict_moods[m['name']] = m['probability']
        
    dict_desc={}
    for d in res['music_descriptors']:
        if d['name'] in ['BPM', 'Key', 'Mode']:
            dict_desc[d['name']] = d['value']
        else:
            dict_desc[d['name']] = int(round(d['value']*100))/100.0
        
        
    
    genres={}
    for d in res['genres']:
      genres[d['name']] = d['probability']
        
    themes = {}
    for d in res['themes']:
      themes[d['name']]= d['probability']
        
    instruments = {}
    for d in res['instruments']:
      instruments[d['name']] = d['probability']
    
    timbres = {}
    for d in res['timbres']:
      timbres[d['name']] = d['probability']
        
    vocalgender= {}
    if res['vocal_gender'][0]['probability'] != -1:
        vocalgender[res['vocal_gender'][0]['name']]= res['vocal_gender'][0]['probability']
    else:
        vocalgender[res['vocal_gender'][0]['name']]= 0
        
    
    audioquality = {}
    audioquality[res['audio_quality'][0]['name']]= res['audio_quality'][0]['probability']
    
    return embed_html_all, dict_moods, genres, instruments, vocalgender, timbres, themes, audioquality,str(dict_desc['Electric/Acoustic']),str(dict_desc['Danceability']),str(dict_desc['Arousal']),str(dict_desc['Vocal/Instrumental']),str(dict_desc['Studio/Live']),str(dict_desc['Music/Speech']),str(dict_desc['Valence']),str(dict_desc['Melodic']),str(dict_desc['Articulation']),str(dict_desc['RhythmicStability']),str(dict_desc['Dissonance']),str(dict_desc['BPM']),str(dict_desc['Binary']),str(dict_desc['Key']),str(dict_desc['Mode']),str(dict_desc['TexturalStability'])
    #return dict_moods, genres, instruments, vocalgender, timbres, themes, audioquality,str(dict_desc['Electric/Acoustic']),str(dict_desc['Danceability']),str(dict_desc['Arousal']),str(dict_desc['Vocal/Instrumental']),str(dict_desc['Studio/Live']),str(dict_desc['Music/Speech']),str(dict_desc['Valence']),str(dict_desc['Melodic']),str(dict_desc['Articulation']),str(dict_desc['RhythmicStability']),str(dict_desc['Dissonance']),str(dict_desc['BPM']),str(dict_desc['Binary']),str(dict_desc['Key']),str(dict_desc['Mode']),str(dict_desc['TexturalStability'])
            

 
with gr.Blocks() as demo:
    
    with gr.Row():

        with gr.Column():

            with gr.Row():
                #gr.HTML(embed_html)
                html = gr.HTML()
            
            with gr.Row():
                with gr.Column():
                    audio_url_input = gr.Textbox(placeholder='YouTube video URL', label='YouTube video URL')
                    analyze_url_btn = gr.Button('Analyze URL')

            with gr.Row():
                with gr.Column():
                    audio_input_file = gr.Audio(type="filepath", label='Audio Input')
                    analyze_file_btn = gr.Button('Analyze File')
                

            with gr.Row():
                with gr.Column():
                    gr.HTML("<h3>Moods</h3>")
                    dict_moods=gr.Label(label="Moods", show_label=False)

                    gr.HTML("<h3>Themes</h3>")
                    themes=gr.Label(label="Themes", show_label=False)
                
                    gr.HTML("<h3>BPM</h3>")
                    bpm  = gr.Textbox(label="BPM", show_label=False)

                    gr.HTML("<h3>Key</h3>")
                    key  = gr.Textbox(label="Key", show_label=False)
                    mode = gr.Textbox(label="Mode", show_label=False)

                with gr.Column():
                    gr.HTML("<h3>Genres</h3>")
                    genres  = gr.Label(label="Genres", show_label=False)

#                with gr.Column():
                    gr.HTML("<h3>Instruments</h3>")
                    instruments  = gr.Label(label="Instruments", show_label=False)
                    
                    gr.HTML("<h3> Vocal Gender</h3>")
                    vocalgender  = gr.Label(label="Vocal Gender", show_label=False)
                    
                    gr.HTML("<h3>Textures</h3>")
                    timbres  = gr.Label(label="Texture", show_label=False)
                    
                    gr.HTML("<h3> AudioQuality</h3>")
                    audioquality  = gr.Label(label="Audio Quality", show_label=False)

                with gr.Column():
                    gr.HTML("<h3> Descriptors</h3>")
                    #gr.HTML("<h5> Vocal/Instrumental</h5>")
                    acousticness = gr.Slider(label="Electric/Acoustic", minimum=0, maximum=1.0)#, info="Information todo", show_label=False)
                    danceability = gr.Slider(label="Danceability", minimum=0, maximum=1.0)#, info="Information todo", show_label=False)
                    arousal = gr.Slider(label="Arousal", minimum=0, maximum=1.0)#, info="Information todo", show_label=False)
                    instrumentalness = gr.Slider(label="Vocal/Instrumental", minimum=0, maximum=1.0)#, info="Information todo", show_label=False)
                    liveness = gr.Slider(label="Studio/Live", minimum=0, maximum=1.0)#, info="Information todo", show_label=False)
                    speechiness = gr.Slider(label="Music/Speech", minimum=0, maximum=1.0)#, info="Information todo", show_label=False)
                    valence = gr.Slider(label="Valence", minimum=0, maximum=1.0)#, info="Information todo", show_label=False)
                    melodic = gr.Slider(label="Melodic", minimum=0, maximum=1.0)#, info="Information todo", show_label=False)
                    articulation = gr.Slider(label="Articulation", minimum=0, maximum=1.0)#, info="Information todo", show_label=False)
                    rhythmicstability = gr.Slider(label="Rhythmic Stability", minimum=0, maximum=1.0)#, info="Information todo", show_label=False)
                    dissonance = gr.Slider(label="Dissonance", minimum=0, maximum=1.0)#, info="Information todo", show_label=False)
                    binary = gr.Slider(label="Binary", minimum=0, maximum=1.0)#, info="Information todo", show_label=False)
                    texturalstability = gr.Slider(label="Textural Stability", minimum=0, maximum=1.0)#, info="Information todo", show_label=False)

                    
            ''''
            with gr.Row():

                with gr.Column():
                    gr.HTML("<h3>Themes</h3>")
                    themes=gr.Label(label="Themes", show_label=False)
            ''' 
    
    analyze_url_btn.click(process_url, inputs=[audio_url_input], 
                      outputs=[html,
                               dict_moods, 
                               genres, 
                               instruments,
                               vocalgender,
                               timbres,
                               themes, 
                               audioquality,
                               acousticness,
                               danceability,
                               arousal,
                               instrumentalness,
                               liveness,
                               speechiness,
                               valence,
                               melodic,
                               articulation,
                               rhythmicstability,
                               dissonance,
                               bpm,
                               binary,
                               key,
                               mode,
                               texturalstability
                               ])

    analyze_file_btn.click(process_file, inputs=[audio_input_file], 
                      outputs=[html,
                               dict_moods, 
                               genres, 
                               instruments,
                               vocalgender,
                               timbres,
                               themes, 
                               audioquality,
                               acousticness,
                               danceability,
                               arousal,
                               instrumentalness,
                               liveness,
                               speechiness,
                               valence,
                               melodic,
                               articulation,
                               rhythmicstability,
                               dissonance,
                               bpm,
                               binary,
                               key,
                               mode,
                               texturalstability
                               ])


demo.launch(debug=True)