File size: 8,880 Bytes
5558062
 
 
 
 
f1a824b
5558062
 
 
 
27ecaed
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5558062
 
27ecaed
5558062
27ecaed
 
 
 
 
5558062
 
 
 
 
 
 
 
 
 
 
8844de9
5558062
 
 
 
 
830f8b5
 
99236eb
812d7ec
830f8b5
812d7ec
 
 
830f8b5
798481c
 
 
 
 
 
830f8b5
812d7ec
830f8b5
812d7ec
 
9099754
830f8b5
9099754
 
 
830f8b5
9099754
830f8b5
6946728
830f8b5
6946728
 
701d5f1
6946728
830f8b5
701d5f1
6946728
830f8b5
27ecaed
baf9728
dd3cb6b
01baa28
356b9b0
 
 
 
 
 
 
27ecaed
 
 
 
 
 
 
 
356b9b0
 
 
 
 
f4f5f6a
ba15b51
f4f5f6a
558bd35
 
 
 
ba15b51
 
 
baf9728
 
31c0c8e
baf9728
 
31c0c8e
 
baf9728
ba15b51
 
 
434a807
de5ecfe
6946728
434a807
 
6946728
434a807
 
6946728
4d0049a
7513630
 
dd3cb6b
baf9728
 
 
 
 
 
 
 
 
aac7846
baf9728
 
 
dd3cb6b
 
558bd35
456a03f
 
 
 
 
558bd35
baf9728
 
27ecaed
 
baf9728
6946728
 
 
baf9728
6946728
baf9728
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
356b9b0
e204a2f
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
from simbals_apis_public_clients.clients.services import SimbalsAPIClient
import time
import json
import os
import gradio as gr
from pytube import YouTube

# Simbals database identifiers. Only SIMBALS_MAIN_DATABASE is used in this
# file (passed to client.add_tags in process()); SIMBALS_GLOBAL_DB is not
# referenced by the code visible here.
SIMBALS_GLOBAL_DB = 1
SIMBALS_MAIN_DATABASE = 2

# Two halves of the YouTube <iframe> embed markup. download_audio() builds the
# final HTML as embed_html1 + <video id> + embed_html2.
embed_html1 = '<iframe width="560" height="315" src="https://www.youtube.com/embed/'
embed_html2 = '" title="YouTube video player" frameborder="0" allow="accelerometer; autoplay; clipboard-write; encrypted-media; gyroscope; picture-in-picture" allowfullscreen></iframe>'


def download_audio(id_video):
    """Download the audio of a YouTube video and build its embed markup.

    Args:
        id_video: URL of the YouTube video; handed unchanged to pytube's
            ``YouTube`` constructor.

    Returns:
        A 3-tuple ``(audio_file, audio_file, embed_html_all)``. The path
        returned by the stream download appears twice because callers unpack
        three values; ``embed_html_all`` is the ``<iframe>`` HTML embedding
        the video.

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    video = YouTube(id_video)

    # first() returns None instead of raising a bare IndexError when the
    # filtered query is empty, so the failure can be reported clearly.
    audio_stream = video.streams.filter(only_audio=True).first()
    if audio_stream is None:
        raise ValueError(f"No audio-only stream available for {id_video!r}")
    audio_file = audio_stream.download()

    # Use the id pytube already parsed from the URL rather than slicing the
    # raw string after "?v=": that slicing produced garbage for short
    # (youtu.be/...) or "&v=" style URLs and put unvalidated user text into
    # the HTML attribute.
    embed_html_all = embed_html1 + video.video_id + embed_html2
    return audio_file, audio_file, embed_html_all

def _name_to_probability(entries):
    """Map each entry's 'name' to its 'probability'."""
    return {entry['name']: entry['probability'] for entry in entries}


def _describe(descriptors):
    """Map descriptor names to values, rounding all but BPM/Key/Mode to 2 decimals."""
    verbatim = ('BPM', 'Key', 'Mode')
    described = {}
    for descriptor in descriptors:
        value = descriptor['value']
        if descriptor['name'] not in verbatim:
            value = int(round(value * 100)) / 100.0
        described[descriptor['name']] = value
    return described


def process(input_path):
    """Tag the audio of a YouTube video with the Simbals API.

    Downloads the audio via ``download_audio``, submits a tagging request,
    polls once per second until the job is no longer pending, and reshapes
    the response into the values expected by the UI components.

    Args:
        input_path: YouTube video URL, forwarded to ``download_audio``.

    Returns:
        A 24-tuple: the embed HTML; seven ``{name: probability}`` dicts
        (moods, genres, instruments, vocal gender, timbres, themes, audio
        quality); then sixteen descriptor values as strings, in the order
        listed in ``descriptor_order`` below.

    Raises:
        TimeoutError: If the job is still pending after all polling attempts.
        KeyError: If ``TOKEN`` is missing from the environment or the
            response lacks an expected field.
    """
    max_polls = 1000
    pending_code = 4  # per the API usage notes: code 4 means "job not finished"
    # Order matters: it must match the component order wired up in the UI.
    descriptor_order = (
        'Electric/Acoustic', 'Danceability', 'Arousal', 'Vocal/Instrumental',
        'Studio/Live', 'Music/Speech', 'Valence', 'Melodic', 'Articulation',
        'RhythmicStability', 'Dissonance', 'BPM', 'Binary', 'Key', 'Mode',
        'TexturalStability',
    )

    audio_file, _, embed_html_all = download_audio(input_path)

    # Set up the client and a request carrying the downloaded audio.
    # NOTE(review): "31415" is passed as the first prepare_request argument;
    # its meaning is not visible from this file.
    client = SimbalsAPIClient(os.environ['TOKEN'], debug=True)
    parameters = client.prepare_request("31415", audio_file=audio_file)
    parameters = client.add_tags(parameters, SIMBALS_MAIN_DATABASE)

    # Only the job id is needed afterwards; the status values are unused here.
    _ok, _code, job_id = client.launch_request(parameters)

    # Poll until the job leaves the pending state. The else branch runs only
    # when the loop was never broken, i.e. the job never finished; failing
    # here avoids indexing into an unfinished result further down.
    for _ in range(max_polls):
        results = client.get_results(job_id)
        if results[0] != pending_code:
            break
        time.sleep(1)
    else:
        raise TimeoutError(
            f"Simbals job {job_id} still pending after {max_polls} polls"
        )

    # NOTE(review): "2" presumably corresponds to SIMBALS_MAIN_DATABASE - confirm.
    res = results[1]["response"]["tagging"]["2"]

    dict_moods = _name_to_probability(res['moods'])
    dict_desc = _describe(res['music_descriptors'])
    genres = _name_to_probability(res['genres'])
    themes = _name_to_probability(res['themes'])
    instruments = _name_to_probability(res['instruments'])
    timbres = _name_to_probability(res['timbres'])

    # Only the first vocal-gender / audio-quality entry is reported.
    top_gender = res['vocal_gender'][0]
    vocalgender = {top_gender['name']: top_gender['probability']}
    top_quality = res['audio_quality'][0]
    audioquality = {top_quality['name']: top_quality['probability']}

    return (
        embed_html_all,
        dict_moods,
        genres,
        instruments,
        vocalgender,
        timbres,
        themes,
        audioquality,
        *[str(dict_desc[name]) for name in descriptor_order],
    )
            

 
# Gradio UI: a URL box and button on top, four result columns underneath.
# Components are created in the same order as before, so the layout is unchanged.
with gr.Blocks() as demo:

    with gr.Row():

        with gr.Column():

            with gr.Row():
                # Receives the YouTube embed markup returned by process().
                html = gr.HTML()

            with gr.Row():
                audio_input = gr.Textbox(placeholder='YouTube video URL', label='YouTube video URL')

            with gr.Row():
                analyze_btn = gr.Button('Analyze File')

            with gr.Row():
                with gr.Column():
                    gr.HTML("<h3>Moods</h3>")
                    dict_moods = gr.Label(label="Moods", show_label=False)

                    gr.HTML("<h3>Themes</h3>")
                    themes = gr.Label(label="Themes", show_label=False)

                with gr.Column():
                    gr.HTML("<h3>Genres</h3>")
                    genres = gr.Label(label="Genres", show_label=False)

                    gr.HTML("<h3>BPM</h3>")
                    bpm = gr.Textbox(label="BPM", show_label=False)

                    gr.HTML("<h3>Key</h3>")
                    key = gr.Textbox(label="Key", show_label=False)
                    mode = gr.Textbox(label="Mode", show_label=False)

                with gr.Column():
                    gr.HTML("<h3>Instruments</h3>")
                    instruments = gr.Label(label="Instruments", show_label=False)

                    gr.HTML("<h3> Vocal Gender</h3>")
                    vocalgender = gr.Label(label="Vocal Gender", show_label=False)

                    gr.HTML("<h3>Textures</h3>")
                    timbres = gr.Label(label="Texture", show_label=False)

                    gr.HTML("<h3> AudioQuality</h3>")
                    audioquality = gr.Label(label="Audio Quality", show_label=False)

                with gr.Column():
                    gr.HTML("<h3> Descriptors</h3>")
                    # One 0..1 slider per descriptor, created in display order.
                    # The unpacking targets below follow the same order as the labels.
                    descriptor_labels = (
                        "Electric/Acoustic",
                        "Danceability",
                        "Arousal",
                        "Vocal/Instrumental",
                        "Studio/Live",
                        "Music/Speech",
                        "Valence",
                        "Melodic",
                        "Articulation",
                        "Rhythmic Stability",
                        "Dissonance",
                        "Binary",
                        "Textural Stability",
                    )
                    (
                        acousticness,
                        danceability,
                        arousal,
                        instrumentalness,
                        liveness,
                        speechiness,
                        valence,
                        melodic,
                        articulation,
                        rhythmicstability,
                        dissonance,
                        binary,
                        texturalstability,
                    ) = [
                        gr.Slider(label=slider_label, minimum=0, maximum=1.0)
                        for slider_label in descriptor_labels
                    ]

    # The outputs order must match the tuple returned by process().
    analyze_btn.click(
        process,
        inputs=[audio_input],
        outputs=[
            html,
            dict_moods,
            genres,
            instruments,
            vocalgender,
            timbres,
            themes,
            audioquality,
            acousticness,
            danceability,
            arousal,
            instrumentalness,
            liveness,
            speechiness,
            valence,
            melodic,
            articulation,
            rhythmicstability,
            dissonance,
            bpm,
            binary,
            key,
            mode,
            texturalstability,
        ],
    )

demo.launch(debug=True)