Sylvain Filoni committed on
Commit
7fb6157
•
1 Parent(s): 3d381f7

added duration controls

Browse files
Files changed (5) hide show
  1. README.md +1 -1
  2. app.py +87 -41
  3. constants.py +9 -0
  4. requirements.txt +5 -0
  5. utils.py +50 -0
README.md CHANGED
@@ -4,7 +4,7 @@ emoji: 🌅🎶
4
  colorFrom: green
5
  colorTo: purple
6
  sdk: gradio
7
- sdk_version: 3.9.1
8
  app_file: app.py
9
  pinned: false
10
  ---
4
  colorFrom: green
5
  colorTo: purple
6
  sdk: gradio
7
+ sdk_version: 3.15.0
8
  app_file: app.py
9
  pinned: false
10
  ---
app.py CHANGED
@@ -1,4 +1,11 @@
 
 
1
  import gradio as gr
 
 
 
 
 
2
  import os
3
  import requests
4
  import urllib
@@ -7,7 +14,6 @@ from os import path
7
  from pydub import AudioSegment
8
 
9
  img_to_text = gr.Blocks.load(name="spaces/pharma/CLIP-Interrogator")
10
- text_to_music = gr.Interface.load("spaces/fffiloni/text-2-music")
11
 
12
  from share_btn import community_icon_html, loading_icon_html, share_js
13
 
@@ -15,22 +21,59 @@ def get_prompts(uploaded_image):
15
 
16
  prompt = img_to_text(uploaded_image, "ViT-L (best for Stable Diffusion 1.*)", "fast", fn_index=1)[0]
17
 
18
- music_result = get_music(prompt)
19
 
20
- return music_result
21
 
22
- def get_music(prompt):
23
-
24
- result = text_to_music(prompt, fn_index=0)
25
-
26
- print(f"""—————
27
- NEW RESULTS
28
- prompt : {prompt}
29
- music : {result}
30
- ———————
31
- """)
32
-
33
- url = result
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
34
  save_as = "file.mp3"
35
 
36
  data = urllib.request.urlopen(url)
@@ -44,7 +87,7 @@ def get_music(prompt):
44
  sound = AudioSegment.from_mp3(save_as)
45
  sound.export(wave_file, format="wav")
46
 
47
- return wave_file, gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
48
 
49
  css = """
50
  #col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
@@ -79,39 +122,42 @@ a {text-decoration-line: underline; font-weight: 600;}
79
  """
80
 
81
  with gr.Blocks(css=css) as demo:
82
- with gr.Column(elem_id="col-container"):
83
- gr.HTML("""<div style="text-align: center; max-width: 700px; margin: 0 auto;">
84
- <div
85
- style="
86
- display: inline-flex;
87
- align-items: center;
88
- gap: 0.8rem;
89
- font-size: 1.75rem;
90
- "
91
- >
92
- <h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
93
- Image to Music
94
- </h1>
95
- </div>
96
- <p style="margin-bottom: 10px; font-size: 94%">
97
- Sends an image in to <a href="https://huggingface.co/spaces/pharma/CLIP-Interrogator" target="_blank">CLIP Interrogator</a>
98
- to generate a text prompt which is then run through
99
- <a href="https://huggingface.co/Mubert" target="_blank">Mubert</a> text-to-music to generate music from the input image!
100
- </p>
101
- </div>""")
102
 
103
 
104
  input_img = gr.Image(type="filepath", elem_id="input-img")
 
 
 
105
  generate = gr.Button("Generate Music from Image")
106
 
107
  music_output = gr.Audio(label="Result", type="filepath", elem_id="music-output")
108
 
109
  with gr.Group(elem_id="share-btn-container"):
110
- community_icon = gr.HTML(community_icon_html, visible=False)
111
- loading_icon = gr.HTML(loading_icon_html, visible=False)
112
- share_button = gr.Button("Share to community", elem_id="share-btn", visible=False)
113
 
114
- generate.click(get_prompts, inputs=[input_img], outputs=[music_output, share_button, community_icon, loading_icon], api_name="i2m")
115
- share_button.click(None, [], [], _js=share_js)
116
 
117
  demo.queue(max_size=32, concurrency_count=20).launch()
1
+ import time
2
+ import base64
3
  import gradio as gr
4
+ from sentence_transformers import SentenceTransformer
5
+
6
+ import httpx
7
+ import json
8
+
9
  import os
10
  import requests
11
  import urllib
14
  from pydub import AudioSegment
15
 
16
  img_to_text = gr.Blocks.load(name="spaces/pharma/CLIP-Interrogator")
 
17
 
18
  from share_btn import community_icon_html, loading_icon_html, share_js
19
 
21
 
22
  prompt = img_to_text(uploaded_image, "ViT-L (best for Stable Diffusion 1.*)", "fast", fn_index=1)[0]
23
 
24
+ music_result = generate_track_by_prompt(prompt, duration, gen_intensity, audio_format)
25
 
26
+ return music_result[0], gr.update(visible=True), gr.update(visible=True), gr.update(visible=True)
27
 
28
+ from utils import get_tags_for_prompts, get_mubert_tags_embeddings, get_pat
29
+
30
+ minilm = SentenceTransformer('all-MiniLM-L6-v2')
31
+ mubert_tags_embeddings = get_mubert_tags_embeddings(minilm)
32
+
33
+
34
def get_track_by_tags(tags, pat, duration, gen_intensity, maxit=20, loop=False):
    """Request a track from Mubert's RecordTrackTTM endpoint and wait for it.

    Args:
        tags: Value sent as the ``tags`` request parameter.
        pat: Value sent as the ``pat`` request parameter.
        duration: Value sent as the ``duration`` request parameter.
        gen_intensity: Value sent as the ``intensity`` request parameter.
        maxit: Maximum number of polls of the download link; the function
            sleeps one second after each unsuccessful poll.
        loop: When true the request uses mode ``"loop"``, otherwise
            ``"track"``.

    Returns:
        The download URL as soon as a GET on it answers with status 200, or
        ``None`` when that never happens within ``maxit`` polls.

    Raises:
        AssertionError: If the API response status is not 1. The message is
            the error text from the response.
    """
    mode = "loop" if loop else "track"
    r = httpx.post('https://api-b2b.mubert.com/v2/RecordTrackTTM',
                   json={
                       "method": "RecordTrackTTM",
                       "params": {
                           "pat": pat,
                           "duration": duration,
                           "format": "wav",
                           "intensity": gen_intensity,
                           "tags": tags,
                           "mode": mode
                       }
                   })

    rdata = json.loads(r.text)
    # Raised explicitly rather than via `assert` so the check survives
    # `python -O`; the exception type is kept for existing callers.
    if rdata['status'] != 1:
        raise AssertionError(rdata['error']['text'])
    trackurl = rdata['data']['tasks'][0]['download_link']

    print('Generating track ', end='')
    for _ in range(maxit):
        r = httpx.get(trackurl)
        if r.status_code == 200:
            return trackurl
        time.sleep(1)

    # The link never answered 200 within `maxit` polls.
    return None
62
+
63
+
64
def generate_track_by_prompt(prompt, duration, gen_intensity, audio_format="wav"):
    """Pick Mubert tags for a text prompt and generate a track from them.

    Args:
        prompt: Text prompt that is matched against the Mubert tag list.
        duration: Track duration; converted with ``int()`` before use.
        gen_intensity: Intensity value forwarded to ``get_track_by_tags``.
        audio_format: Accepted so that the four-argument call made from
            ``get_prompts`` binds correctly. It is not forwarded, because
            ``get_track_by_tags`` takes no format argument and always
            requests ``"wav"``.

    Returns:
        A ``(track_url, tags_csv, status)`` tuple. ``track_url`` is ``None``
        when generation failed or never became available, and ``status``
        then carries the reason instead of ``"Success"``.
    """
    try:
        pat = get_pat("prodia@prodia.com")
        _, tags = get_tags_for_prompts(minilm, mubert_tags_embeddings, [prompt, ])[0]
        result = get_track_by_tags(tags, pat, int(duration), gen_intensity, loop=False)
        print(result)
        joined_tags = ",".join(tags)
        if result is None:
            # get_track_by_tags returns None when the download link never
            # became available; do not report that as a success.
            return None, joined_tags, "Track generation timed out"
        return result, joined_tags, "Success"
    except Exception as e:
        # UI boundary: surface the failure as a status string, not a crash.
        return None, "", str(e)
73
+
74
+ def convert_mp3_to_wav(mp3_filepath):
75
+
76
+ url = mp3_filepath
77
  save_as = "file.mp3"
78
 
79
  data = urllib.request.urlopen(url)
87
  sound = AudioSegment.from_mp3(save_as)
88
  sound.export(wave_file, format="wav")
89
 
90
+ return wave_file
91
 
92
  css = """
93
  #col-container {max-width: 700px; margin-left: auto; margin-right: auto;}
122
  """
123
 
124
  with gr.Blocks(css=css) as demo:
125
+ with gr.Column(elem_id="col-container"):
126
+ gr.HTML("""<div style="text-align: center; max-width: 700px; margin: 0 auto;">
127
+ <div
128
+ style="
129
+ display: inline-flex;
130
+ align-items: center;
131
+ gap: 0.8rem;
132
+ font-size: 1.75rem;
133
+ "
134
+ >
135
+ <h1 style="font-weight: 900; margin-bottom: 7px; margin-top: 5px;">
136
+ Image to Music
137
+ </h1>
138
+ </div>
139
+ <p style="margin-bottom: 10px; font-size: 94%">
140
+ Sends an image in to <a href="https://huggingface.co/spaces/pharma/CLIP-Interrogator" target="_blank">CLIP Interrogator</a>
141
+ to generate a text prompt which is then run through
142
+ <a href="https://huggingface.co/Mubert" target="_blank">Mubert</a> text-to-music to generate music from the input image!
143
+ </p>
144
+ </div>""")
145
 
146
 
147
  input_img = gr.Image(type="filepath", elem_id="input-img")
148
+ with gr.Row():
149
+ track_duration = gr.Slider(minimum=20, maximum=120, value=30, step=5, label="Track duration", elem_id="duration-inp")
150
+ gen_intensity = gr.Dropdown(choices=["low", "medium", "high"], value="high", label="Complexity")
151
  generate = gr.Button("Generate Music from Image")
152
 
153
  music_output = gr.Audio(label="Result", type="filepath", elem_id="music-output")
154
 
155
  with gr.Group(elem_id="share-btn-container"):
156
+ community_icon = gr.HTML(community_icon_html, visible=False)
157
+ loading_icon = gr.HTML(loading_icon_html, visible=False)
158
+ share_button = gr.Button("Share to community", elem_id="share-btn", visible=False)
159
 
160
+ generate.click(get_prompts, inputs=[input_img,track_duration,gen_intensity], outputs=[music_output, share_button, community_icon, loading_icon], api_name="i2m")
161
+ share_button.click(None, [], [], _js=share_js)
162
 
163
  demo.queue(max_size=32, concurrency_count=20).launch()
constants.py ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
1
+ import numpy as np
2
+ import os
3
+
4
# Mubert API credentials, read from the environment (None when unset).
MUBERT_LICENCE = os.environ.get('MUBERT_LICENCE')
MUBERT_TOKEN = os.environ.get('MUBERT_TOKEN')

MUBERT_MODE = "loop"
# Comma-separated tag vocabulary. It must stay on one physical line: a line
# break inside this single-quoted literal is a syntax error.
MUBERT_TAGS_STRING = 'tribal,action,kids,neo-classic,run 130,pumped,jazz / funk,ethnic,dubtechno,reggae,acid jazz,liquidfunk,funk,witch house,tech house,underground,artists,mystical,disco,sensorium,r&b,agender,psychedelic trance / psytrance,peaceful,run 140,piano,run 160,setting,meditation,christmas,ambient,horror,cinematic,electro house,idm,bass,minimal,underscore,drums,glitchy,beautiful,technology,tribal house,country pop,jazz & funk,documentary,space,classical,valentines,chillstep,experimental,trap,new jack swing,drama,post-rock,tense,corporate,neutral,happy,analog,funky,spiritual,sberzvuk special,chill hop,dramatic,catchy,holidays,fitness 90,optimistic,orchestra,acid techno,energizing,romantic,minimal house,breaks,hyper pop,warm up,dreamy,dark,urban,microfunk,dub,nu disco,vogue,keys,hardcore,aggressive,indie,electro funk,beauty,relaxing,trance,pop,hiphop,soft,acoustic,chillrave / ethno-house,deep techno,angry,dance,fun,dubstep,tropical,latin pop,heroic,world music,inspirational,uplifting,atmosphere,art,epic,advertising,chillout,scary,spooky,slow ballad,saxophone,summer,erotic,jazzy,energy 100,kara mar,xmas,atmospheric,indie pop,hip-hop,yoga,reggaeton,lounge,travel,running,folk,chillrave & ethno-house,detective,darkambient,chill,fantasy,minimal techno,special,night,tropical house,downtempo,lullaby,meditative,upbeat,glitch hop,fitness,neurofunk,sexual,indie rock,future pop,jazz,cyberpunk,melancholic,happy hardcore,family / kids,synths,electric guitar,comedy,psychedelic trance & psytrance,edm,psychedelic rock,calm,zen,bells,podcast,melodic house,ethnic percussion,nature,heavy,bassline,indie dance,techno,drumnbass,synth pop,vaporwave,sad,8-bit,chillgressive,deep,orchestral,futuristic,hardtechno,nostalgic,big room,sci-fi,tutorial,joyful,pads,minimal 170,drill,ethnic 108,amusing,sleepy ambient,psychill,italo disco,lofi,house,acoustic guitar,bassline house,rock,k-pop,synthwave,deep house,electronica,gabber,nightlife,sport & fitness,road trip,celebration,electro,disco house,electronic'
# Array form of the vocabulary, so it can be indexed with index arrays.
MUBERT_TAGS = np.array(MUBERT_TAGS_STRING.split(','))
requirements.txt CHANGED
@@ -1,3 +1,8 @@
 
 
 
 
 
1
  pydub
2
  ffmpeg
3
  requests
1
+ httpx
2
+ sentence-transformers
3
+ ffmpeg
4
+ audio2numpy
5
+
6
  pydub
7
  ffmpeg
8
  requests
utils.py ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import json
2
+ import numpy as np
3
+ import httpx
4
+ import os
5
+
6
+ from constants import MUBERT_TAGS, MUBERT_MODE, MUBERT_LICENCE, MUBERT_TOKEN
7
+
8
def get_mubert_tags_embeddings(w2v_model):
    """Encode every entry of ``MUBERT_TAGS`` with ``w2v_model.encode``.

    Returns whatever the model's ``encode`` call returns for the tag array.
    """
    tag_embeddings = w2v_model.encode(MUBERT_TAGS)
    return tag_embeddings
10
+
11
+
12
def get_pat(email: str):
    """Obtain a personal access token from Mubert's GetServiceAccess endpoint.

    Args:
        email: E-mail address sent as the ``email`` request parameter.

    Returns:
        The ``pat`` value from the response's ``data`` object.

    Raises:
        AssertionError: If the response status is not 1.
    """
    r = httpx.post('https://api-b2b.mubert.com/v2/GetServiceAccess',
                   json={
                       "method": "GetServiceAccess",
                       "params": {
                           "email": email,
                           "license": MUBERT_LICENCE,
                           "token": MUBERT_TOKEN,
                           "mode": MUBERT_MODE,
                       }
                   })

    rdata = json.loads(r.text)
    # Raised explicitly rather than via `assert` so the check survives
    # `python -O`; exception type and message are kept for existing callers.
    if rdata['status'] != 1:
        raise AssertionError("probably incorrect e-mail")
    return rdata['data']['pat']
28
+
29
+
30
def find_similar(em, embeddings, method='cosine'):
    """Score every reference embedding against a query embedding.

    Args:
        em: Query vector.
        embeddings: Sequence of reference vectors, one per row.
        method: ``'cosine'`` scores each row as one minus its cosine
            similarity to ``em``; ``'norm'`` scores it as the Euclidean
            norm of ``row - em``. Lower scores mean closer in both cases.

    Returns:
        A ``(scores, order)`` tuple: ``scores`` holds one score per
        reference row and ``order`` is ``np.argsort(scores)``.

    Raises:
        ValueError: If ``method`` is neither ``'cosine'`` nor ``'norm'``.
            Previously such a value silently produced empty results.
    """
    if method not in ('cosine', 'norm'):
        raise ValueError(f"unknown method {method!r}; expected 'cosine' or 'norm'")

    query = np.asarray(em)
    refs = np.asarray(embeddings)
    if refs.size == 0:
        # Nothing to compare against: empty scores and an empty ordering.
        scores = np.array([])
        return scores, np.argsort(scores)

    if method == 'cosine':
        # One matrix-vector product replaces the per-row Python loop.
        scores = 1 - (refs @ query) / (np.linalg.norm(refs, axis=1) * np.linalg.norm(query))
    else:
        scores = np.linalg.norm(refs - query, axis=1)
    return scores, np.argsort(scores)
38
+
39
+
40
def get_tags_for_prompts(w2v_model, mubert_tags_embeddings, prompts, top_n=3, debug=False):
    """Pick the closest Mubert tags for each prompt.

    The prompts are encoded with ``w2v_model.encode``; each resulting vector
    is ranked against ``mubert_tags_embeddings`` with ``find_similar`` (its
    default method) and the ``top_n`` best-ranked entries of ``MUBERT_TAGS``
    are kept.

    Args:
        w2v_model: Object exposing an ``encode`` method.
        mubert_tags_embeddings: Reference embeddings handed to
            ``find_similar``.
        prompts: Sequence of prompt strings.
        top_n: How many tags to keep per prompt.
        debug: When true, print each prompt with its tags and scores.

    Returns:
        A list of ``(prompt, tags)`` tuples, ``tags`` being a list.
    """
    encoded_prompts = w2v_model.encode(prompts)
    matches = []
    for prompt, vector in zip(prompts, encoded_prompts):
        distances, order = find_similar(vector, mubert_tags_embeddings)
        best = order[:top_n]
        top_tags = MUBERT_TAGS[best]
        # find_similar yields distances; flip them into similarity scores.
        top_prob = 1 - distances[best]
        if debug:
            print(f"Prompt: {prompt}\nTags: {', '.join(top_tags)}\nScores: {top_prob}\n\n\n")
        matches.append((prompt, list(top_tags)))
    return matches