MantraDas committed
Commit b31e324
Parent: 0b5812c

Update app.py

Files changed (1): app.py (+24, -68)
app.py CHANGED
@@ -10,11 +10,9 @@
 import argparse
 from concurrent.futures import ProcessPoolExecutor
 import os
-from pathlib import Path
 import subprocess as sp
 from tempfile import NamedTemporaryFile
 import time
-import typing as tp
 import warnings
 
 import torch
@@ -52,29 +50,6 @@ def interrupt():
     INTERRUPTING = True
 
 
-class FileCleaner:
-    def __init__(self, file_lifetime: float = 3600):
-        self.file_lifetime = file_lifetime
-        self.files = []
-
-    def add(self, path: tp.Union[str, Path]):
-        self._cleanup()
-        self.files.append((time.time(), Path(path)))
-
-    def _cleanup(self):
-        now = time.time()
-        for time_added, path in list(self.files):
-            if now - time_added > self.file_lifetime:
-                if path.exists():
-                    path.unlink()
-                self.files.pop(0)
-            else:
-                break
-
-
-file_cleaner = FileCleaner()
-
-
 def make_waveform(*args, **kwargs):
     # Further remove some warnings.
     be = time.time()
@@ -128,12 +103,8 @@ def _do_predictions(texts, melodies, duration, progress=False, **gen_kwargs):
                 file.name, output, MODEL.sample_rate, strategy="loudness",
                 loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
             out_files.append(pool.submit(make_waveform, file.name))
-            file_cleaner.add(file.name)
     res = [out_file.result() for out_file in out_files]
-    for file in res:
-        file_cleaner.add(file)
     print("batch finished", len(texts), time.time() - be)
-    print("Tempfiles currently stored: ", len(file_cleaner.files))
     return res
 
 
@@ -169,20 +140,19 @@ def predict_full(model, text, melody, duration, topk, topp, temperature, cfg_coef):
         top_k=topk, top_p=topp, temperature=temperature, cfg_coef=cfg_coef)
     return outs[0]
 
-
 def toggle_audio_src(choice):
     if choice == "mic":
         return gr.update(source="microphone", value=None, label="Microphone")
     else:
         return gr.update(source="upload", value=None, label="File")
-
-
+
 def ui_full(launch_kwargs):
     with gr.Blocks() as interface:
         gr.Markdown(
             """
-            # Cyberpunk DJ
-            This is a cyberpunk DJ app used to make FUTURISTIC 31st CENTURY AI HIP HOP BEATS
+            # MusicGen
+            This is your private demo for [MusicGen](https://github.com/facebookresearch/audiocraft), a simple and controllable model for music generation
+            presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284)
             """
         )
         with gr.Row():
@@ -190,29 +160,24 @@ def ui_full(launch_kwargs):
                 with gr.Row():
                     text = gr.Text(label="Input Text", interactive=True)
                 with gr.Column():
-                    radio = gr.Radio(["file", "mic"], value="file",
-                                     label="Condition on a melody (optional) File or Mic")
-                    melody = gr.Audio(source="upload", type="numpy", label="File",
-                                      interactive=True, elem_id="melody-input")
+                    radio = gr.Radio(["file", "mic"], value="file", label="Condition on a melody (optional) File or Mic")
+                    melody = gr.Audio(source="upload", type="numpy", label="File", interactive=True, elem_id="melody-input")
                 with gr.Row():
                     submit = gr.Button("Submit")
                     # Adapted from https://github.com/rkfg/audiocraft/blob/long/app.py, MIT license.
                     _ = gr.Button("Interrupt").click(fn=interrupt, queue=False)
                 with gr.Row():
-                    model = gr.Radio(["melody", "medium", "small", "large"],
-                                     label="Model", value="melody", interactive=True)
+                    model = gr.Radio(["melody", "medium", "small", "large"], label="Model", value="melody", interactive=True)
                 with gr.Row():
-                    duration = gr.Slider(minimum=1, maximum=30, value=30, label="Duration", interactive=True)
+                    duration = gr.Slider(minimum=1, maximum=120, value=10, label="Duration", interactive=True)
                 with gr.Row():
-                    topk = gr.Number(label="Randomness Value", value=250, interactive=True)
-                    topp = gr.Number(label="Beat Tweaking Value", value=0, interactive=True)
-                    temperature = gr.Number(label="Dubstep Glitch Value", value=1.0, interactive=True)
-                    cfg_coef = gr.Number(label="Beatnik Trip Hop Value", value=3.0, interactive=True)
+                    topk = gr.Number(label="Top-k", value=250, interactive=True)
+                    topp = gr.Number(label="Top-p", value=0, interactive=True)
+                    temperature = gr.Number(label="Temperature", value=1.0, interactive=True)
+                    cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.0, interactive=True)
             with gr.Column():
-                output = gr.Video(label="Finished Cypberpunk DJ Masterpiece Audio File")
-        submit.click(predict_full,
-                     inputs=[model, text, melody, duration, topk, topp, temperature, cfg_coef],
-                     outputs=[output])
+                output = gr.Video(label="Generated Music")
+        submit.click(predict_full, inputs=[model, text, melody, duration, topk, topp, temperature, cfg_coef], outputs=[output])
         radio.change(toggle_audio_src, radio, [melody], queue=False, show_progress=False)
         gr.Examples(
             fn=predict_full,
@@ -256,20 +221,17 @@ def ui_full(launch_kwargs):
             This can take a long time, and the model might lose consistency. The model might also
             decide at arbitrary positions that the song ends.
 
-            **WARNING:** Choosing long durations will take a long time to generate (2min might take ~10min).
-            An overlap of 12 seconds is kept with the previously generated chunk, and 18 "new" seconds
-            are generated each time.
+            **WARNING:** Choosing long durations will take a long time to generate (2min might take ~10min). An overlap of 12 seconds
+            is kept with the previously generated chunk, and 18 "new" seconds are generated each time.
 
             We present 4 model variations:
-            1. Melody -- a music generation model capable of generating music condition
-               on text and melody inputs. **Note**, you can also use text only.
+            1. Melody -- a music generation model capable of generating music conditioned on text and melody inputs. **Note**, you can also use text only.
             2. Small -- a 300M transformer decoder conditioned on text only.
             3. Medium -- a 1.5B transformer decoder conditioned on text only.
             4. Large -- a 3.3B transformer decoder conditioned on text only (might OOM for the longest sequences).
 
             When using `melody`, you can optionally provide a reference audio from
-            which a broad melody will be extracted. The model will then try to follow both
-            the description and melody provided.
+            which a broad melody will be extracted. The model will then try to follow both the description and melody provided.
 
             You can also use your own GPU or a Google Colab by following the instructions on our repo.
             See [github.com/facebookresearch/audiocraft](https://github.com/facebookresearch/audiocraft)
@@ -286,14 +248,11 @@ def ui_batched(launch_kwargs):
             """
             # MusicGen
 
-            This is the demo for [MusicGen](https://github.com/facebookresearch/audiocraft),
-            a simple and controllable model for music generation
+            This is the demo for [MusicGen](https://github.com/facebookresearch/audiocraft), a simple and controllable model for music generation
             presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284).
             <br/>
-            <a href="https://huggingface.co/spaces/facebook/MusicGen?duplicate=true"
-            style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank">
-            <img style="margin-bottom: 0em;display: inline;margin-top: -.25em;"
-            src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
+            <a href="https://huggingface.co/spaces/facebook/MusicGen?duplicate=true" style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank">
+            <img style="margin-bottom: 0em;display: inline;margin-top: -.25em;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
             for longer sequences, more control and no queue.</p>
             """
         )
@@ -302,16 +261,13 @@ def ui_batched(launch_kwargs):
                 with gr.Row():
                     text = gr.Text(label="Describe your music", lines=2, interactive=True)
                 with gr.Column():
-                    radio = gr.Radio(["file", "mic"], value="file",
-                                     label="Condition on a melody (optional) File or Mic")
-                    melody = gr.Audio(source="upload", type="numpy", label="File",
-                                      interactive=True, elem_id="melody-input")
+                    radio = gr.Radio(["file", "mic"], value="file", label="Condition on a melody (optional) File or Mic")
+                    melody = gr.Audio(source="upload", type="numpy", label="File", interactive=True, elem_id="melody-input")
                 with gr.Row():
                     submit = gr.Button("Generate")
             with gr.Column():
                 output = gr.Video(label="Generated Music")
-        submit.click(predict_batched, inputs=[text, melody],
-                     outputs=[output], batch=True, max_batch_size=MAX_BATCH_SIZE)
+        submit.click(predict_batched, inputs=[text, melody], outputs=[output], batch=True, max_batch_size=MAX_BATCH_SIZE)
         radio.change(toggle_audio_src, radio, [melody], queue=False, show_progress=False)
         gr.Examples(
             fn=predict_batched,
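
The WARNING note in the documentation hunk above encodes simple chunking arithmetic: a 12-second overlap is carried over between chunks, so every model call after the first contributes only 18 "new" seconds. A minimal sketch of that bookkeeping; the constants and helper below are illustrative and not part of app.py:

CHUNK_SECONDS = 30      # assumed size of one generation window (12 + 18)
OVERLAP_SECONDS = 12    # overlap kept with the previously generated chunk
STRIDE_SECONDS = CHUNK_SECONDS - OVERLAP_SECONDS  # 18 "new" seconds per step

def model_calls_for(duration: float) -> int:
    """Estimate how many generation calls a requested duration costs."""
    if duration <= CHUNK_SECONDS:
        return 1
    remaining = duration - CHUNK_SECONDS
    # Ceiling division: each extra step only adds STRIDE_SECONDS of new audio.
    return 1 + int(-(-remaining // STRIDE_SECONDS))

print(model_calls_for(120))  # the slider's new 120s maximum -> 1 + ceil(90/18) = 6 calls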
 
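The docs hunk also notes that the `melody` model can take a reference audio from which a broad melody is extracted. For standalone use outside this Gradio app, the audiocraft library of the same period exposed this as chroma-conditioned generation; a sketch under that assumption (the audio path and description are placeholders):

import torchaudio
from audiocraft.models import MusicGen

model = MusicGen.get_pretrained('melody')   # the demo's default model variant
model.set_generation_params(duration=10)    # matches the slider's new default

# Placeholder reference audio; a broad melody (chroma) is extracted from it.
melody_wav, sr = torchaudio.load('./reference.mp3')

# Generate audio that tries to follow both the text description and the melody.
wav = model.generate_with_chroma(
    descriptions=['an upbeat electronic track'],
    melody_wavs=melody_wav[None],  # add a batch dimension -> (B, C, T)
    melody_sample_rate=sr,
)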