Surn committed on
Commit
595ae94
·
1 Parent(s): c542417

Add Docker File

Browse files
Files changed (4) hide show
  1. .dockerignore +1 -0
  2. Dockerfile +26 -0
  3. app.py +96 -165
  4. requirements.txt +1 -2
.dockerignore ADDED
@@ -0,0 +1 @@
 
 
1
+ cache/
Dockerfile ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ FROM nvidia/cuda:11.8.0-base-ubuntu22.04
2
+
3
+ ENV DEBIAN_FRONTEND=noninteractive \
4
+ PYTHONUNBUFFERED=1 \
5
+ PYTHONIOENCODING=UTF-8
6
+ RUN --mount=type=cache,target=/var/cache/apt --mount=type=cache,target=/var/lib/apt apt update &&\
7
+ apt install -y \
8
+ wget \
9
+ git \
10
+ pkg-config \
11
+ python3 \
12
+ python3-pip \
13
+ python-is-python3 \
14
+ ffmpeg \
15
+ libnvrtc11.2 \
16
+ libtcmalloc-minimal4
17
+
18
+ RUN useradd -m -u 1000 ac
19
+ RUN --mount=type=cache,target=/root/.cache python -m pip install --upgrade pip wheel
20
+ ENV TORCH_COMMAND="pip install torch==2.0.1+cu118 torchaudio --extra-index-url https://download.pytorch.org/whl/cu118"
21
+ RUN --mount=type=cache,target=/root/.cache python -m $TORCH_COMMAND
22
+ RUN ln -s /usr/lib/x86_64-linux-gnu/libnvrtc.so.11.2 /usr/lib/x86_64-linux-gnu/libnvrtc.so
23
+ USER 1000
24
+ RUN mkdir ~/.cache
25
+ RUN --mount=type=cache,target=/home/ac/.cache --mount=source=.,target=/home/ac/audiocraft python -m pip install -r /home/ac/audiocraft/requirements.txt
26
+ WORKDIR /home/ac/audiocraft
app.py CHANGED
@@ -20,7 +20,14 @@ import numpy as np
20
  import random
21
 
22
  MODEL = None
 
23
  IS_SHARED_SPACE = "musicgen/MusicGen" in os.environ.get('SPACE_ID', '')
 
 
 
 
 
 
24
 
25
  def interrupt():
26
  global INTERRUPTING
@@ -135,171 +142,95 @@ def predict(model, text, melody, duration, dimension, topk, topp, temperature, c
135
  return waveform_video, seed
136
 
137
 
138
- def ui(**kwargs):
139
- with gr.Blocks() as interface:
140
- gr.Markdown(
141
- """
142
- # MusicGen
143
- This is your private demo for [MusicGen](https://github.com/facebookresearch/audiocraft), a simple and controllable model for music generation
144
- presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284)
145
- """
146
- )
147
- if IS_SHARED_SPACE:
148
- gr.Markdown("""
149
- ⚠ This Space doesn't work in this shared UI ⚠
150
-
151
- <a href="https://huggingface.co/spaces/musicgen/MusicGen?duplicate=true" style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank">
152
- <img style="margin-bottom: 0em;display: inline;margin-top: -.25em;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
153
- to use it privately, or use the <a href="https://huggingface.co/spaces/facebook/MusicGen">public demo</a>
154
- """)
155
- with gr.Row():
156
- with gr.Column():
157
- with gr.Row():
158
- text = gr.Text(label="Input Text", interactive=True, value="4/4 100bpm 320kbps 48khz, Industrial/Electronic Soundtrack, Dark, Intense, Sci-Fi")
159
- melody = gr.Audio(source="upload", type="numpy", label="Melody Condition (optional)", interactive=True)
160
- with gr.Row():
161
- submit = gr.Button("Submit")
162
- # Adapted from https://github.com/rkfg/audiocraft/blob/long/app.py, MIT license.
163
- _ = gr.Button("Interrupt").click(fn=interrupt, queue=False)
164
- with gr.Row():
165
- background= gr.Image(value="./assets/background.png", source="upload", label="Background", shape=(768,512), type="filepath", interactive=True)
166
- include_settings = gr.Checkbox(label="Add Settings to background", value=True, interactive=True)
167
- with gr.Row():
168
- title = gr.Textbox(label="Title", value="MusicGen", interactive=True)
169
- settings_font = gr.Text(label="Settings Font", value="arial.ttf", interactive=True)
170
- settings_font_color = gr.ColorPicker(label="Settings Font Color", value="#ffffff", interactive=True)
171
- with gr.Row():
172
- model = gr.Radio(["melody", "medium", "small", "large"], label="Model", value="melody", interactive=True)
173
- with gr.Row():
174
- duration = gr.Slider(minimum=1, maximum=1000, value=10, label="Duration", interactive=True)
175
- overlap = gr.Slider(minimum=1, maximum=29, value=5, step=1, label="Overlap", interactive=True)
176
- dimension = gr.Slider(minimum=-2, maximum=2, value=2, step=1, label="Dimension", info="determines which direction to add new segements of audio. (1 = stack tracks, 2 = lengthen, -2..0 = ?)", interactive=True)
177
- with gr.Row():
178
- topk = gr.Number(label="Top-k", value=250, interactive=True)
179
- topp = gr.Number(label="Top-p", value=0, interactive=True)
180
- temperature = gr.Number(label="Randomness Temperature", value=1.0, precision=2, interactive=True)
181
- cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.0, precision=2, interactive=True)
182
- with gr.Row():
183
- seed = gr.Number(label="Seed", value=-1, precision=0, interactive=True)
184
- gr.Button('\U0001f3b2\ufe0f').style(full_width=False).click(fn=lambda: -1, outputs=[seed], queue=False)
185
- reuse_seed = gr.Button('\u267b\ufe0f').style(full_width=False)
186
- with gr.Column() as c:
187
- output = gr.Video(label="Generated Music")
188
- seed_used = gr.Number(label='Seed used', value=-1, interactive=False)
189
-
190
- reuse_seed.click(fn=lambda x: x, inputs=[seed_used], outputs=[seed], queue=False)
191
- submit.click(predict, inputs=[model, text, melody, duration, dimension, topk, topp, temperature, cfg_coef, background, title, include_settings, settings_font, settings_font_color, seed, overlap], outputs=[output, seed_used])
192
- gr.Examples(
193
- fn=predict,
194
- examples=[
195
- [
196
- "An 80s driving pop song with heavy drums and synth pads in the background",
197
- "./assets/bach.mp3",
198
- "melody"
199
- ],
200
- [
201
- "A cheerful country song with acoustic guitars",
202
- "./assets/bolero_ravel.mp3",
203
- "melody"
204
- ],
205
- [
206
- "90s rock song with electric guitar and heavy drums",
207
- None,
208
- "medium"
209
- ],
210
- [
211
- "a light and cheerly EDM track, with syncopated drums, aery pads, and strong emotions",
212
- "./assets/bach.mp3",
213
- "melody"
214
- ],
215
- [
216
- "lofi slow bpm electro chill with organic samples",
217
- None,
218
- "medium",
219
- ],
220
- ],
221
- inputs=[text, melody, model],
222
- outputs=[output]
223
- )
224
- gr.Markdown(
225
- """
226
- ### More details
227
-
228
- The model will generate a short music extract based on the description you provided.
229
- You can generate up to 30 seconds of audio.
230
-
231
- We present 4 model variations:
232
- 1. Melody -- a music generation model capable of generating music condition on text and melody inputs. **Note**, you can also use text only.
233
- 2. Small -- a 300M transformer decoder conditioned on text only.
234
- 3. Medium -- a 1.5B transformer decoder conditioned on text only.
235
- 4. Large -- a 3.3B transformer decoder conditioned on text only (might OOM for the longest sequences.)
236
-
237
- When using `melody`, ou can optionaly provide a reference audio from
238
- which a broad melody will be extracted. The model will then try to follow both the description and melody provided.
239
-
240
- You can also use your own GPU or a Google Colab by following the instructions on our repo.
241
- See [github.com/facebookresearch/audiocraft](https://github.com/facebookresearch/audiocraft)
242
- for more details.
243
- """
244
- )
245
-
246
- # Show the interface
247
- launch_kwargs = {}
248
- username = kwargs.get('username')
249
- password = kwargs.get('password')
250
- server_port = kwargs.get('server_port', 0)
251
- inbrowser = kwargs.get('inbrowser', False)
252
- share = kwargs.get('share', False)
253
- server_name = kwargs.get('listen')
254
-
255
- launch_kwargs['server_name'] = server_name
256
-
257
- if username and password:
258
- launch_kwargs['auth'] = (username, password)
259
- if server_port > 0:
260
- launch_kwargs['server_port'] = server_port
261
- if inbrowser:
262
- launch_kwargs['inbrowser'] = inbrowser
263
- if share:
264
- launch_kwargs['share'] = share
265
-
266
- interface.queue().launch(**launch_kwargs, max_threads=1)
267
-
268
-
269
- if __name__ == "__main__":
270
- parser = argparse.ArgumentParser()
271
- parser.add_argument(
272
- '--listen',
273
- type=str,
274
- default='127.0.0.1',
275
- help='IP to listen on for connections to Gradio',
276
- )
277
- parser.add_argument(
278
- '--username', type=str, default='', help='Username for authentication'
279
- )
280
- parser.add_argument(
281
- '--password', type=str, default='', help='Password for authentication'
282
- )
283
- parser.add_argument(
284
- '--server_port',
285
- type=int,
286
- default=7859,
287
- help='Port to run the server listener on',
288
- )
289
- parser.add_argument(
290
- '--inbrowser', action='store_true', help='Open in browser'
291
  )
292
- parser.add_argument(
293
- '--share', action='store_true', help='Share the gradio UI'
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
294
  )
 
295
 
296
- args = parser.parse_args()
297
-
298
- ui(
299
- username=args.username,
300
- password=args.password,
301
- inbrowser=args.inbrowser,
302
- server_port=args.server_port,
303
- share=args.share,
304
- listen=args.listen
305
- )
 
20
  import random
21
 
22
  MODEL = None
23
+ MODELS = None
24
  IS_SHARED_SPACE = "musicgen/MusicGen" in os.environ.get('SPACE_ID', '')
25
+ IS_SHARED_SPACE = "musicgen/MusicGen" in os.environ.get('SPACE_ID', '')
26
+ INTERRUPTED = False
27
+ INTERRUPTED = False
28
+ UNLOAD_MODEL = False
29
+ UNLOAD_MODEL = False
30
+ MOVE_TO_CPU = False
31
 
32
  def interrupt():
33
  global INTERRUPTING
 
142
  return waveform_video, seed
143
 
144
 
145
+ css="""
146
+ #col-container {max-width: 910px; margin-left: auto; margin-right: auto;}
147
+ a {text-decoration-line: underline; font-weight: 600;}
148
+ """
149
+ with gr.Blocks(title="UnlimitedMusicGen", css=css) as demo:
150
+ gr.Markdown(
151
+ """
152
+ # UnlimitedMusicGen
153
+ This is your private demo for [UnlimitedMusicGen](https://github.com/Oncorporation/audiocraft), a simple and controllable model for music generation
154
+ presented at: ["Simple and Controllable Music Generation"](https://huggingface.co/papers/2306.05284)
155
+ """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
156
  )
157
+ if IS_SHARED_SPACE:
158
+ gr.Markdown("""
159
+ ⚠ This Space doesn't work in this shared UI ⚠
160
+
161
+ <a href="https://huggingface.co/spaces/musicgen/MusicGen?duplicate=true" style="display: inline-block;margin-top: .5em;margin-right: .25em;" target="_blank">
162
+ <img style="margin-bottom: 0em;display: inline;margin-top: -.25em;" src="https://bit.ly/3gLdBN6" alt="Duplicate Space"></a>
163
+ to use it privately, or use the <a href="https://huggingface.co/spaces/facebook/MusicGen">public demo</a>
164
+ """)
165
+ with gr.Row():
166
+ with gr.Column():
167
+ with gr.Row():
168
+ text = gr.Text(label="Input Text", interactive=True, value="4/4 100bpm 320kbps 48khz, Industrial/Electronic Soundtrack, Dark, Intense, Sci-Fi")
169
+ melody = gr.Audio(source="upload", type="numpy", label="Melody Condition (optional)", interactive=True)
170
+ with gr.Row():
171
+ submit = gr.Button("Submit")
172
+ # Adapted from https://github.com/rkfg/audiocraft/blob/long/app.py, MIT license.
173
+ _ = gr.Button("Interrupt").click(fn=interrupt, queue=False)
174
+ with gr.Row():
175
+ background= gr.Image(value="./assets/background.png", source="upload", label="Background", shape=(768,512), type="filepath", interactive=True)
176
+ include_settings = gr.Checkbox(label="Add Settings to background", value=True, interactive=True)
177
+ with gr.Row():
178
+ title = gr.Textbox(label="Title", value="UnlimitedMusicGen", interactive=True)
179
+ settings_font = gr.Text(label="Settings Font", value="arial.ttf", interactive=True)
180
+ settings_font_color = gr.ColorPicker(label="Settings Font Color", value="#ffffff", interactive=True)
181
+ with gr.Row():
182
+ model = gr.Radio(["melody", "medium", "small", "large"], label="Model", value="melody", interactive=True)
183
+ with gr.Row():
184
+ duration = gr.Slider(minimum=1, maximum=1000, value=10, label="Duration", interactive=True)
185
+ overlap = gr.Slider(minimum=1, maximum=29, value=5, step=1, label="Overlap", interactive=True)
186
+ dimension = gr.Slider(minimum=-2, maximum=2, value=2, step=1, label="Dimension", info="determines which direction to add new segements of audio. (1 = stack tracks, 2 = lengthen, -2..0 = ?)", interactive=True)
187
+ with gr.Row():
188
+ topk = gr.Number(label="Top-k", value=250, interactive=True)
189
+ topp = gr.Number(label="Top-p", value=0, interactive=True)
190
+ temperature = gr.Number(label="Randomness Temperature", value=1.0, precision=2, interactive=True)
191
+ cfg_coef = gr.Number(label="Classifier Free Guidance", value=5.0, precision=2, interactive=True)
192
+ with gr.Row():
193
+ seed = gr.Number(label="Seed", value=-1, precision=0, interactive=True)
194
+ gr.Button('\U0001f3b2\ufe0f').style(full_width=False).click(fn=lambda: -1, outputs=[seed], queue=False)
195
+ reuse_seed = gr.Button('\u267b\ufe0f').style(full_width=False)
196
+ with gr.Column() as c:
197
+ output = gr.Video(label="Generated Music")
198
+ seed_used = gr.Number(label='Seed used', value=-1, interactive=False)
199
+
200
+ reuse_seed.click(fn=lambda x: x, inputs=[seed_used], outputs=[seed], queue=False)
201
+ submit.click(predict, inputs=[model, text, melody, duration, dimension, topk, topp, temperature, cfg_coef, background, title, include_settings, settings_font, settings_font_color, seed, overlap], outputs=[output, seed_used])
202
+ gr.Examples(
203
+ fn=predict,
204
+ examples=[
205
+ [
206
+ "An 80s driving pop song with heavy drums and synth pads in the background",
207
+ "./assets/bach.mp3",
208
+ "melody"
209
+ ],
210
+ [
211
+ "A cheerful country song with acoustic guitars",
212
+ "./assets/bolero_ravel.mp3",
213
+ "melody"
214
+ ],
215
+ [
216
+ "90s rock song with electric guitar and heavy drums",
217
+ None,
218
+ "medium"
219
+ ],
220
+ [
221
+ "a light and cheerly EDM track, with syncopated drums, aery pads, and strong emotions",
222
+ "./assets/bach.mp3",
223
+ "melody"
224
+ ],
225
+ [
226
+ "lofi slow bpm electro chill with organic samples",
227
+ None,
228
+ "medium",
229
+ ],
230
+ ],
231
+ inputs=[text, melody, model],
232
+ outputs=[output]
233
  )
234
+
235
 
236
+ demo.queue(max_size=32).launch()
 
 
 
 
 
 
 
 
 
requirements.txt CHANGED
@@ -17,5 +17,4 @@ transformers
17
  xformers
18
  demucs
19
  librosa
20
- gradio
21
- textwrap
 
17
  xformers
18
  demucs
19
  librosa
20
+ gradio