Surn committed
Commit efabdc6
1 Parent(s): e3f64dd

Add Interrupt Button

Files changed (2):
  1. app.py +20 -3
  2. audiocraft/utils/extend.py +5 -3
app.py CHANGED
@@ -11,6 +11,8 @@ import argparse
 import torch
 import gradio as gr
 import os
+import time
+import warnings
 from audiocraft.models import MusicGen
 from audiocraft.data.audio import audio_write
 from audiocraft.utils.extend import generate_music_segments, add_settings_to_image
@@ -20,6 +22,19 @@ import random
 MODEL = None
 IS_SHARED_SPACE = "musicgen/MusicGen" in os.environ.get('SPACE_ID', '')
 
+def interrupt():
+    global INTERRUPTING
+    INTERRUPTING = True
+
+
+def make_waveform(*args, **kwargs):
+    # Further remove some warnings.
+    be = time.time()
+    with warnings.catch_warnings():
+        warnings.simplefilter('ignore')
+        out = gr.make_waveform(*args, **kwargs)
+        print("Make a video took", time.time() - be)
+        return out
 
 def load_model(version):
     print("Loading model", version)
@@ -102,7 +117,7 @@ def predict(model, text, melody, duration, dimension, topk, topp, temperature, c
             output = output_segments[0]
             for i in range(1, len(output_segments)):
                 overlap_samples = overlap * MODEL.sample_rate
-                output = torch.cat([output[:, :, :-overlap_samples], output_segments[i][:, :, overlap_samples:]], dim=2)
+                output = torch.cat([output[:, :, :-overlap_samples], output_segments[i][:, :, overlap_samples:]], dim=dimension)
             output = output.detach().cpu().float()[0]
         except Exception as e:
             print(f"Error combining segments: {e}. Using the first segment only.")
@@ -116,7 +131,7 @@ def predict(model, text, melody, duration, dimension, topk, topp, temperature, c
         audio_write(
             file.name, output, MODEL.sample_rate, strategy="loudness",
             loudness_headroom_db=16, loudness_compressor=True, add_suffix=False)
-        waveform_video = gr.make_waveform(file.name,bg_image=background, bar_count=40)
+        waveform_video = make_waveform(file.name,bg_image=background, bar_count=40)
         return waveform_video, seed
@@ -144,6 +159,8 @@ def ui(**kwargs):
             melody = gr.Audio(source="upload", type="numpy", label="Melody Condition (optional)", interactive=True)
         with gr.Row():
             submit = gr.Button("Submit")
+            # Adapted from https://github.com/rkfg/audiocraft/blob/long/app.py, MIT license.
+            _ = gr.Button("Interrupt").click(fn=interrupt, queue=False)
         with gr.Row():
             background= gr.Image(value="./assets/background.png", source="upload", label="Background", shape=(768,512), type="filepath", interactive=True)
             include_settings = gr.Checkbox(label="Add Settings to background", value=True, interactive=True)
@@ -156,7 +173,7 @@ def ui(**kwargs):
         with gr.Row():
             duration = gr.Slider(minimum=1, maximum=1000, value=10, label="Duration", interactive=True)
             overlap = gr.Slider(minimum=1, maximum=29, value=5, step=1, label="Overlap", interactive=True)
-            dimension = gr.Slider(minimum=-2, maximum=1, value=1, step=1, label="Dimension", info="determines which direction to add new segements of audio. (0 = stack tracks, 1 = lengthen, -1 = ?)", interactive=True)
+            dimension = gr.Slider(minimum=-2, maximum=2, value=2, step=1, label="Dimension", info="determines which direction to add new segments of audio. (1 = stack tracks, 2 = lengthen, -2..0 = ?)", interactive=True)
         with gr.Row():
            topk = gr.Number(label="Top-k", value=250, interactive=True)
            topp = gr.Number(label="Top-p", value=0, interactive=True)
 
 
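The new make_waveform is a drop-in wrapper for gr.make_waveform that suppresses library warnings and logs how long video rendering took. In the committed version the print and return statements sit inside the warnings.catch_warnings() block; that works, but the suppression scope can be limited to the single call, as in this behavior-equivalent form:

    import time
    import warnings

    import gradio as gr

    def make_waveform(*args, **kwargs):
        # Time the call; silence warnings only around gr.make_waveform itself.
        be = time.time()
        with warnings.catch_warnings():
            warnings.simplefilter('ignore')
            out = gr.make_waveform(*args, **kwargs)
        print("Make a video took", time.time() - be)
        return out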
 
 
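On the Dimension slider: its value now flows straight through as dim=dimension in the torch.cat call above. Assuming each segment has the usual MusicGen output shape [batch, channels, samples] (an assumption; the shape is not shown in these hunks), dim=2 or -1 splices along time (lengthen), dim=1 or -2 concatenates channels (stack tracks), and dim=0 stacks along the batch axis. The overlap slices always trim the last (time) axis regardless of the chosen dim. A small shape check:

    import torch

    a = torch.zeros(1, 2, 1000)  # [batch, channels, samples]
    b = torch.zeros(1, 2, 1000)
    overlap_samples = 100

    # Both operands lose `overlap_samples` along the time axis: each is [1, 2, 900].
    trimmed = [a[:, :, :-overlap_samples], b[:, :, overlap_samples:]]

    print(torch.cat(trimmed, dim=2).shape)  # torch.Size([1, 2, 1800]) -> lengthened
    print(torch.cat(trimmed, dim=1).shape)  # torch.Size([1, 4, 900])  -> stacked tracks
    print(torch.cat(trimmed, dim=0).shape)  # torch.Size([2, 2, 900])  -> batched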
audiocraft/utils/extend.py CHANGED
@@ -30,7 +30,7 @@ def separate_audio_segments(audio, segment_duration=30, overlap=1):
     if total_samples > 0:
         segment = audio_data[-segment_samples:]
         segments.append((sr, segment))
-
+    print(f"separate_audio_segments: {len(segments)} segments")
     return segments
 
 def generate_music_segments(text, melody, MODEL, seed, duration:int=10, overlap:int=1, segment_duration:int=30):
@@ -43,9 +43,11 @@ def generate_music_segments(text, melody, MODEL, seed, duration:int=10, overlap:
 
     # Calculate the total number of segments
     total_segments = max(math.ceil(duration / segment_duration),1)
-
+    # account for overlap
+    duration = duration + (max((total_segments - 1),0) * overlap)
+    total_segments = max(math.ceil(duration / segment_duration),1)
     #calc excess duration
-    excess_duration = total_segments * segment_duration - duration
+    excess_duration = segment_duration - (total_segments * segment_duration - duration)
     print(f"total Segments to Generate: {total_segments} for {duration} seconds. Each segment is {segment_duration} seconds. Excess {excess_duration}")
 
     # If melody_segments is shorter than total_segments, repeat the segments until the total_segments is reached
 
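A worked example of the new overlap accounting (plan_segments is a hypothetical helper that just mirrors the committed arithmetic): with duration=45, segment_duration=30 and overlap=5, the first pass gives 2 segments; one 5-second join inflates duration to 50, which still fits in 2 segments; excess_duration = 30 - (2*30 - 50) = 20, i.e. only 20 seconds of the last segment are new audio.

    import math

    def plan_segments(duration: int, segment_duration: int = 30, overlap: int = 1):
        # Mirrors the arithmetic added in this commit, for illustration only.
        total_segments = max(math.ceil(duration / segment_duration), 1)
        # account for overlap: each of the (total_segments - 1) joins re-generates `overlap` seconds
        duration = duration + (max((total_segments - 1), 0) * overlap)
        total_segments = max(math.ceil(duration / segment_duration), 1)
        excess_duration = segment_duration - (total_segments * segment_duration - duration)
        return total_segments, duration, excess_duration

    print(plan_segments(45, 30, 5))  # (2, 50, 20)
    print(plan_segments(10, 30, 5))  # (1, 10, 10)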