Surn committed on
Commit e62d648
1 Parent(s): e83dc6d

Add Dimension option


0 stacks tracks; 1 lengthens the music.

Files changed (1)
app.py +4 -3
app.py CHANGED
@@ -25,7 +25,7 @@ def load_model(version):
     return MusicGen.get_pretrained(version)
 
 
-def predict(model, text, melody, duration, topk, topp, temperature, cfg_coef):
+def predict(model, text, melody, duration, dimension, topk, topp, temperature, cfg_coef):
     global MODEL
     topk = int(topk)
     if MODEL is None or MODEL.name != model:
@@ -67,7 +67,7 @@ def predict(model, text, melody, duration, topk, topp, temperature, cfg_coef):
     try:
         # Combine the output segments into one long audio file
         output_segments = [segment.detach().cpu().float()[0] for segment in output_segments]
-        output = torch.cat(output_segments, dim=1)
+        output = torch.cat(output_segments, dim=dimension)
     except Exception as e:
         print(f"Error combining segments: {e}. Using the first segment only.")
         output = output_segments[0].detach().cpu().float()[0]
@@ -109,6 +109,7 @@ def ui(**kwargs):
         model = gr.Radio(["melody", "medium", "small", "large"], label="Model", value="melody", interactive=True)
         with gr.Row():
             duration = gr.Slider(minimum=1, maximum=1000, value=10, label="Duration", interactive=True)
+            dimension = gr.Slider(minimum=-2, maximum=1, value=1, step=1, label="Dimension", info="determines which direction to add new segments of audio. (0 = stack tracks, 1 = lengthen, -1 = ?)", interactive=True)
         with gr.Row():
             topk = gr.Number(label="Top-k", value=250, interactive=True)
             topp = gr.Number(label="Top-p", value=0, interactive=True)
@@ -116,7 +117,7 @@ def ui(**kwargs):
             cfg_coef = gr.Number(label="Classifier Free Guidance", value=3.0, interactive=True)
         with gr.Column():
             output = gr.Video(label="Generated Music")
-        submit.click(predict, inputs=[model, text, melody, duration, topk, topp, temperature, cfg_coef], outputs=[output])
+        submit.click(predict, inputs=[model, text, melody, duration, dimension, topk, topp, temperature, cfg_coef], outputs=[output])
         gr.Examples(
             fn=predict,
             examples=[
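For reference, a minimal standalone sketch (not part of the commit) of why the new dim argument changes the result of torch.cat. The tensors here are made up for illustration, shaped [channels, samples] like the app's per-segment tensors after the batch dimension is stripped:

import torch

# Two hypothetical mono segments of 5 samples each, shape [channels, samples]
a = torch.zeros(1, 5)
b = torch.ones(1, 5)

lengthened = torch.cat([a, b], dim=1)  # shape [1, 10]: segments play back to back
stacked = torch.cat([a, b], dim=0)     # shape [2, 5]: segments become parallel tracks

print(lengthened.shape)  # torch.Size([1, 10])
print(stacked.shape)     # torch.Size([2, 5])

On these 2-D tensors, dim=-1 behaves like dim=1 and dim=-2 like dim=0, which presumably explains the slider's -2 to 1 range.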