Surn committed on
Commit
d7ef5a5
·
1 Parent(s): 1a6de5e

First Audio Fade version

Browse files

-fades audio segments together

Files changed (2) hide show
  1. app.py +15 -1
  2. audiocraft/data/audio_utils.py +26 -0
app.py CHANGED
@@ -15,6 +15,7 @@ import time
15
  import warnings
16
  from audiocraft.models import MusicGen
17
  from audiocraft.data.audio import audio_write
 
18
  from audiocraft.utils.extend import generate_music_segments, add_settings_to_image, INTERRUPTING
19
  import numpy as np
20
  import random
@@ -162,7 +163,20 @@ def predict(model, text, melody, duration, dimension, topk, topp, temperature, c
162
  output = output_segments[0]
163
  for i in range(1, len(output_segments)):
164
  overlap_samples = overlap * MODEL.sample_rate
165
- output = torch.cat([output[:, :, :-overlap_samples], output_segments[i]], dim=dimension)
 
 
 
 
 
 
 
 
 
 
 
 
 
166
  output = output.detach().cpu().float()[0]
167
  except Exception as e:
168
  print(f"Error combining segments: {e}. Using the first segment only.")
 
15
  import warnings
16
  from audiocraft.models import MusicGen
17
  from audiocraft.data.audio import audio_write
18
+ from audiocraft.data.audio_utils import apply_fade
19
  from audiocraft.utils.extend import generate_music_segments, add_settings_to_image, INTERRUPTING
20
  import numpy as np
21
  import random
 
163
  output = output_segments[0]
164
  for i in range(1, len(output_segments)):
165
  overlap_samples = overlap * MODEL.sample_rate
166
+ #stack tracks and fade out/in
167
+ overlapping_output_fadeout = output[:, :, -overlap_samples:]
168
+ overlapping_output_fadeout = apply_fade(overlapping_output_fadeout,sample_rate=MODEL.sample_rate,duration=overlap,out=True,start=True, curve_end=0.9, current_device=MODEL.device)
169
+
170
+ overlapping_output_fadein = output_segments[i][:, :, :overlap_samples]
171
+ overlapping_output_fadein = apply_fade(overlapping_output_fadein,sample_rate=MODEL.sample_rate,duration=overlap,out=False,start=False, curve_start=0.1, current_device=MODEL.device)
172
+
173
+ overlapping_output = (overlapping_output_fadeout + overlapping_output_fadein) / 2
174
+ print(f" overlap size Fade:{overlapping_output.size()}\n output: {output.size()}\n segment: {output_segments[i].size()}")
175
+ ##overlapping_output = torch.cat([output[:, :, -overlap_samples:], output_segments[i][:, :, :overlap_samples]], dim=1) #stack tracks
176
+ ##print(f" overlap size stack:{overlapping_output.size()}\n output: {output.size()}\n segment: {output_segments[i].size()}")
177
+ #overlapping_output = torch.cat([output[:, :, -overlap_samples:], output_segments[i][:, :, :overlap_samples]], dim=2) #stack tracks
178
+ #print(f" overlap size cat:{overlapping_output.size()}\n output: {output.size()}\n segment: {output_segments[i].size()}")
179
+ output = torch.cat([output[:, :, :-overlap_samples], overlapping_output, output_segments[i][:, :, overlap_samples:]], dim=dimension)
180
  output = output.detach().cpu().float()[0]
181
  except Exception as e:
182
  print(f"Error combining segments: {e}. Using the first segment only.")
audiocraft/data/audio_utils.py CHANGED
@@ -172,3 +172,29 @@ def i16_pcm(wav: torch.Tensor) -> torch.Tensor:
172
  else:
173
  assert wav.dtype == torch.int16
174
  return wav
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
  else:
173
  assert wav.dtype == torch.int16
174
  return wav
175
+
176
def apply_fade(audio: torch.Tensor, sample_rate, duration=3.0, out=True, start=True, curve_start: float = 0.0, curve_end: float = 1.0, current_device: str = "cpu") -> torch.Tensor:
    """Apply a linear fade-in or fade-out to one end of an audio tensor.

    The fade is applied along the last dimension, so the function works for
    tensors of any rank whose last axis is the sample axis (for example
    ``[C, T]`` or ``[B, C, T]``). The input tensor is never modified; a faded
    copy is returned.

    Args:
        audio: Audio samples; the last dimension is treated as time.
        sample_rate: Samples per second, used to convert ``duration`` to a
            sample count.
        duration: Length of the fade in seconds. If it is longer than the
            audio, the fade is shortened to cover the whole tensor. A zero or
            negative duration returns an unmodified copy.
        out: ``True`` for a fade-out (gain goes ``curve_end`` -> ``curve_start``),
            ``False`` for a fade-in (gain goes ``curve_start`` -> ``curve_end``).
        start: ``True`` to fade the first ``duration`` seconds, ``False`` to
            fade the last ``duration`` seconds.
        curve_start: Lowest gain of the linear ramp.
        curve_end: Highest gain of the linear ramp.
        current_device: Device on which the gain ramp is created; it must be
            compatible with the device ``audio`` lives on.

    Returns:
        A new tensor with the same shape as ``audio`` in which only the
        selected section has been scaled by the fade curve.
    """
    total_samples = audio.shape[-1]
    # Clamp so a fade longer than the clip does not cause a shape mismatch.
    fade_samples = min(int(sample_rate * duration), total_samples)

    audio_faded = audio.clone()
    if fade_samples <= 0:
        # Nothing to fade; also avoids `-0:` selecting the entire tensor.
        return audio_faded

    # Linear gain ramp from curve_start up to curve_end.
    fade_curve = torch.linspace(curve_start, curve_end, fade_samples, device=current_device)
    if out:
        # A fade-out is the same ramp played backwards.
        fade_curve = fade_curve.flip(0)

    # Scale only the requested end, indexing the time (last) axis so that
    # batch and channel dimensions are left intact via broadcasting.
    if start:
        audio_faded[..., :fade_samples] *= fade_curve
    else:
        audio_faded[..., total_samples - fade_samples:] *= fade_curve

    return audio_faded