Hugo Flores Garcia committed
Commit 4a2dc41
1 Parent(s): f1ccdc1

interface, cleanup imputation code

Files changed (4):
  1. env/data.sh +1 -1
  2. requirements.txt +1 -1
  3. scripts/exp/train.py +22 -14
  4. vampnet/interface.py +7 -1
env/data.sh CHANGED
@@ -1,7 +1,7 @@
 export PATH_TO_DATA=~/data
 
 if [[ $(hostname) == "oon17" ]]; then
-    export PATH_TO_DATA=/home/prem/shared/data/
+    export PATH_TO_DATA=/data/
 fi
 
 if [[ $(hostname) == "oon19" ]]; then
requirements.txt CHANGED
@@ -1,7 +1,7 @@
 argbind>=0.3.1
 pytorch-ignite
 rich
-audiotools @ git+https://github.com/descriptinc/lyrebird-audiotools.git@0.6.3
+audiotools @ git+https://github.com/descriptinc/lyrebird-audiotools.git@hf/backup-info
 lac @ git+https://github.com/descriptinc/lyrebird-audio-codec.git@main
 tqdm
 tensorboard
scripts/exp/train.py CHANGED
@@ -547,30 +547,38 @@ def train(
 
     def save_imputation(self, z: torch.Tensor):
         # imputations
-        mask_begin = z.shape[-1] // 4
-        mask_end = (z.shape[-1] * 3) // 4
+        _prefix_amt = prefix_amt
+        _suffix_amt = suffix_amt
 
-        imp_mask = torch.zeros(z.shape[0], z.shape[-1]).to(accel.device).int()
-        imp_mask[:, mask_begin:mask_end] = 1
+        if _prefix_amt == 0:
+            _prefix_amt = 0.25
+        if _suffix_amt == 0:
+            _suffix_amt = 0.25
 
-        imp_noisy = (
-            z * (1 - imp_mask[:, None, :])
-            + torch.randint_like(z, 0, accel.unwrap(model).vocab_size)
-            * imp_mask[:, None, :]
+        n_prefix = int(z.shape[-1] * _prefix_amt)
+        n_suffix = int(z.shape[-1] * _suffix_amt)
+        downsample_factor = None
+
+        vn = accel.unwrap(model)
+
+        z_mask, mask = vn.add_noise(
+            z, r=0.0, n_prefix=n_prefix, n_suffix=n_suffix,
+            downsample_factor=downsample_factor
         )
-        imputed_noisy = accel.unwrap(model).to_signal(imp_noisy, codec)
-        imputed_true = accel.unwrap(model).to_signal(z, codec)
+
+        imputed_noisy = vn.to_signal(z_mask, codec)
+        imputed_true = vn.to_signal(z, codec)
 
         imputed = []
         for i in range(len(z)):
             imputed.append(
-                accel.unwrap(model).sample(
+                vn.sample(
                     codec=codec,
                     time_steps=z.shape[-1],
                     start_tokens=z[i][None, ...],
-                    mask=imp_mask[i][None, ...],
-                )
-            )
+                    mask=mask[i][None, ...],
+                )
+            )
         imputed = AudioSignal.batch(imputed)
 
         for i in range(len(val_idx)):
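In short, the hard-coded middle-half mask (tokens from T/4 to 3T/4 replaced with random vocabulary entries) is replaced by the model's own add_noise helper, driven by the configurable prefix_amt and suffix_amt, with 0.25 as the fallback for either when it is 0, which reproduces the old window. The sketch below shows the mask this presumably builds, assuming add_noise with r=0.0 keeps the first n_prefix and last n_suffix timesteps as conditioning and marks everything between them for generation; the helper name prefix_suffix_mask is hypothetical and not part of the repo.

import torch

def prefix_suffix_mask(z: torch.Tensor, n_prefix: int, n_suffix: int) -> torch.Tensor:
    """Hypothetical stand-in for the mask returned by vn.add_noise(z, r=0.0, ...).

    Returns a (batch, time) int mask where 1 = masked (to be imputed) and
    0 = kept as conditioning. With n_prefix = n_suffix = 0.25 * T this
    reproduces the old hard-coded middle-half window.
    """
    batch, T = z.shape[0], z.shape[-1]
    mask = torch.ones(batch, T, dtype=torch.int, device=z.device)
    mask[:, :n_prefix] = 0        # keep the prefix as conditioning
    mask[:, T - n_suffix:] = 0    # keep the suffix as conditioning
    return mask

# Example: a 100-step sequence with the 0.25 fallbacks from the diff.
z = torch.zeros(2, 4, 100, dtype=torch.long)  # (batch, codebooks, time)
mask = prefix_suffix_mask(z, n_prefix=25, n_suffix=25)
assert mask[0, :25].sum() == 0 and mask[0, 25:75].all() and mask[0, 75:].sum() == 0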
vampnet/interface.py CHANGED
@@ -53,7 +53,13 @@ class Interface:
 
     @torch.inference_mode()
     def encode(self, signal: AudioSignal):
-        signal = signal.clone().to(self.device).resample(self.codec.sample_rate).to_mono()
+        signal = (
+            signal.clone().to(self.device)
+            .resample(self.codec.sample_rate)
+            .to_mono()
+            .normalize(-24)
+            .ensure_max_of_audio(1.0)
+        )
         z = self.codec.encode(signal.samples, signal.sample_rate)["codes"]
         return z
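With this change, encode() also loudness-normalizes the input to -24 dB and clamps peaks to 1.0 before encoding, so the resulting codes no longer vary with the input's gain. A sketch of calling it is below; the Interface constructor arguments are assumptions (the repo's scripts show the actual checkpoint and codec loading), and only encode() itself is taken from the diff.

from audiotools import AudioSignal
from vampnet.interface import Interface

# Construction details are assumptions; see the repo's scripts for the
# real model/codec arguments.
interface = Interface(...)

signal = AudioSignal("input.wav")
z = interface.encode(signal)  # resampled, mono, -24 dB, peak-limited
print(z.shape)                # discrete codes from the codec's encoder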