Spaces:

MasalaDosa1337
/

RGMC

Sleeping

NikitaSrivatsan committed on May 18, 2024

Commit

7b39cbc

1 Parent(s): 8971856

Removed pickling of input files

Files changed (1) hide show

data_module.py CHANGED Viewed

@@ -223,38 +223,31 @@ class AudiostockDataset(Dataset):
         return tokens, mask, tweet_text_len
     def read_wav(self, filename):
-        stem = PurePosixPath(filename).stem
-        picklefile = f'wt-{self.whole_track}-t-{self.train}-{stem}.pt'
-        picklepath = f'/trunk/datasets/nsrivats/audiostock_proc/{picklefile}'
-        if os.path.exists(picklepath):
-            y = torch.load(picklepath)
-        else:
-            # chunk
-            try:
-                num_frames = torchaudio.info(filename).num_frames
-            except:
-                return None
-            # make sure it wasn't empty, if so die
-            if num_frames == 0:
-                return None
-            sta = 0
-            if not self.whole_track:
-                if self.train:
-                    sta = random.randint(0, num_frames - 441001)
-                else:
-                    sta = (num_frames - 441001) // 2
-                num_frames = 441000
-            y, sr = torchaudio.load(filename, frame_offset=sta, num_frames=num_frames)
-            # resample
-            y = torchaudio.functional.resample(y, sr, 48000)
-            y = y[:, :441000]
-            # mono
-            y = y.mean(dim=0)
-            # normalize
-            y = int16_to_float32(float32_to_int16(y))
-            # save
-            torch.save(y, picklepath)
         return y
     def __getitem__(self, index):

         return tokens, mask, tweet_text_len
     def read_wav(self, filename):
+        # pickling functionality removed since it shouldn't be necessary
+        # chunk
+        try:
+            num_frames = torchaudio.info(filename).num_frames
+        except:
+            return None
+        # make sure it wasn't empty, if so die
+        if num_frames == 0:
+            return None
+        sta = 0
+        if not self.whole_track:
+            if self.train:
+                sta = random.randint(0, num_frames - 441001)
+            else:
+                sta = (num_frames - 441001) // 2
+            num_frames = 441000
+        y, sr = torchaudio.load(filename, frame_offset=sta, num_frames=num_frames)
+        # resample
+        y = torchaudio.functional.resample(y, sr, 48000)
+        y = y[:, :441000]
+        # mono
+        y = y.mean(dim=0)
+        # normalize
+        y = int16_to_float32(float32_to_int16(y))
         return y
     def __getitem__(self, index):