mrneuralnet committed
Commit: 833847d
Parent: da86ada

Initial commit

app.py CHANGED
@@ -97,10 +97,11 @@ if __name__ == "__main__":
     # model = download_whisper()
     # extract_and_save_encoder(model)\
 
-    if torch.cuda.is_available():
-        device = "cuda"
-    else:
-        device = "cpu"
+    # if torch.cuda.is_available():
+    #     device = "cuda"
+    # else:
+    #     device = "cpu"
+    device = 'cpu'
 
     with open('config.yaml', "r") as f:
         config = yaml.safe_load(f)
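
Note: this hunk pins inference in app.py to the CPU instead of probing for CUDA. A minimal sketch of the resulting selection, equivalent to the one-line form used in frontends.py; the `torch` import is assumed to already exist at the top of app.py:

import torch  # assumed already imported in app.py

# Dynamic selection, kept only as a comment by this commit:
# device = "cuda" if torch.cuda.is_available() else "cpu"

# Hard-coded choice after the commit; any model or tensor created later in
# __main__ should be placed on this device explicitly (e.g. model.to(device)).
device = 'cpu'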
evaluate_models.py CHANGED
@@ -189,9 +189,9 @@ def inference(
     y_pred_label = torch.Tensor([]).to(device)
 
     preds = []
-
+    model = model.to(device)
     for i, (batch_x, _, batch_y, metadata) in enumerate(test_loader):
-        model.eval()
+        model = model.eval()
         _, path, _, _ = metadata
         if i % 10 == 0:
             print(f"Batch [{i}/{batches_number}]")
@@ -201,6 +201,9 @@ def inference(
         batch_y = batch_y.to(device)
         num_total += batch_x.size(0)
 
+        print('batch device', batch_x)
+        print('model device', model)
+
         batch_pred = model(batch_x).squeeze(1)
         batch_pred = torch.sigmoid(batch_pred)
         batch_pred_label = (batch_pred + 0.5).int()
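
Note: moving the model to `device` before the loop is the standard pattern; `model.eval()` returns the module itself, so reassigning it inside the loop is harmless but only needs to happen once, and the two debug prints dump the whole tensor and module rather than their devices. A sketch of a tidier variant, assuming `test_loader`, `device`, `batches_number`, and `num_total` are defined as in the surrounding `inference()` function; this is not the exact code committed:

model = model.to(device)
model.eval()  # eval mode once, outside the batch loop

with torch.no_grad():  # inference only, no gradients needed
    for i, (batch_x, _, batch_y, metadata) in enumerate(test_loader):
        _, path, _, _ = metadata
        if i % 10 == 0:
            print(f"Batch [{i}/{batches_number}]")

        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)
        num_total += batch_x.size(0)

        # Print only the devices instead of the full objects:
        print('batch device', batch_x.device)
        print('model device', next(model.parameters()).device)

        batch_pred = torch.sigmoid(model(batch_x).squeeze(1))
        batch_pred_label = (batch_pred + 0.5).int()  # threshold at 0.5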
src/datasets/base_dataset.py CHANGED
@@ -84,9 +84,6 @@ class SimpleAudioFakeDataset(Dataset):
         path, label, attack_type = self.samples[index]
 
         waveform, sample_rate = torchaudio.load(path, normalize=APPLY_NORMALIZATION)
-        import librosa
-        # waveform, sample_rate = librosa.load(path, sr=SAMPLING_RATE)
-        # waveform = torch.tensor(waveform)
         print('waveform', waveform)
         real_sec_length = len(waveform[0]) / sample_rate
 
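Note: the removed lines were a leftover librosa fallback (librosa.load resamples to the requested sr and returns a NumPy array, hence the commented torch.tensor(...) wrapper); loading stays on torchaudio. For context, a short annotated sketch of the surviving lines, assuming APPLY_NORMALIZATION is a boolean flag as the name suggests:

# torchaudio.load returns (waveform, sample_rate), where waveform is a float
# tensor of shape (channels, num_frames); normalize=True maps integer PCM to [-1, 1].
waveform, sample_rate = torchaudio.load(path, normalize=APPLY_NORMALIZATION)

# Duration in seconds, measured on the first channel.
real_sec_length = len(waveform[0]) / sample_rate
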
src/frontends.py CHANGED
@@ -7,7 +7,8 @@ SAMPLING_RATE = 16_000
 win_length = 400 # int((25 / 1_000) * SAMPLING_RATE)
 hop_length = 160 # int((10 / 1_000) * SAMPLING_RATE)
 
-device = "cuda" if torch.cuda.is_available() else "cpu"
+# device = "cuda" if torch.cuda.is_available() else "cpu"
+device = 'cpu'
 
 MFCC_FN = torchaudio.transforms.MFCC(
     sample_rate=SAMPLING_RATE,
@@ -39,7 +40,7 @@ MEL_SCALE_FN = torchaudio.transforms.MelScale(
 delta_fn = torchaudio.transforms.ComputeDeltas(
     win_length=400,
     mode="replicate",
-)
+).to(device)
 
 
 def get_frontend(
@@ -65,6 +66,7 @@ def prepare_lfcc_double_delta(input):
 def prepare_mfcc_double_delta(input):
     if input.ndim < 4:
         input = input.unsqueeze(1) # (bs, 1, n_lfcc, frames)
+    input.to(device)
     x = MFCC_FN(input)
     delta = delta_fn(x)
     double_delta = delta_fn(delta)
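
Note: `torchaudio.transforms.ComputeDeltas` is an `nn.Module`, so calling `.to(device)` on it is a real move (effectively a no-op now that `device` is pinned to 'cpu'). In `prepare_mfcc_double_delta`, however, `input.to(device)` on a tensor returns a new tensor rather than modifying `input` in place, so the added line has no effect as written. A sketch of the assignment form, with everything else as in the hunk:

def prepare_mfcc_double_delta(input):
    if input.ndim < 4:
        input = input.unsqueeze(1)  # (bs, 1, n_lfcc, frames)
    input = input.to(device)  # Tensor.to() is not in-place; reassign to actually move it
    x = MFCC_FN(input)
    delta = delta_fn(x)
    double_delta = delta_fn(delta)
    # ... rest of the function unchanged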