saeki committed on
Commit
d666f3e
·
1 Parent(s): 7b918f7
Files changed (1) hide show
  1. utils.py +0 -105
utils.py CHANGED
@@ -1,40 +1,9 @@
1
- import librosa.display
2
  import matplotlib.pyplot as plt
3
  import json
4
  import torch
5
  import torchaudio
6
  import hifigan
7
 
8
-
9
- def manual_logging(logger, item, idx, tag, global_step, data_type, config):
10
-
11
- if data_type == "audio":
12
- audio = item[idx, ...].detach().cpu().numpy()
13
- logger.add_audio(
14
- tag,
15
- audio,
16
- global_step,
17
- sample_rate=config["preprocess"]["sampling_rate"],
18
- )
19
- elif data_type == "image":
20
- image = item[idx, ...].detach().cpu().numpy()
21
- fig, ax = plt.subplots()
22
- _ = librosa.display.specshow(
23
- image,
24
- x_axis="time",
25
- y_axis="linear",
26
- sr=config["preprocess"]["sampling_rate"],
27
- hop_length=config["preprocess"]["frame_shift"],
28
- fmax=config["preprocess"]["sampling_rate"] // 2,
29
- ax=ax,
30
- )
31
- logger.add_figure(tag, fig, global_step)
32
- else:
33
- raise NotImplementedError(
34
- "Data type given to logger should be [audio] or [image]"
35
- )
36
-
37
-
38
  def load_vocoder(config):
39
  with open(
40
  "hifigan/config_{}.json".format(config["general"]["feature_type"]), "r"
@@ -47,80 +16,6 @@ def load_vocoder(config):
47
  param.requires_grad = False
48
  return vocoder
49
 
50
-
51
- def get_conv_padding(kernel_size, dilation=1):
52
- return int((kernel_size * dilation - dilation) / 2)
53
-
54
-
55
- def plot_and_save_mels(wav, save_path, config):
56
- spec_module = torchaudio.transforms.MelSpectrogram(
57
- sample_rate=config["preprocess"]["sampling_rate"],
58
- n_fft=config["preprocess"]["fft_length"],
59
- win_length=config["preprocess"]["frame_length"],
60
- hop_length=config["preprocess"]["frame_shift"],
61
- f_min=config["preprocess"]["fmin"],
62
- f_max=config["preprocess"]["fmax"],
63
- n_mels=config["preprocess"]["n_mels"],
64
- power=1,
65
- center=True,
66
- norm="slaney",
67
- mel_scale="slaney",
68
- )
69
- spec = spec_module(wav.unsqueeze(0))
70
- log_spec = torch.log(
71
- torch.clamp_min(spec, config["preprocess"]["min_magnitude"])
72
- * config["preprocess"]["comp_factor"]
73
- )
74
- fig, ax = plt.subplots()
75
- _ = librosa.display.specshow(
76
- log_spec.squeeze(0).numpy(),
77
- x_axis="time",
78
- y_axis="linear",
79
- sr=config["preprocess"]["sampling_rate"],
80
- hop_length=config["preprocess"]["frame_shift"],
81
- fmax=config["preprocess"]["sampling_rate"] // 2,
82
- ax=ax,
83
- cmap="viridis",
84
- )
85
- fig.savefig(save_path, bbox_inches="tight", pad_inches=0)
86
-
87
-
88
- def plot_and_save_mels_all(wavs, keys, save_path, config):
89
- spec_module = torchaudio.transforms.MelSpectrogram(
90
- sample_rate=config["preprocess"]["sampling_rate"],
91
- n_fft=config["preprocess"]["fft_length"],
92
- win_length=config["preprocess"]["frame_length"],
93
- hop_length=config["preprocess"]["frame_shift"],
94
- f_min=config["preprocess"]["fmin"],
95
- f_max=config["preprocess"]["fmax"],
96
- n_mels=config["preprocess"]["n_mels"],
97
- power=1,
98
- center=True,
99
- norm="slaney",
100
- mel_scale="slaney",
101
- )
102
- fig, ax = plt.subplots(nrows=3, ncols=3, figsize=(18, 18))
103
- for i, key in enumerate(keys):
104
- wav = wavs[key][0, ...].cpu()
105
- spec = spec_module(wav.unsqueeze(0))
106
- log_spec = torch.log(
107
- torch.clamp_min(spec, config["preprocess"]["min_magnitude"])
108
- * config["preprocess"]["comp_factor"]
109
- )
110
- ax[i // 3, i % 3].set(title=key)
111
- _ = librosa.display.specshow(
112
- log_spec.squeeze(0).numpy(),
113
- x_axis="time",
114
- y_axis="linear",
115
- sr=config["preprocess"]["sampling_rate"],
116
- hop_length=config["preprocess"]["frame_shift"],
117
- fmax=config["preprocess"]["sampling_rate"] // 2,
118
- ax=ax[i // 3, i % 3],
119
- cmap="viridis",
120
- )
121
- fig.savefig(save_path, bbox_inches="tight", pad_inches=0)
122
-
123
-
124
  def configure_args(config, args):
125
  for key in ["stage", "corpus_type", "source_path", "aux_path", "preprocessed_path"]:
126
  if getattr(args, key) != None:
 
 
1
  import matplotlib.pyplot as plt
2
  import json
3
  import torch
4
  import torchaudio
5
  import hifigan
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  def load_vocoder(config):
8
  with open(
9
  "hifigan/config_{}.json".format(config["general"]["feature_type"]), "r"
 
16
  param.requires_grad = False
17
  return vocoder
18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  def configure_args(config, args):
20
  for key in ["stage", "corpus_type", "source_path", "aux_path", "preprocessed_path"]:
21
  if getattr(args, key) != None: