English
Sound Classification
CNN14
File size: 1,855 Bytes
422a112
d24c3c9
 
 
422a112
 
 
 
 
 
 
 
 
 
 
 
d24c3c9
 
 
8e55a46
d24c3c9
 
 
 
 
 
 
 
 
 
8e55a46
d24c3c9
 
 
8e55a46
d24c3c9
 
 
 
8e55a46
d24c3c9
 
 
 
 
 
 
 
8e55a46
 
d24c3c9
 
 
 
 
8e55a46
d24c3c9
 
 
 
 
8e55a46
 
 
 
 
 
d24c3c9
bdaad7b
 
13c6450
bdaad7b
d24c3c9
 
 
 
bdaad7b
 
318166a
bdaad7b
8924ed5
bdaad7b
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
# CNN14 inference
# Hyperparameters for inference with a CNN14 embedding model and a classifier.
# Objects are created with !new: tags and linked together with !ref below.

# Audio sample rate; the feature extractors defined later in this file use the
# same value (presumably Hz — confirm).
sample_rate: 44100

# Feature parameters
# n_mels, left_frames, right_frames and deltas carry the same values as the
# arguments of compute_features below.
n_mels: 80
left_frames: 0
right_frames: 0
deltas: false
# NOTE(review): amp_to_db, normalize and use_melspectra are not referenced
# anywhere else in this file; presumably they are read by the code that loads
# these hyperparameters — confirm before changing or removing them.
amp_to_db: true
normalize: true
use_melspectra: true

# Number of classes
# Same value as the classifier's out_neurons below.
out_n_neurons: 50

# NOTE(review): not referenced elsewhere in this file; presumably the device
# selected by the loading code — confirm.
device: cpu

# Functions
# Filterbank feature extractor. Arguments that mirror a top-level parameter are
# taken from it via !ref, so overriding n_mels, left_frames, right_frames,
# deltas or sample_rate once keeps this object in step with the rest of the
# configuration instead of leaving a stale literal behind.
compute_features: !new:speechbrain.lobes.features.Fbank
  n_mels: !ref <n_mels>
  left_frames: !ref <left_frames>
  right_frames: !ref <right_frames>
  deltas: !ref <deltas>
  sample_rate: !ref <sample_rate>
  n_fft: 1024
  # NOTE(review): these differ from the top-level win_length (23.2199) and
  # hop_length (11.6099), so they are kept as literals here rather than
  # referenced; confirm the mismatch is intentional.
  win_length: 20
  hop_length: 10

# NOTE(review): use_pretrain is not referenced elsewhere in this file — confirm
# which consumer reads it.
use_pretrain: false
# CNN14 embedding model. mel_bins follows the top-level n_mels so the two
# values cannot drift apart when n_mels is overridden.
embedding_model: !new:speechbrain.lobes.models.Cnn14.Cnn14
  mel_bins: !ref <n_mels>
  # Embedding size; the classifier's input_size carries the same value.
  emb_dim: 2048

# Classification module. out_neurons is taken from the top-level out_n_neurons
# ("Number of classes") instead of repeating the literal, so a single override
# changes both.
classifier: !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier
  # Same value as embedding_model's emb_dim (2048).
  input_size: 2048
  out_neurons: !ref <out_n_neurons>
  lin_blocks: 1

# Input normalization object, created with norm_type "sentence" and std_norm
# disabled (presumably per-utterance mean normalization without variance
# scaling — confirm against the InputNormalization documentation).
mean_var_norm: !new:speechbrain.processing.features.InputNormalization
  norm_type: sentence
  std_norm: false

# pre-processing
# Scalar settings for the spectrogram objects below: compute_stft uses the same
# n_fft, hop_length and win_length values, and compute_fbank the same n_fft.
n_fft: 1024
# NOTE(review): spec_mag_power is not referenced elsewhere in this file;
# presumably consumed by the loading code — confirm.
spec_mag_power: 0.5
# NOTE(review): units look like milliseconds (23.2199 ms at 44100 samples/s is
# ~1024 samples, matching n_fft; 11.6099 ms is ~512) — confirm.
hop_length: 11.6099
win_length: 23.2199

# STFT object. Every argument is taken from the matching top-level parameter,
# so each value is defined once and an override of n_fft, hop_length,
# win_length or sample_rate applies here as well.
compute_stft: !new:speechbrain.processing.features.STFT
  n_fft: !ref <n_fft>
  hop_length: !ref <hop_length>
  win_length: !ref <win_length>
  sample_rate: !ref <sample_rate>

# Filterbank object. Its arguments are taken from the top-level n_mels, n_fft
# and sample_rate rather than repeated as literals, so overrides of those
# parameters reach this object too.
compute_fbank: !new:speechbrain.processing.features.Filterbank
  n_mels: !ref <n_mels>
  n_fft: !ref <n_fft>
  sample_rate: !ref <sample_rate>

# Mapping of module names to the objects defined above; each entry is a !ref
# to the top-level key of the same name.
modules:
  compute_stft: !ref <compute_stft>
  compute_fbank: !ref <compute_fbank>
  compute_features: !ref <compute_features>
  embedding_model: !ref <embedding_model>
  classifier: !ref <classifier>
  mean_var_norm: !ref <mean_var_norm>


# Label encoder, created with no constructor arguments; the pretrainer below
# lists it among its loadables with the path label_encoder.txt.
label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
# Base location that the pretrainer paths below are built from (presumably a
# model-hub repository id or a local directory — confirm).
pretrained_path: speechbrain/cnn14-esc50

# Pretrainer that pairs objects with the files holding their saved state.
# Every name under loadables has an entry of the same name under paths.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
  # Objects to be loaded; each is a !ref to a top-level object in this file.
  loadables:
    embedding_model: !ref <embedding_model>
    classifier: !ref <classifier>
    label_encoder: !ref <label_encoder>
  # File for each loadable, built by prefixing the file name with
  # pretrained_path.
  paths:
    embedding_model: !ref <pretrained_path>/embedding_model_esc50ft.ckpt
    classifier: !ref <pretrained_path>/classifier_esc50.ckpt
    label_encoder: !ref <pretrained_path>/label_encoder.txt