gorinars committed on
Commit
cb3f1b9
1 Parent(s): fd0294d

model added

Browse files
Files changed (7) hide show
  1. .gitattributes +2 -0
  2. classifier.ckpt +3 -0
  3. config.json +3 -0
  4. embedding_model.ckpt +3 -0
  5. hyperparams.yaml +52 -0
  6. label_encoder.txt +310 -0
  7. test.py +7 -0
.gitattributes CHANGED
@@ -32,3 +32,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
+ classifier.ckpt filter=lfs diff=lfs merge=lfs -text
36
+ embedding_model.ckpt filter=lfs diff=lfs merge=lfs -text
classifier.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a59a8ff03fc9b88c20e56c05dafca58b4947da0b13109bb8f2a85f0a55f90f1
3
+ size 237355
config.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "speechbrain_interface": "SpeakerRecognition"
3
+ }
embedding_model.ckpt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b137c365f7b6399196dcfde86a60de175309bf3e464aa5b0ebde9651f1695a37
3
+ size 83310835
hyperparams.yaml ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # ############################################################################
2
+ # Model: ECAPA big (speaker-verification architecture) used as a 308-class sound classifier
3
+ # ############################################################################
4
+
5
+ # Feature parameters
6
+ n_mels: 80
7
+
8
+ # Pretrain folder (HuggingFace)
9
+ pretrained_path: gorinars/sb-ecapa-vggsound
10
+
11
+ # Output parameters
12
+ out_n_neurons: 308
13
+
14
+ # Model params
15
+ compute_features: !new:speechbrain.lobes.features.Fbank
16
+     n_mels: !ref <n_mels>
17
+
18
+ mean_var_norm: !new:speechbrain.processing.features.InputNormalization
19
+     norm_type: sentence
20
+     std_norm: False
21
+
22
+ embedding_model: !new:speechbrain.lobes.models.ECAPA_TDNN.ECAPA_TDNN
23
+     input_size: !ref <n_mels>
24
+     channels: [1024, 1024, 1024, 1024, 3072]
25
+     kernel_sizes: [5, 3, 3, 3, 1]
26
+     dilations: [1, 2, 3, 4, 1]
27
+     attention_channels: 128
28
+     lin_neurons: 192
29
+
30
+ classifier: !new:speechbrain.lobes.models.ECAPA_TDNN.Classifier
31
+     input_size: 192
32
+     out_neurons: !ref <out_n_neurons>
33
+
34
+ modules:
35
+     compute_features: !ref <compute_features>
36
+     mean_var_norm: !ref <mean_var_norm>
37
+     embedding_model: !ref <embedding_model>
38
+     classifier: !ref <classifier>
39
+
40
+ label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder
41
+
42
+
43
+ pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
44
+     loadables:
45
+         embedding_model: !ref <embedding_model>
46
+         classifier: !ref <classifier>
47
+         label_encoder: !ref <label_encoder>
48
+     paths:
49
+         embedding_model: !ref <pretrained_path>/embedding_model.ckpt
50
+         classifier: !ref <pretrained_path>/classifier.ckpt
51
+         label_encoder: !ref <pretrained_path>/label_encoder.txt
52
+
label_encoder.txt ADDED
@@ -0,0 +1,310 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ 'air conditioning noise' => 0
2
+ 'air horn' => 1
3
+ 'airplane' => 2
4
+ 'airplane flyby' => 3
5
+ 'alarm clock ringing' => 4
6
+ 'alligators, crocodiles hissing' => 5
7
+ 'ambulance siren' => 6
8
+ 'arc welding' => 7
9
+ 'baby babbling' => 8
10
+ 'baby crying' => 9
11
+ 'baby laughter' => 10
12
+ 'baltimore oriole calling' => 11
13
+ 'barn swallow calling' => 12
14
+ 'basketball bounce' => 13
15
+ 'bathroom ventilation fan running' => 14
16
+ 'beat boxing' => 15
17
+ 'bee, wasp, etc. buzzing' => 16
18
+ 'bird chirping, tweeting' => 17
19
+ 'bird squawking' => 18
20
+ 'bird wings flapping' => 19
21
+ 'black capped chickadee calling' => 20
22
+ 'blowtorch igniting' => 21
23
+ 'bouncing on trampoline' => 22
24
+ 'bowling impact' => 23
25
+ 'bull bellowing' => 24
26
+ 'canary calling' => 25
27
+ 'cap gun shooting' => 26
28
+ 'car engine idling' => 27
29
+ 'car engine knocking' => 28
30
+ 'car engine starting' => 29
31
+ 'car passing by' => 30
32
+ 'cat caterwauling' => 31
33
+ 'cat growling' => 32
34
+ 'cat hissing' => 33
35
+ 'cat meowing' => 34
36
+ 'cat purring' => 35
37
+ 'cattle mooing' => 36
38
+ 'cattle, bovinae cowbell' => 37
39
+ 'cell phone buzzing' => 38
40
+ 'chainsawing trees' => 39
41
+ 'cheetah chirrup' => 40
42
+ 'chicken clucking' => 41
43
+ 'chicken crowing' => 42
44
+ 'child singing' => 43
45
+ 'child speech, kid speaking' => 44
46
+ 'children shouting' => 45
47
+ 'chimpanzee pant-hooting' => 46
48
+ 'chinchilla barking' => 47
49
+ 'chipmunk chirping' => 48
50
+ 'chopping food' => 49
51
+ 'chopping wood' => 50
52
+ 'church bell ringing' => 51
53
+ 'civil defense siren' => 52
54
+ 'cow lowing' => 53
55
+ 'coyote howling' => 54
56
+ 'cricket chirping' => 55
57
+ 'crow cawing' => 56
58
+ 'cuckoo bird calling' => 57
59
+ 'cutting hair with electric trimmers' => 58
60
+ 'dinosaurs bellowing' => 59
61
+ 'disc scratching' => 60
62
+ 'dog barking' => 61
63
+ 'dog baying' => 62
64
+ 'dog bow-wow' => 63
65
+ 'dog growling' => 64
66
+ 'dog howling' => 65
67
+ 'dog whimpering' => 66
68
+ 'donkey, ass braying' => 67
69
+ 'door slamming' => 68
70
+ 'driving buses' => 69
71
+ 'driving motorcycle' => 70
72
+ 'driving snowmobile' => 71
73
+ 'duck quacking' => 72
74
+ 'eagle screaming' => 73
75
+ 'eating with cutlery' => 74
76
+ 'electric grinder grinding' => 75
77
+ 'electric shaver, electric razor shaving' => 76
78
+ 'elephant trumpeting' => 77
79
+ 'eletric blender running' => 78
80
+ 'elk bugling' => 79
81
+ 'engine accelerating, revving, vroom' => 80
82
+ 'female singing' => 81
83
+ 'female speech, woman speaking' => 82
84
+ 'ferret dooking' => 83
85
+ 'fire crackling' => 84
86
+ 'fire truck siren' => 85
87
+ 'fireworks banging' => 86
88
+ 'firing cannon' => 87
89
+ 'firing muskets' => 88
90
+ 'fly, housefly buzzing' => 89
91
+ 'foghorn' => 90
92
+ 'footsteps on snow' => 91
93
+ 'forging swords' => 92
94
+ 'fox barking' => 93
95
+ 'francolin calling' => 94
96
+ 'frog croaking' => 95
97
+ 'gibbon howling' => 96
98
+ 'goat bleating' => 97
99
+ 'golf driving' => 98
100
+ 'goose honking' => 99
101
+ 'hail' => 100
102
+ 'hair dryer drying' => 101
103
+ 'hammering nails' => 102
104
+ 'heart sounds, heartbeat' => 103
105
+ 'hedge trimmer running' => 104
106
+ 'helicopter' => 105
107
+ 'horse clip-clop' => 106
108
+ 'horse neighing' => 107
109
+ 'ice cracking' => 108
110
+ 'ice cream truck, ice cream van' => 109
111
+ 'lathe spinning' => 110
112
+ 'lawn mowing' => 111
113
+ 'lighting firecrackers' => 112
114
+ 'lions growling' => 113
115
+ 'lions roaring' => 114
116
+ 'lip smacking' => 115
117
+ 'machine gun shooting' => 116
118
+ 'magpie calling' => 117
119
+ 'male singing' => 118
120
+ 'male speech, man speaking' => 119
121
+ 'metronome' => 120
122
+ 'missile launch' => 121
123
+ 'mosquito buzzing' => 122
124
+ 'motorboat, speedboat acceleration' => 123
125
+ 'mouse clicking' => 124
126
+ 'mouse pattering' => 125
127
+ 'mouse squeaking' => 126
128
+ 'mynah bird singing' => 127
129
+ 'ocean burbling' => 128
130
+ 'opening or closing car doors' => 129
131
+ 'opening or closing car electric windows' => 130
132
+ 'opening or closing drawers' => 131
133
+ 'orchestra' => 132
134
+ 'otter growling' => 133
135
+ 'owl hooting' => 134
136
+ 'parrot talking' => 135
137
+ 'penguins braying' => 136
138
+ 'people babbling' => 137
139
+ 'people battle cry' => 138
140
+ 'people belly laughing' => 139
141
+ 'people booing' => 140
142
+ 'people burping' => 141
143
+ 'people cheering' => 142
144
+ 'people clapping' => 143
145
+ 'people coughing' => 144
146
+ 'people crowd' => 145
147
+ 'people eating' => 146
148
+ 'people eating apple' => 147
149
+ 'people eating crisps' => 148
150
+ 'people eating noodle' => 149
151
+ 'people farting' => 150
152
+ 'people finger snapping' => 151
153
+ 'people gargling' => 152
154
+ 'people giggling' => 153
155
+ 'people hiccup' => 154
156
+ 'people humming' => 155
157
+ 'people marching' => 156
158
+ 'people nose blowing' => 157
159
+ 'people running' => 158
160
+ 'people screaming' => 159
161
+ 'people shuffling' => 160
162
+ 'people slapping' => 161
163
+ 'people slurping' => 162
164
+ 'people sneezing' => 163
165
+ 'people sniggering' => 164
166
+ 'people sobbing' => 165
167
+ 'people whispering' => 166
168
+ 'people whistling' => 167
169
+ 'pheasant crowing' => 168
170
+ 'pig oinking' => 169
171
+ 'pigeon, dove cooing' => 170
172
+ 'planing timber' => 171
173
+ 'plastic bottle crushing' => 172
174
+ 'playing accordion' => 173
175
+ 'playing acoustic guitar' => 174
176
+ 'playing badminton' => 175
177
+ 'playing bagpipes' => 176
178
+ 'playing banjo' => 177
179
+ 'playing bass drum' => 178
180
+ 'playing bass guitar' => 179
181
+ 'playing bassoon' => 180
182
+ 'playing bongo' => 181
183
+ 'playing bugle' => 182
184
+ 'playing castanets' => 183
185
+ 'playing cello' => 184
186
+ 'playing clarinet' => 185
187
+ 'playing congas' => 186
188
+ 'playing cornet' => 187
189
+ 'playing cymbal' => 188
190
+ 'playing darts' => 189
191
+ 'playing didgeridoo' => 190
192
+ 'playing djembe' => 191
193
+ 'playing double bass' => 192
194
+ 'playing drum kit' => 193
195
+ 'playing electric guitar' => 194
196
+ 'playing electronic organ' => 195
197
+ 'playing erhu' => 196
198
+ 'playing flute' => 197
199
+ 'playing french horn' => 198
200
+ 'playing glockenspiel' => 199
201
+ 'playing gong' => 200
202
+ 'playing guiro' => 201
203
+ 'playing hammond organ' => 202
204
+ 'playing harmonica' => 203
205
+ 'playing harp' => 204
206
+ 'playing harpsichord' => 205
207
+ 'playing hockey' => 206
208
+ 'playing lacrosse' => 207
209
+ 'playing mandolin' => 208
210
+ 'playing marimba, xylophone' => 209
211
+ 'playing oboe' => 210
212
+ 'playing piano' => 211
213
+ 'playing saxophone' => 212
214
+ 'playing shofar' => 213
215
+ 'playing sitar' => 214
216
+ 'playing snare drum' => 215
217
+ 'playing squash' => 216
218
+ 'playing steel guitar, slide guitar' => 217
219
+ 'playing steelpan' => 218
220
+ 'playing synthesizer' => 219
221
+ 'playing tabla' => 220
222
+ 'playing table tennis' => 221
223
+ 'playing tambourine' => 222
224
+ 'playing tennis' => 223
225
+ 'playing theremin' => 224
226
+ 'playing timbales' => 225
227
+ 'playing timpani' => 226
228
+ 'playing trombone' => 227
229
+ 'playing trumpet' => 228
230
+ 'playing tuning fork' => 229
231
+ 'playing tympani' => 230
232
+ 'playing ukulele' => 231
233
+ 'playing vibraphone' => 232
234
+ 'playing violin, fiddle' => 233
235
+ 'playing volleyball' => 234
236
+ 'playing washboard' => 235
237
+ 'playing zither' => 236
238
+ 'police car (siren)' => 237
239
+ 'police radio chatter' => 238
240
+ 'popping popcorn' => 239
241
+ 'printer printing' => 240
242
+ 'pumping water' => 241
243
+ 'race car, auto racing' => 242
244
+ 'railroad car, train wagon' => 243
245
+ 'raining' => 244
246
+ 'rapping' => 245
247
+ 'reversing beeps' => 246
248
+ 'ripping paper' => 247
249
+ 'roller coaster running' => 248
250
+ 'rope skipping' => 249
251
+ 'rowboat, canoe, kayak rowing' => 250
252
+ 'running electric fan' => 251
253
+ 'sailing' => 252
254
+ 'scuba diving' => 253
255
+ 'sea lion barking' => 254
256
+ 'sea waves' => 255
257
+ 'sharpen knife' => 256
258
+ 'sheep bleating' => 257
259
+ 'shot football' => 258
260
+ 'singing bowl' => 259
261
+ 'singing choir' => 260
262
+ 'skateboarding' => 261
263
+ 'skidding' => 262
264
+ 'skiing' => 263
265
+ 'sliding door' => 264
266
+ 'sloshing water' => 265
267
+ 'slot machine' => 266
268
+ 'smoke detector beeping' => 267
269
+ 'snake hissing' => 268
270
+ 'snake rattling' => 269
271
+ 'splashing water' => 270
272
+ 'spraying water' => 271
273
+ 'squishing water' => 272
274
+ 'stream burbling' => 273
275
+ 'strike lighter' => 274
276
+ 'striking bowling' => 275
277
+ 'striking pool' => 276
278
+ 'subway, metro, underground' => 277
279
+ 'swimming' => 278
280
+ 'tap dancing' => 279
281
+ 'tapping guitar' => 280
282
+ 'telephone bell ringing' => 281
283
+ 'thunder' => 282
284
+ 'toilet flushing' => 283
285
+ 'tornado roaring' => 284
286
+ 'tractor digging' => 285
287
+ 'train horning' => 286
288
+ 'train wheels squealing' => 287
289
+ 'train whistling' => 288
290
+ 'turkey gobbling' => 289
291
+ 'typing on computer keyboard' => 290
292
+ 'typing on typewriter' => 291
293
+ 'underwater bubbling' => 292
294
+ 'using sewing machines' => 293
295
+ 'vacuum cleaner cleaning floors' => 294
296
+ 'vehicle horn, car horn, honking' => 295
297
+ 'volcano explosion' => 296
298
+ 'warbler chirping' => 297
299
+ 'waterfall burbling' => 298
300
+ 'whale calling' => 299
301
+ 'wind chime' => 300
302
+ 'wind noise' => 301
303
+ 'wind rustling leaves' => 302
304
+ 'wood thrush calling' => 303
305
+ 'woodpecker pecking tree' => 304
306
+ 'writing on blackboard with chalk' => 305
307
+ 'yodelling' => 306
308
+ 'zebra braying' => 307
309
+ ================
310
+ 'starting_index' => 0
test.py ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ import torchaudio
2
+ from speechbrain.pretrained import EncoderClassifier
3
+ classifier = EncoderClassifier.from_hparams(source=".")
4
+ signal, fs =torchaudio.load('example1.wav')
5
+ embeddings = classifier.encode_batch(signal)
6
+
7
+ print(embeddings)