GradientDescent2718 committed on
Commit
fca8ec9
·
verified ·
1 Parent(s): cc0de96

Added models

Browse files
Files changed (40) hide show
  1. AMI/.DS_Store +0 -0
  2. AMI/ls_eend_ami_step.json +80 -0
  3. AMI/ls_eend_ami_step.mlmodelc/analytics/coremldata.bin +3 -0
  4. AMI/ls_eend_ami_step.mlmodelc/coremldata.bin +3 -0
  5. AMI/ls_eend_ami_step.mlmodelc/metadata.json +224 -0
  6. AMI/ls_eend_ami_step.mlmodelc/model.mil +0 -0
  7. AMI/ls_eend_ami_step.mlmodelc/weights/weight.bin +3 -0
  8. AMI/ls_eend_ami_step.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
  9. AMI/ls_eend_ami_step.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
  10. AMI/ls_eend_ami_step.mlpackage/Manifest.json +18 -0
  11. CALLHOME/.DS_Store +0 -0
  12. CALLHOME/ls_eend_callhome_step.json +80 -0
  13. CALLHOME/ls_eend_callhome_step.mlmodelc/analytics/coremldata.bin +3 -0
  14. CALLHOME/ls_eend_callhome_step.mlmodelc/coremldata.bin +3 -0
  15. CALLHOME/ls_eend_callhome_step.mlmodelc/metadata.json +224 -0
  16. CALLHOME/ls_eend_callhome_step.mlmodelc/model.mil +0 -0
  17. CALLHOME/ls_eend_callhome_step.mlmodelc/weights/weight.bin +3 -0
  18. CALLHOME/ls_eend_callhome_step.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
  19. CALLHOME/ls_eend_callhome_step.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
  20. CALLHOME/ls_eend_callhome_step.mlpackage/Manifest.json +18 -0
  21. DIHARD II/.DS_Store +0 -0
  22. DIHARD II/ls_eend_dih2_step.json +80 -0
  23. DIHARD II/ls_eend_dih2_step.mlmodelc/analytics/coremldata.bin +3 -0
  24. DIHARD II/ls_eend_dih2_step.mlmodelc/coremldata.bin +3 -0
  25. DIHARD II/ls_eend_dih2_step.mlmodelc/metadata.json +224 -0
  26. DIHARD II/ls_eend_dih2_step.mlmodelc/model.mil +0 -0
  27. DIHARD II/ls_eend_dih2_step.mlmodelc/weights/weight.bin +3 -0
  28. DIHARD II/ls_eend_dih2_step.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
  29. DIHARD II/ls_eend_dih2_step.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
  30. DIHARD II/ls_eend_dih2_step.mlpackage/Manifest.json +18 -0
  31. DIHARD III/.DS_Store +0 -0
  32. DIHARD III/ls_eend_dih3_step.json +80 -0
  33. DIHARD III/ls_eend_dih3_step.mlmodelc/analytics/coremldata.bin +3 -0
  34. DIHARD III/ls_eend_dih3_step.mlmodelc/coremldata.bin +3 -0
  35. DIHARD III/ls_eend_dih3_step.mlmodelc/metadata.json +224 -0
  36. DIHARD III/ls_eend_dih3_step.mlmodelc/model.mil +0 -0
  37. DIHARD III/ls_eend_dih3_step.mlmodelc/weights/weight.bin +3 -0
  38. DIHARD III/ls_eend_dih3_step.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
  39. DIHARD III/ls_eend_dih3_step.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
  40. DIHARD III/ls_eend_dih3_step.mlpackage/Manifest.json +18 -0
AMI/.DS_Store ADDED
Binary file (6.15 kB). View file
 
AMI/ls_eend_ami_step.json ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "checkpoint": "/Users/benjaminlee/PycharmProjects/FS-EEND/LS-EEND/ls_eend_ami_allspk_model.ckpt",
3
+ "compute_precision": "float32",
4
+ "config": "/Users/benjaminlee/PycharmProjects/FS-EEND/LS-EEND/conf/spk_onl_conformer_retention_enc_dec_nonautoreg_ami_infer.yaml",
5
+ "context_recp": 7,
6
+ "conv_delay": 9,
7
+ "decoder_layers": 2,
8
+ "encoder_conv_cache_len": 15,
9
+ "encoder_dim": 256,
10
+ "encoder_layers": 4,
11
+ "feat_type": "logmel23_cummn",
12
+ "frame_hz": 10.0,
13
+ "full_output_dim": 6,
14
+ "head_dim": 64,
15
+ "hop_length": 80,
16
+ "input_dim": 345,
17
+ "key_dim": 64,
18
+ "max_nspks": 6,
19
+ "mixed_fp16_exclude_markers": [
20
+ "model.dec.",
21
+ "dec_ret",
22
+ "candidate_dec",
23
+ "attractor",
24
+ "full_logits",
25
+ "decode",
26
+ "convert"
27
+ ],
28
+ "mixed_fp16_include_markers": [
29
+ "model.enc.",
30
+ "model.cnn.",
31
+ "enc_ret_",
32
+ "enc_conv_cache"
33
+ ],
34
+ "n_fft": 256,
35
+ "n_mels": 23,
36
+ "num_heads": 4,
37
+ "real_output_dim": 4,
38
+ "sample_rate": 8000,
39
+ "state_shapes": {
40
+ "dec_ret_kv": [
41
+ 2,
42
+ 6,
43
+ 4,
44
+ 64,
45
+ 64
46
+ ],
47
+ "dec_ret_scale": [
48
+ 2,
49
+ 6,
50
+ 4
51
+ ],
52
+ "enc_conv_cache": [
53
+ 4,
54
+ 1,
55
+ 15,
56
+ 256
57
+ ],
58
+ "enc_ret_kv": [
59
+ 4,
60
+ 1,
61
+ 4,
62
+ 64,
63
+ 64
64
+ ],
65
+ "enc_ret_scale": [
66
+ 4,
67
+ 1,
68
+ 4
69
+ ],
70
+ "top_buffer": [
71
+ 1,
72
+ 19,
73
+ 256
74
+ ]
75
+ },
76
+ "subsampling": 10,
77
+ "target_sample_rate": 8000,
78
+ "top_buffer_len": 19,
79
+ "win_length": 200
80
+ }
AMI/ls_eend_ami_step.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c29e4e605586aca4f45ad01254b2fe365180fb9a42ac6c2fa65f114cc603c27
3
+ size 243
AMI/ls_eend_ami_step.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e88c8cc32b10c236622478631ba236544a578cab395119d1db21efc7e9de08b0
3
+ size 742
AMI/ls_eend_ami_step.mlmodelc/metadata.json ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Float32",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float32",
10
+ "formattedType" : "MultiArray (Float32 1 × 1 × 6)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 1, 6]",
13
+ "name" : "full_logits",
14
+ "type" : "MultiArray"
15
+ },
16
+ {
17
+ "hasShapeFlexibility" : "0",
18
+ "isOptional" : "0",
19
+ "dataType" : "Float32",
20
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4 × 64 × 64)",
21
+ "shortDescription" : "",
22
+ "shape" : "[4, 1, 4, 64, 64]",
23
+ "name" : "enc_ret_kv_out",
24
+ "type" : "MultiArray"
25
+ },
26
+ {
27
+ "hasShapeFlexibility" : "0",
28
+ "isOptional" : "0",
29
+ "dataType" : "Float32",
30
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4)",
31
+ "shortDescription" : "",
32
+ "shape" : "[4, 1, 4]",
33
+ "name" : "enc_ret_scale_out",
34
+ "type" : "MultiArray"
35
+ },
36
+ {
37
+ "hasShapeFlexibility" : "0",
38
+ "isOptional" : "0",
39
+ "dataType" : "Float32",
40
+ "formattedType" : "MultiArray (Float32 4 × 1 × 15 × 256)",
41
+ "shortDescription" : "",
42
+ "shape" : "[4, 1, 15, 256]",
43
+ "name" : "enc_conv_cache_out",
44
+ "type" : "MultiArray"
45
+ },
46
+ {
47
+ "hasShapeFlexibility" : "0",
48
+ "isOptional" : "0",
49
+ "dataType" : "Float32",
50
+ "formattedType" : "MultiArray (Float32 2 × 6 × 4 × 64 × 64)",
51
+ "shortDescription" : "",
52
+ "shape" : "[2, 6, 4, 64, 64]",
53
+ "name" : "dec_ret_kv_out",
54
+ "type" : "MultiArray"
55
+ },
56
+ {
57
+ "hasShapeFlexibility" : "0",
58
+ "isOptional" : "0",
59
+ "dataType" : "Float32",
60
+ "formattedType" : "MultiArray (Float32 2 × 6 × 4)",
61
+ "shortDescription" : "",
62
+ "shape" : "[2, 6, 4]",
63
+ "name" : "dec_ret_scale_out",
64
+ "type" : "MultiArray"
65
+ },
66
+ {
67
+ "hasShapeFlexibility" : "0",
68
+ "isOptional" : "0",
69
+ "dataType" : "Float32",
70
+ "formattedType" : "MultiArray (Float32 1 × 19 × 256)",
71
+ "shortDescription" : "",
72
+ "shape" : "[1, 19, 256]",
73
+ "name" : "top_buffer_out",
74
+ "type" : "MultiArray"
75
+ }
76
+ ],
77
+ "modelParameters" : [
78
+
79
+ ],
80
+ "specificationVersion" : 9,
81
+ "mlProgramOperationTypeHistogram" : {
82
+ "Ios18.expandDims" : 29,
83
+ "Ios18.mul" : 60,
84
+ "Ios18.softmax" : 2,
85
+ "Ios18.matmul" : 5,
86
+ "Ios18.realDiv" : 14,
87
+ "Ios18.sigmoid" : 4,
88
+ "Split" : 4,
89
+ "Tile" : 1,
90
+ "Ios18.add" : 48,
91
+ "Ios16.reduceSum" : 6,
92
+ "Ios18.layerNorm" : 33,
93
+ "Ios18.reshape" : 44,
94
+ "Ios18.maximum" : 2,
95
+ "Ios18.linear" : 60,
96
+ "Ios18.conv" : 13,
97
+ "Ios18.concat" : 6,
98
+ "Ios18.sub" : 14,
99
+ "Ios18.silu" : 18,
100
+ "Ios18.transpose" : 42,
101
+ "Ios18.sqrt" : 12,
102
+ "Ios18.relu" : 2,
103
+ "Stack" : 5,
104
+ "Ios18.sliceByIndex" : 21,
105
+ "Ios18.squeeze" : 1,
106
+ "Ios16.reduceL2Norm" : 2
107
+ },
108
+ "computePrecision" : "Mixed (Float32, Int32)",
109
+ "isUpdatable" : "0",
110
+ "stateSchema" : [
111
+
112
+ ],
113
+ "availability" : {
114
+ "macOS" : "15.0",
115
+ "tvOS" : "18.0",
116
+ "visionOS" : "2.0",
117
+ "watchOS" : "11.0",
118
+ "iOS" : "18.0",
119
+ "macCatalyst" : "18.0"
120
+ },
121
+ "modelType" : {
122
+ "name" : "MLModelType_mlProgram"
123
+ },
124
+ "userDefinedMetadata" : {
125
+ "com.github.apple.coremltools.source_dialect" : "TorchScript",
126
+ "com.github.apple.coremltools.source" : "torch==1.13.0",
127
+ "com.github.apple.coremltools.version" : "8.3.0"
128
+ },
129
+ "inputSchema" : [
130
+ {
131
+ "hasShapeFlexibility" : "0",
132
+ "isOptional" : "0",
133
+ "dataType" : "Float32",
134
+ "formattedType" : "MultiArray (Float32 1 × 1 × 345)",
135
+ "shortDescription" : "",
136
+ "shape" : "[1, 1, 345]",
137
+ "name" : "frame",
138
+ "type" : "MultiArray"
139
+ },
140
+ {
141
+ "hasShapeFlexibility" : "0",
142
+ "isOptional" : "0",
143
+ "dataType" : "Float32",
144
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4 × 64 × 64)",
145
+ "shortDescription" : "",
146
+ "shape" : "[4, 1, 4, 64, 64]",
147
+ "name" : "enc_ret_kv",
148
+ "type" : "MultiArray"
149
+ },
150
+ {
151
+ "hasShapeFlexibility" : "0",
152
+ "isOptional" : "0",
153
+ "dataType" : "Float32",
154
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4)",
155
+ "shortDescription" : "",
156
+ "shape" : "[4, 1, 4]",
157
+ "name" : "enc_ret_scale",
158
+ "type" : "MultiArray"
159
+ },
160
+ {
161
+ "hasShapeFlexibility" : "0",
162
+ "isOptional" : "0",
163
+ "dataType" : "Float32",
164
+ "formattedType" : "MultiArray (Float32 4 × 1 × 15 × 256)",
165
+ "shortDescription" : "",
166
+ "shape" : "[4, 1, 15, 256]",
167
+ "name" : "enc_conv_cache",
168
+ "type" : "MultiArray"
169
+ },
170
+ {
171
+ "hasShapeFlexibility" : "0",
172
+ "isOptional" : "0",
173
+ "dataType" : "Float32",
174
+ "formattedType" : "MultiArray (Float32 2 × 6 × 4 × 64 × 64)",
175
+ "shortDescription" : "",
176
+ "shape" : "[2, 6, 4, 64, 64]",
177
+ "name" : "dec_ret_kv",
178
+ "type" : "MultiArray"
179
+ },
180
+ {
181
+ "hasShapeFlexibility" : "0",
182
+ "isOptional" : "0",
183
+ "dataType" : "Float32",
184
+ "formattedType" : "MultiArray (Float32 2 × 6 × 4)",
185
+ "shortDescription" : "",
186
+ "shape" : "[2, 6, 4]",
187
+ "name" : "dec_ret_scale",
188
+ "type" : "MultiArray"
189
+ },
190
+ {
191
+ "hasShapeFlexibility" : "0",
192
+ "isOptional" : "0",
193
+ "dataType" : "Float32",
194
+ "formattedType" : "MultiArray (Float32 1 × 19 × 256)",
195
+ "shortDescription" : "",
196
+ "shape" : "[1, 19, 256]",
197
+ "name" : "top_buffer",
198
+ "type" : "MultiArray"
199
+ },
200
+ {
201
+ "hasShapeFlexibility" : "0",
202
+ "isOptional" : "0",
203
+ "dataType" : "Float32",
204
+ "formattedType" : "MultiArray (Float32 1)",
205
+ "shortDescription" : "",
206
+ "shape" : "[1]",
207
+ "name" : "ingest",
208
+ "type" : "MultiArray"
209
+ },
210
+ {
211
+ "hasShapeFlexibility" : "0",
212
+ "isOptional" : "0",
213
+ "dataType" : "Float32",
214
+ "formattedType" : "MultiArray (Float32 1)",
215
+ "shortDescription" : "",
216
+ "shape" : "[1]",
217
+ "name" : "decode",
218
+ "type" : "MultiArray"
219
+ }
220
+ ],
221
+ "generatedClassName" : "ls_eend_ami_step",
222
+ "method" : "predict"
223
+ }
224
+ ]
AMI/ls_eend_ami_step.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
AMI/ls_eend_ami_step.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56e27813e10448eb4454b17574becc305015ad657ce6cd3896044b7c8b95bcc3
3
+ size 44388992
AMI/ls_eend_ami_step.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:954289d9924242a1d00b0db4421bdd866b6dfbd73ef5c6d327f2eeec95f9ed41
3
+ size 168873
AMI/ls_eend_ami_step.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56e27813e10448eb4454b17574becc305015ad657ce6cd3896044b7c8b95bcc3
3
+ size 44388992
AMI/ls_eend_ami_step.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fileFormatVersion": "1.0.0",
3
+ "itemInfoEntries": {
4
+ "067F46DA-51CD-4543-8F1B-B1F48617AE5E": {
5
+ "author": "com.apple.CoreML",
6
+ "description": "CoreML Model Weights",
7
+ "name": "weights",
8
+ "path": "com.apple.CoreML/weights"
9
+ },
10
+ "E609CD24-18D6-48CF-886E-13B634913B46": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Specification",
13
+ "name": "model.mlmodel",
14
+ "path": "com.apple.CoreML/model.mlmodel"
15
+ }
16
+ },
17
+ "rootModelIdentifier": "E609CD24-18D6-48CF-886E-13B634913B46"
18
+ }
CALLHOME/.DS_Store ADDED
Binary file (6.15 kB). View file
 
CALLHOME/ls_eend_callhome_step.json ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "checkpoint": "/Users/benjaminlee/PycharmProjects/FS-EEND/LS-EEND/ls_eend_ch_allspk_model.ckpt",
3
+ "compute_precision": "float32",
4
+ "config": "/Users/benjaminlee/PycharmProjects/FS-EEND/LS-EEND/conf/spk_onl_conformer_retention_enc_dec_nonautoreg_callhome_infer.yaml",
5
+ "context_recp": 7,
6
+ "conv_delay": 9,
7
+ "decoder_layers": 2,
8
+ "encoder_conv_cache_len": 15,
9
+ "encoder_dim": 256,
10
+ "encoder_layers": 4,
11
+ "feat_type": "logmel23_cummn",
12
+ "frame_hz": 10.0,
13
+ "full_output_dim": 9,
14
+ "head_dim": 64,
15
+ "hop_length": 80,
16
+ "input_dim": 345,
17
+ "key_dim": 64,
18
+ "max_nspks": 9,
19
+ "mixed_fp16_exclude_markers": [
20
+ "model.dec.",
21
+ "dec_ret",
22
+ "candidate_dec",
23
+ "attractor",
24
+ "full_logits",
25
+ "decode",
26
+ "convert"
27
+ ],
28
+ "mixed_fp16_include_markers": [
29
+ "model.enc.",
30
+ "model.cnn.",
31
+ "enc_ret_",
32
+ "enc_conv_cache"
33
+ ],
34
+ "n_fft": 256,
35
+ "n_mels": 23,
36
+ "num_heads": 4,
37
+ "real_output_dim": 7,
38
+ "sample_rate": 8000,
39
+ "state_shapes": {
40
+ "dec_ret_kv": [
41
+ 2,
42
+ 9,
43
+ 4,
44
+ 64,
45
+ 64
46
+ ],
47
+ "dec_ret_scale": [
48
+ 2,
49
+ 9,
50
+ 4
51
+ ],
52
+ "enc_conv_cache": [
53
+ 4,
54
+ 1,
55
+ 15,
56
+ 256
57
+ ],
58
+ "enc_ret_kv": [
59
+ 4,
60
+ 1,
61
+ 4,
62
+ 64,
63
+ 64
64
+ ],
65
+ "enc_ret_scale": [
66
+ 4,
67
+ 1,
68
+ 4
69
+ ],
70
+ "top_buffer": [
71
+ 1,
72
+ 19,
73
+ 256
74
+ ]
75
+ },
76
+ "subsampling": 10,
77
+ "target_sample_rate": 8000,
78
+ "top_buffer_len": 19,
79
+ "win_length": 200
80
+ }
CALLHOME/ls_eend_callhome_step.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29d845d6bc108c1cc29a328350926dcbcb127b6d3fdbea46114e2421afa21722
3
+ size 243
CALLHOME/ls_eend_callhome_step.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b93e7cbca1ed52e213164f7922a6eb0d7d9708060bb25686a005a5cab3d6f07a
3
+ size 742
CALLHOME/ls_eend_callhome_step.mlmodelc/metadata.json ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Float32",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float32",
10
+ "formattedType" : "MultiArray (Float32 1 × 1 × 9)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 1, 9]",
13
+ "name" : "full_logits",
14
+ "type" : "MultiArray"
15
+ },
16
+ {
17
+ "hasShapeFlexibility" : "0",
18
+ "isOptional" : "0",
19
+ "dataType" : "Float32",
20
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4 × 64 × 64)",
21
+ "shortDescription" : "",
22
+ "shape" : "[4, 1, 4, 64, 64]",
23
+ "name" : "enc_ret_kv_out",
24
+ "type" : "MultiArray"
25
+ },
26
+ {
27
+ "hasShapeFlexibility" : "0",
28
+ "isOptional" : "0",
29
+ "dataType" : "Float32",
30
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4)",
31
+ "shortDescription" : "",
32
+ "shape" : "[4, 1, 4]",
33
+ "name" : "enc_ret_scale_out",
34
+ "type" : "MultiArray"
35
+ },
36
+ {
37
+ "hasShapeFlexibility" : "0",
38
+ "isOptional" : "0",
39
+ "dataType" : "Float32",
40
+ "formattedType" : "MultiArray (Float32 4 × 1 × 15 × 256)",
41
+ "shortDescription" : "",
42
+ "shape" : "[4, 1, 15, 256]",
43
+ "name" : "enc_conv_cache_out",
44
+ "type" : "MultiArray"
45
+ },
46
+ {
47
+ "hasShapeFlexibility" : "0",
48
+ "isOptional" : "0",
49
+ "dataType" : "Float32",
50
+ "formattedType" : "MultiArray (Float32 2 × 9 × 4 × 64 × 64)",
51
+ "shortDescription" : "",
52
+ "shape" : "[2, 9, 4, 64, 64]",
53
+ "name" : "dec_ret_kv_out",
54
+ "type" : "MultiArray"
55
+ },
56
+ {
57
+ "hasShapeFlexibility" : "0",
58
+ "isOptional" : "0",
59
+ "dataType" : "Float32",
60
+ "formattedType" : "MultiArray (Float32 2 × 9 × 4)",
61
+ "shortDescription" : "",
62
+ "shape" : "[2, 9, 4]",
63
+ "name" : "dec_ret_scale_out",
64
+ "type" : "MultiArray"
65
+ },
66
+ {
67
+ "hasShapeFlexibility" : "0",
68
+ "isOptional" : "0",
69
+ "dataType" : "Float32",
70
+ "formattedType" : "MultiArray (Float32 1 × 19 × 256)",
71
+ "shortDescription" : "",
72
+ "shape" : "[1, 19, 256]",
73
+ "name" : "top_buffer_out",
74
+ "type" : "MultiArray"
75
+ }
76
+ ],
77
+ "modelParameters" : [
78
+
79
+ ],
80
+ "specificationVersion" : 9,
81
+ "mlProgramOperationTypeHistogram" : {
82
+ "Ios18.expandDims" : 29,
83
+ "Ios18.mul" : 60,
84
+ "Ios18.softmax" : 2,
85
+ "Ios18.matmul" : 5,
86
+ "Ios18.realDiv" : 14,
87
+ "Ios18.sigmoid" : 4,
88
+ "Split" : 4,
89
+ "Tile" : 1,
90
+ "Ios18.add" : 48,
91
+ "Ios16.reduceSum" : 6,
92
+ "Ios18.layerNorm" : 33,
93
+ "Ios18.reshape" : 44,
94
+ "Ios18.maximum" : 2,
95
+ "Ios18.linear" : 60,
96
+ "Ios18.conv" : 13,
97
+ "Ios18.concat" : 6,
98
+ "Ios18.sub" : 14,
99
+ "Ios18.silu" : 18,
100
+ "Ios18.transpose" : 42,
101
+ "Ios18.sqrt" : 12,
102
+ "Ios18.relu" : 2,
103
+ "Stack" : 5,
104
+ "Ios18.sliceByIndex" : 21,
105
+ "Ios18.squeeze" : 1,
106
+ "Ios16.reduceL2Norm" : 2
107
+ },
108
+ "computePrecision" : "Mixed (Float32, Int32)",
109
+ "isUpdatable" : "0",
110
+ "stateSchema" : [
111
+
112
+ ],
113
+ "availability" : {
114
+ "macOS" : "15.0",
115
+ "tvOS" : "18.0",
116
+ "visionOS" : "2.0",
117
+ "watchOS" : "11.0",
118
+ "iOS" : "18.0",
119
+ "macCatalyst" : "18.0"
120
+ },
121
+ "modelType" : {
122
+ "name" : "MLModelType_mlProgram"
123
+ },
124
+ "userDefinedMetadata" : {
125
+ "com.github.apple.coremltools.version" : "8.3.0",
126
+ "com.github.apple.coremltools.source" : "torch==1.13.0",
127
+ "com.github.apple.coremltools.source_dialect" : "TorchScript"
128
+ },
129
+ "inputSchema" : [
130
+ {
131
+ "hasShapeFlexibility" : "0",
132
+ "isOptional" : "0",
133
+ "dataType" : "Float32",
134
+ "formattedType" : "MultiArray (Float32 1 × 1 × 345)",
135
+ "shortDescription" : "",
136
+ "shape" : "[1, 1, 345]",
137
+ "name" : "frame",
138
+ "type" : "MultiArray"
139
+ },
140
+ {
141
+ "hasShapeFlexibility" : "0",
142
+ "isOptional" : "0",
143
+ "dataType" : "Float32",
144
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4 × 64 × 64)",
145
+ "shortDescription" : "",
146
+ "shape" : "[4, 1, 4, 64, 64]",
147
+ "name" : "enc_ret_kv",
148
+ "type" : "MultiArray"
149
+ },
150
+ {
151
+ "hasShapeFlexibility" : "0",
152
+ "isOptional" : "0",
153
+ "dataType" : "Float32",
154
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4)",
155
+ "shortDescription" : "",
156
+ "shape" : "[4, 1, 4]",
157
+ "name" : "enc_ret_scale",
158
+ "type" : "MultiArray"
159
+ },
160
+ {
161
+ "hasShapeFlexibility" : "0",
162
+ "isOptional" : "0",
163
+ "dataType" : "Float32",
164
+ "formattedType" : "MultiArray (Float32 4 × 1 × 15 × 256)",
165
+ "shortDescription" : "",
166
+ "shape" : "[4, 1, 15, 256]",
167
+ "name" : "enc_conv_cache",
168
+ "type" : "MultiArray"
169
+ },
170
+ {
171
+ "hasShapeFlexibility" : "0",
172
+ "isOptional" : "0",
173
+ "dataType" : "Float32",
174
+ "formattedType" : "MultiArray (Float32 2 × 9 × 4 × 64 × 64)",
175
+ "shortDescription" : "",
176
+ "shape" : "[2, 9, 4, 64, 64]",
177
+ "name" : "dec_ret_kv",
178
+ "type" : "MultiArray"
179
+ },
180
+ {
181
+ "hasShapeFlexibility" : "0",
182
+ "isOptional" : "0",
183
+ "dataType" : "Float32",
184
+ "formattedType" : "MultiArray (Float32 2 × 9 × 4)",
185
+ "shortDescription" : "",
186
+ "shape" : "[2, 9, 4]",
187
+ "name" : "dec_ret_scale",
188
+ "type" : "MultiArray"
189
+ },
190
+ {
191
+ "hasShapeFlexibility" : "0",
192
+ "isOptional" : "0",
193
+ "dataType" : "Float32",
194
+ "formattedType" : "MultiArray (Float32 1 × 19 × 256)",
195
+ "shortDescription" : "",
196
+ "shape" : "[1, 19, 256]",
197
+ "name" : "top_buffer",
198
+ "type" : "MultiArray"
199
+ },
200
+ {
201
+ "hasShapeFlexibility" : "0",
202
+ "isOptional" : "0",
203
+ "dataType" : "Float32",
204
+ "formattedType" : "MultiArray (Float32 1)",
205
+ "shortDescription" : "",
206
+ "shape" : "[1]",
207
+ "name" : "ingest",
208
+ "type" : "MultiArray"
209
+ },
210
+ {
211
+ "hasShapeFlexibility" : "0",
212
+ "isOptional" : "0",
213
+ "dataType" : "Float32",
214
+ "formattedType" : "MultiArray (Float32 1)",
215
+ "shortDescription" : "",
216
+ "shape" : "[1]",
217
+ "name" : "decode",
218
+ "type" : "MultiArray"
219
+ }
220
+ ],
221
+ "generatedClassName" : "ls_eend_callhome_step",
222
+ "method" : "predict"
223
+ }
224
+ ]
CALLHOME/ls_eend_callhome_step.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
CALLHOME/ls_eend_callhome_step.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:771f7359f23837443571e2d726d830f221b402d70ef631eb02d54a7e9a954849
3
+ size 44392064
CALLHOME/ls_eend_callhome_step.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de6c0df092b3822346da0d2c8f3a1fb81d90305ff22029737f5ebd2a4870c97b
3
+ size 168873
CALLHOME/ls_eend_callhome_step.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:771f7359f23837443571e2d726d830f221b402d70ef631eb02d54a7e9a954849
3
+ size 44392064
CALLHOME/ls_eend_callhome_step.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fileFormatVersion": "1.0.0",
3
+ "itemInfoEntries": {
4
+ "16D2CD1C-2109-40F0-8885-182FAD0FDC95": {
5
+ "author": "com.apple.CoreML",
6
+ "description": "CoreML Model Specification",
7
+ "name": "model.mlmodel",
8
+ "path": "com.apple.CoreML/model.mlmodel"
9
+ },
10
+ "57EC3F2B-2646-49D3-AEAC-0B15C73EDE52": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Weights",
13
+ "name": "weights",
14
+ "path": "com.apple.CoreML/weights"
15
+ }
16
+ },
17
+ "rootModelIdentifier": "16D2CD1C-2109-40F0-8885-182FAD0FDC95"
18
+ }
DIHARD II/.DS_Store ADDED
Binary file (6.15 kB). View file
 
DIHARD II/ls_eend_dih2_step.json ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "checkpoint": "/Users/benjaminlee/PycharmProjects/FS-EEND/LS-EEND/ls_eend_dih2_allspk_model.ckpt",
3
+ "compute_precision": "float32",
4
+ "config": "/Users/benjaminlee/PycharmProjects/FS-EEND/LS-EEND/conf/spk_onl_conformer_retention_enc_dec_nonautoreg_dihard2_infer.yaml",
5
+ "context_recp": 7,
6
+ "conv_delay": 9,
7
+ "decoder_layers": 2,
8
+ "encoder_conv_cache_len": 15,
9
+ "encoder_dim": 256,
10
+ "encoder_layers": 4,
11
+ "feat_type": "logmel23_cummn",
12
+ "frame_hz": 10.0,
13
+ "full_output_dim": 12,
14
+ "head_dim": 64,
15
+ "hop_length": 80,
16
+ "input_dim": 345,
17
+ "key_dim": 64,
18
+ "max_nspks": 12,
19
+ "mixed_fp16_exclude_markers": [
20
+ "model.dec.",
21
+ "dec_ret",
22
+ "candidate_dec",
23
+ "attractor",
24
+ "full_logits",
25
+ "decode",
26
+ "convert"
27
+ ],
28
+ "mixed_fp16_include_markers": [
29
+ "model.enc.",
30
+ "model.cnn.",
31
+ "enc_ret_",
32
+ "enc_conv_cache"
33
+ ],
34
+ "n_fft": 256,
35
+ "n_mels": 23,
36
+ "num_heads": 4,
37
+ "real_output_dim": 10,
38
+ "sample_rate": 8000,
39
+ "state_shapes": {
40
+ "dec_ret_kv": [
41
+ 2,
42
+ 12,
43
+ 4,
44
+ 64,
45
+ 64
46
+ ],
47
+ "dec_ret_scale": [
48
+ 2,
49
+ 12,
50
+ 4
51
+ ],
52
+ "enc_conv_cache": [
53
+ 4,
54
+ 1,
55
+ 15,
56
+ 256
57
+ ],
58
+ "enc_ret_kv": [
59
+ 4,
60
+ 1,
61
+ 4,
62
+ 64,
63
+ 64
64
+ ],
65
+ "enc_ret_scale": [
66
+ 4,
67
+ 1,
68
+ 4
69
+ ],
70
+ "top_buffer": [
71
+ 1,
72
+ 19,
73
+ 256
74
+ ]
75
+ },
76
+ "subsampling": 10,
77
+ "target_sample_rate": 8000,
78
+ "top_buffer_len": 19,
79
+ "win_length": 200
80
+ }
DIHARD II/ls_eend_dih2_step.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86d79d7e9f2bc30200422f9ec4e3e10d341c7cc4683a7a9a57adb1c887154326
3
+ size 243
DIHARD II/ls_eend_dih2_step.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75fdd2dc79af0fac42dfa410fa8c542fea7ed32addb93a12c165c9ec77f2c431
3
+ size 742
DIHARD II/ls_eend_dih2_step.mlmodelc/metadata.json ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Float32",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float32",
10
+ "formattedType" : "MultiArray (Float32 1 × 1 × 12)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 1, 12]",
13
+ "name" : "full_logits",
14
+ "type" : "MultiArray"
15
+ },
16
+ {
17
+ "hasShapeFlexibility" : "0",
18
+ "isOptional" : "0",
19
+ "dataType" : "Float32",
20
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4 × 64 × 64)",
21
+ "shortDescription" : "",
22
+ "shape" : "[4, 1, 4, 64, 64]",
23
+ "name" : "enc_ret_kv_out",
24
+ "type" : "MultiArray"
25
+ },
26
+ {
27
+ "hasShapeFlexibility" : "0",
28
+ "isOptional" : "0",
29
+ "dataType" : "Float32",
30
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4)",
31
+ "shortDescription" : "",
32
+ "shape" : "[4, 1, 4]",
33
+ "name" : "enc_ret_scale_out",
34
+ "type" : "MultiArray"
35
+ },
36
+ {
37
+ "hasShapeFlexibility" : "0",
38
+ "isOptional" : "0",
39
+ "dataType" : "Float32",
40
+ "formattedType" : "MultiArray (Float32 4 × 1 × 15 × 256)",
41
+ "shortDescription" : "",
42
+ "shape" : "[4, 1, 15, 256]",
43
+ "name" : "enc_conv_cache_out",
44
+ "type" : "MultiArray"
45
+ },
46
+ {
47
+ "hasShapeFlexibility" : "0",
48
+ "isOptional" : "0",
49
+ "dataType" : "Float32",
50
+ "formattedType" : "MultiArray (Float32 2 × 12 × 4 × 64 × 64)",
51
+ "shortDescription" : "",
52
+ "shape" : "[2, 12, 4, 64, 64]",
53
+ "name" : "dec_ret_kv_out",
54
+ "type" : "MultiArray"
55
+ },
56
+ {
57
+ "hasShapeFlexibility" : "0",
58
+ "isOptional" : "0",
59
+ "dataType" : "Float32",
60
+ "formattedType" : "MultiArray (Float32 2 × 12 × 4)",
61
+ "shortDescription" : "",
62
+ "shape" : "[2, 12, 4]",
63
+ "name" : "dec_ret_scale_out",
64
+ "type" : "MultiArray"
65
+ },
66
+ {
67
+ "hasShapeFlexibility" : "0",
68
+ "isOptional" : "0",
69
+ "dataType" : "Float32",
70
+ "formattedType" : "MultiArray (Float32 1 × 19 × 256)",
71
+ "shortDescription" : "",
72
+ "shape" : "[1, 19, 256]",
73
+ "name" : "top_buffer_out",
74
+ "type" : "MultiArray"
75
+ }
76
+ ],
77
+ "modelParameters" : [
78
+
79
+ ],
80
+ "specificationVersion" : 9,
81
+ "mlProgramOperationTypeHistogram" : {
82
+ "Ios18.expandDims" : 29,
83
+ "Ios18.mul" : 60,
84
+ "Ios18.softmax" : 2,
85
+ "Ios18.matmul" : 5,
86
+ "Ios18.realDiv" : 14,
87
+ "Ios18.sigmoid" : 4,
88
+ "Split" : 4,
89
+ "Tile" : 1,
90
+ "Ios18.add" : 48,
91
+ "Ios16.reduceSum" : 6,
92
+ "Ios18.layerNorm" : 33,
93
+ "Ios18.reshape" : 44,
94
+ "Ios18.maximum" : 2,
95
+ "Ios18.linear" : 60,
96
+ "Ios18.conv" : 13,
97
+ "Ios18.concat" : 6,
98
+ "Ios18.sub" : 14,
99
+ "Ios18.silu" : 18,
100
+ "Ios18.transpose" : 42,
101
+ "Ios18.sqrt" : 12,
102
+ "Ios18.relu" : 2,
103
+ "Stack" : 5,
104
+ "Ios18.sliceByIndex" : 21,
105
+ "Ios18.squeeze" : 1,
106
+ "Ios16.reduceL2Norm" : 2
107
+ },
108
+ "computePrecision" : "Mixed (Float32, Int32)",
109
+ "isUpdatable" : "0",
110
+ "stateSchema" : [
111
+
112
+ ],
113
+ "availability" : {
114
+ "macOS" : "15.0",
115
+ "tvOS" : "18.0",
116
+ "visionOS" : "2.0",
117
+ "watchOS" : "11.0",
118
+ "iOS" : "18.0",
119
+ "macCatalyst" : "18.0"
120
+ },
121
+ "modelType" : {
122
+ "name" : "MLModelType_mlProgram"
123
+ },
124
+ "userDefinedMetadata" : {
125
+ "com.github.apple.coremltools.version" : "8.3.0",
126
+ "com.github.apple.coremltools.source" : "torch==1.13.0",
127
+ "com.github.apple.coremltools.source_dialect" : "TorchScript"
128
+ },
129
+ "inputSchema" : [
130
+ {
131
+ "hasShapeFlexibility" : "0",
132
+ "isOptional" : "0",
133
+ "dataType" : "Float32",
134
+ "formattedType" : "MultiArray (Float32 1 × 1 × 345)",
135
+ "shortDescription" : "",
136
+ "shape" : "[1, 1, 345]",
137
+ "name" : "frame",
138
+ "type" : "MultiArray"
139
+ },
140
+ {
141
+ "hasShapeFlexibility" : "0",
142
+ "isOptional" : "0",
143
+ "dataType" : "Float32",
144
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4 × 64 × 64)",
145
+ "shortDescription" : "",
146
+ "shape" : "[4, 1, 4, 64, 64]",
147
+ "name" : "enc_ret_kv",
148
+ "type" : "MultiArray"
149
+ },
150
+ {
151
+ "hasShapeFlexibility" : "0",
152
+ "isOptional" : "0",
153
+ "dataType" : "Float32",
154
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4)",
155
+ "shortDescription" : "",
156
+ "shape" : "[4, 1, 4]",
157
+ "name" : "enc_ret_scale",
158
+ "type" : "MultiArray"
159
+ },
160
+ {
161
+ "hasShapeFlexibility" : "0",
162
+ "isOptional" : "0",
163
+ "dataType" : "Float32",
164
+ "formattedType" : "MultiArray (Float32 4 × 1 × 15 × 256)",
165
+ "shortDescription" : "",
166
+ "shape" : "[4, 1, 15, 256]",
167
+ "name" : "enc_conv_cache",
168
+ "type" : "MultiArray"
169
+ },
170
+ {
171
+ "hasShapeFlexibility" : "0",
172
+ "isOptional" : "0",
173
+ "dataType" : "Float32",
174
+ "formattedType" : "MultiArray (Float32 2 × 12 × 4 × 64 × 64)",
175
+ "shortDescription" : "",
176
+ "shape" : "[2, 12, 4, 64, 64]",
177
+ "name" : "dec_ret_kv",
178
+ "type" : "MultiArray"
179
+ },
180
+ {
181
+ "hasShapeFlexibility" : "0",
182
+ "isOptional" : "0",
183
+ "dataType" : "Float32",
184
+ "formattedType" : "MultiArray (Float32 2 × 12 × 4)",
185
+ "shortDescription" : "",
186
+ "shape" : "[2, 12, 4]",
187
+ "name" : "dec_ret_scale",
188
+ "type" : "MultiArray"
189
+ },
190
+ {
191
+ "hasShapeFlexibility" : "0",
192
+ "isOptional" : "0",
193
+ "dataType" : "Float32",
194
+ "formattedType" : "MultiArray (Float32 1 × 19 × 256)",
195
+ "shortDescription" : "",
196
+ "shape" : "[1, 19, 256]",
197
+ "name" : "top_buffer",
198
+ "type" : "MultiArray"
199
+ },
200
+ {
201
+ "hasShapeFlexibility" : "0",
202
+ "isOptional" : "0",
203
+ "dataType" : "Float32",
204
+ "formattedType" : "MultiArray (Float32 1)",
205
+ "shortDescription" : "",
206
+ "shape" : "[1]",
207
+ "name" : "ingest",
208
+ "type" : "MultiArray"
209
+ },
210
+ {
211
+ "hasShapeFlexibility" : "0",
212
+ "isOptional" : "0",
213
+ "dataType" : "Float32",
214
+ "formattedType" : "MultiArray (Float32 1)",
215
+ "shortDescription" : "",
216
+ "shape" : "[1]",
217
+ "name" : "decode",
218
+ "type" : "MultiArray"
219
+ }
220
+ ],
221
+ "generatedClassName" : "ls_eend_dih2_step",
222
+ "method" : "predict"
223
+ }
224
+ ]
DIHARD II/ls_eend_dih2_step.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
DIHARD II/ls_eend_dih2_step.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d30b3ca77b8c6fb34d398d93064c5378d7ae6623c05512be554ca18d5b5eb886
3
+ size 44395136
DIHARD II/ls_eend_dih2_step.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdcd6e15d0978052bd162c71e15025e9288f0e7d08f80c2afdc70f5592a1c5a0
3
+ size 168873
DIHARD II/ls_eend_dih2_step.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d30b3ca77b8c6fb34d398d93064c5378d7ae6623c05512be554ca18d5b5eb886
3
+ size 44395136
DIHARD II/ls_eend_dih2_step.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fileFormatVersion": "1.0.0",
3
+ "itemInfoEntries": {
4
+ "2F7A4BA5-180F-4E22-8D25-55A5EA5707B1": {
5
+ "author": "com.apple.CoreML",
6
+ "description": "CoreML Model Specification",
7
+ "name": "model.mlmodel",
8
+ "path": "com.apple.CoreML/model.mlmodel"
9
+ },
10
+ "491A6A2C-B8BC-41B0-B513-3D277C9464F2": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Weights",
13
+ "name": "weights",
14
+ "path": "com.apple.CoreML/weights"
15
+ }
16
+ },
17
+ "rootModelIdentifier": "2F7A4BA5-180F-4E22-8D25-55A5EA5707B1"
18
+ }
DIHARD III/.DS_Store ADDED
Binary file (6.15 kB). View file
 
DIHARD III/ls_eend_dih3_step.json ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "checkpoint": "/Users/benjaminlee/PycharmProjects/FS-EEND/LS-EEND/ls_eend_dih3_allspk_model.ckpt",
3
+ "compute_precision": "float32",
4
+ "config": "/Users/benjaminlee/PycharmProjects/FS-EEND/LS-EEND/conf/spk_onl_conformer_retention_enc_dec_nonautoreg_dihard3_infer.yaml",
5
+ "context_recp": 7,
6
+ "conv_delay": 9,
7
+ "decoder_layers": 2,
8
+ "encoder_conv_cache_len": 15,
9
+ "encoder_dim": 256,
10
+ "encoder_layers": 4,
11
+ "feat_type": "logmel23_cummn",
12
+ "frame_hz": 10.0,
13
+ "full_output_dim": 12,
14
+ "head_dim": 64,
15
+ "hop_length": 80,
16
+ "input_dim": 345,
17
+ "key_dim": 64,
18
+ "max_nspks": 12,
19
+ "mixed_fp16_exclude_markers": [
20
+ "model.dec.",
21
+ "dec_ret",
22
+ "candidate_dec",
23
+ "attractor",
24
+ "full_logits",
25
+ "decode",
26
+ "convert"
27
+ ],
28
+ "mixed_fp16_include_markers": [
29
+ "model.enc.",
30
+ "model.cnn.",
31
+ "enc_ret_",
32
+ "enc_conv_cache"
33
+ ],
34
+ "n_fft": 256,
35
+ "n_mels": 23,
36
+ "num_heads": 4,
37
+ "real_output_dim": 10,
38
+ "sample_rate": 8000,
39
+ "state_shapes": {
40
+ "dec_ret_kv": [
41
+ 2,
42
+ 12,
43
+ 4,
44
+ 64,
45
+ 64
46
+ ],
47
+ "dec_ret_scale": [
48
+ 2,
49
+ 12,
50
+ 4
51
+ ],
52
+ "enc_conv_cache": [
53
+ 4,
54
+ 1,
55
+ 15,
56
+ 256
57
+ ],
58
+ "enc_ret_kv": [
59
+ 4,
60
+ 1,
61
+ 4,
62
+ 64,
63
+ 64
64
+ ],
65
+ "enc_ret_scale": [
66
+ 4,
67
+ 1,
68
+ 4
69
+ ],
70
+ "top_buffer": [
71
+ 1,
72
+ 19,
73
+ 256
74
+ ]
75
+ },
76
+ "subsampling": 10,
77
+ "target_sample_rate": 8000,
78
+ "top_buffer_len": 19,
79
+ "win_length": 200
80
+ }
DIHARD III/ls_eend_dih3_step.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86d79d7e9f2bc30200422f9ec4e3e10d341c7cc4683a7a9a57adb1c887154326
3
+ size 243
DIHARD III/ls_eend_dih3_step.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82d59494af55a0183c02bb3e2cd6e16c43c7521a45f26cabf86ce65ef7ce7079
3
+ size 742
DIHARD III/ls_eend_dih3_step.mlmodelc/metadata.json ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Float32",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float32",
10
+ "formattedType" : "MultiArray (Float32 1 × 1 × 12)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 1, 12]",
13
+ "name" : "full_logits",
14
+ "type" : "MultiArray"
15
+ },
16
+ {
17
+ "hasShapeFlexibility" : "0",
18
+ "isOptional" : "0",
19
+ "dataType" : "Float32",
20
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4 × 64 × 64)",
21
+ "shortDescription" : "",
22
+ "shape" : "[4, 1, 4, 64, 64]",
23
+ "name" : "enc_ret_kv_out",
24
+ "type" : "MultiArray"
25
+ },
26
+ {
27
+ "hasShapeFlexibility" : "0",
28
+ "isOptional" : "0",
29
+ "dataType" : "Float32",
30
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4)",
31
+ "shortDescription" : "",
32
+ "shape" : "[4, 1, 4]",
33
+ "name" : "enc_ret_scale_out",
34
+ "type" : "MultiArray"
35
+ },
36
+ {
37
+ "hasShapeFlexibility" : "0",
38
+ "isOptional" : "0",
39
+ "dataType" : "Float32",
40
+ "formattedType" : "MultiArray (Float32 4 × 1 × 15 × 256)",
41
+ "shortDescription" : "",
42
+ "shape" : "[4, 1, 15, 256]",
43
+ "name" : "enc_conv_cache_out",
44
+ "type" : "MultiArray"
45
+ },
46
+ {
47
+ "hasShapeFlexibility" : "0",
48
+ "isOptional" : "0",
49
+ "dataType" : "Float32",
50
+ "formattedType" : "MultiArray (Float32 2 × 12 × 4 × 64 × 64)",
51
+ "shortDescription" : "",
52
+ "shape" : "[2, 12, 4, 64, 64]",
53
+ "name" : "dec_ret_kv_out",
54
+ "type" : "MultiArray"
55
+ },
56
+ {
57
+ "hasShapeFlexibility" : "0",
58
+ "isOptional" : "0",
59
+ "dataType" : "Float32",
60
+ "formattedType" : "MultiArray (Float32 2 × 12 × 4)",
61
+ "shortDescription" : "",
62
+ "shape" : "[2, 12, 4]",
63
+ "name" : "dec_ret_scale_out",
64
+ "type" : "MultiArray"
65
+ },
66
+ {
67
+ "hasShapeFlexibility" : "0",
68
+ "isOptional" : "0",
69
+ "dataType" : "Float32",
70
+ "formattedType" : "MultiArray (Float32 1 × 19 × 256)",
71
+ "shortDescription" : "",
72
+ "shape" : "[1, 19, 256]",
73
+ "name" : "top_buffer_out",
74
+ "type" : "MultiArray"
75
+ }
76
+ ],
77
+ "modelParameters" : [
78
+
79
+ ],
80
+ "specificationVersion" : 9,
81
+ "mlProgramOperationTypeHistogram" : {
82
+ "Ios18.expandDims" : 29,
83
+ "Ios18.mul" : 60,
84
+ "Ios18.softmax" : 2,
85
+ "Ios18.matmul" : 5,
86
+ "Ios18.realDiv" : 14,
87
+ "Ios18.sigmoid" : 4,
88
+ "Split" : 4,
89
+ "Tile" : 1,
90
+ "Ios18.add" : 48,
91
+ "Ios16.reduceSum" : 6,
92
+ "Ios18.layerNorm" : 33,
93
+ "Ios18.reshape" : 44,
94
+ "Ios18.maximum" : 2,
95
+ "Ios18.linear" : 60,
96
+ "Ios18.conv" : 13,
97
+ "Ios18.concat" : 6,
98
+ "Ios18.sub" : 14,
99
+ "Ios18.silu" : 18,
100
+ "Ios18.transpose" : 42,
101
+ "Ios18.sqrt" : 12,
102
+ "Ios18.relu" : 2,
103
+ "Stack" : 5,
104
+ "Ios18.sliceByIndex" : 21,
105
+ "Ios18.squeeze" : 1,
106
+ "Ios16.reduceL2Norm" : 2
107
+ },
108
+ "computePrecision" : "Mixed (Float32, Int32)",
109
+ "isUpdatable" : "0",
110
+ "stateSchema" : [
111
+
112
+ ],
113
+ "availability" : {
114
+ "macOS" : "15.0",
115
+ "tvOS" : "18.0",
116
+ "visionOS" : "2.0",
117
+ "watchOS" : "11.0",
118
+ "iOS" : "18.0",
119
+ "macCatalyst" : "18.0"
120
+ },
121
+ "modelType" : {
122
+ "name" : "MLModelType_mlProgram"
123
+ },
124
+ "userDefinedMetadata" : {
125
+ "com.github.apple.coremltools.source_dialect" : "TorchScript",
126
+ "com.github.apple.coremltools.version" : "8.3.0",
127
+ "com.github.apple.coremltools.source" : "torch==1.13.0"
128
+ },
129
+ "inputSchema" : [
130
+ {
131
+ "hasShapeFlexibility" : "0",
132
+ "isOptional" : "0",
133
+ "dataType" : "Float32",
134
+ "formattedType" : "MultiArray (Float32 1 × 1 × 345)",
135
+ "shortDescription" : "",
136
+ "shape" : "[1, 1, 345]",
137
+ "name" : "frame",
138
+ "type" : "MultiArray"
139
+ },
140
+ {
141
+ "hasShapeFlexibility" : "0",
142
+ "isOptional" : "0",
143
+ "dataType" : "Float32",
144
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4 × 64 × 64)",
145
+ "shortDescription" : "",
146
+ "shape" : "[4, 1, 4, 64, 64]",
147
+ "name" : "enc_ret_kv",
148
+ "type" : "MultiArray"
149
+ },
150
+ {
151
+ "hasShapeFlexibility" : "0",
152
+ "isOptional" : "0",
153
+ "dataType" : "Float32",
154
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4)",
155
+ "shortDescription" : "",
156
+ "shape" : "[4, 1, 4]",
157
+ "name" : "enc_ret_scale",
158
+ "type" : "MultiArray"
159
+ },
160
+ {
161
+ "hasShapeFlexibility" : "0",
162
+ "isOptional" : "0",
163
+ "dataType" : "Float32",
164
+ "formattedType" : "MultiArray (Float32 4 × 1 × 15 × 256)",
165
+ "shortDescription" : "",
166
+ "shape" : "[4, 1, 15, 256]",
167
+ "name" : "enc_conv_cache",
168
+ "type" : "MultiArray"
169
+ },
170
+ {
171
+ "hasShapeFlexibility" : "0",
172
+ "isOptional" : "0",
173
+ "dataType" : "Float32",
174
+ "formattedType" : "MultiArray (Float32 2 × 12 × 4 × 64 × 64)",
175
+ "shortDescription" : "",
176
+ "shape" : "[2, 12, 4, 64, 64]",
177
+ "name" : "dec_ret_kv",
178
+ "type" : "MultiArray"
179
+ },
180
+ {
181
+ "hasShapeFlexibility" : "0",
182
+ "isOptional" : "0",
183
+ "dataType" : "Float32",
184
+ "formattedType" : "MultiArray (Float32 2 × 12 × 4)",
185
+ "shortDescription" : "",
186
+ "shape" : "[2, 12, 4]",
187
+ "name" : "dec_ret_scale",
188
+ "type" : "MultiArray"
189
+ },
190
+ {
191
+ "hasShapeFlexibility" : "0",
192
+ "isOptional" : "0",
193
+ "dataType" : "Float32",
194
+ "formattedType" : "MultiArray (Float32 1 × 19 × 256)",
195
+ "shortDescription" : "",
196
+ "shape" : "[1, 19, 256]",
197
+ "name" : "top_buffer",
198
+ "type" : "MultiArray"
199
+ },
200
+ {
201
+ "hasShapeFlexibility" : "0",
202
+ "isOptional" : "0",
203
+ "dataType" : "Float32",
204
+ "formattedType" : "MultiArray (Float32 1)",
205
+ "shortDescription" : "",
206
+ "shape" : "[1]",
207
+ "name" : "ingest",
208
+ "type" : "MultiArray"
209
+ },
210
+ {
211
+ "hasShapeFlexibility" : "0",
212
+ "isOptional" : "0",
213
+ "dataType" : "Float32",
214
+ "formattedType" : "MultiArray (Float32 1)",
215
+ "shortDescription" : "",
216
+ "shape" : "[1]",
217
+ "name" : "decode",
218
+ "type" : "MultiArray"
219
+ }
220
+ ],
221
+ "generatedClassName" : "ls_eend_dih3_step",
222
+ "method" : "predict"
223
+ }
224
+ ]
DIHARD III/ls_eend_dih3_step.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
DIHARD III/ls_eend_dih3_step.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:565eb30c4d7f5f9fa55451b4fe06ac1e11d64afd9331f833cb56e8cb1edd7519
3
+ size 44395136
DIHARD III/ls_eend_dih3_step.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdcd6e15d0978052bd162c71e15025e9288f0e7d08f80c2afdc70f5592a1c5a0
3
+ size 168873
DIHARD III/ls_eend_dih3_step.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:565eb30c4d7f5f9fa55451b4fe06ac1e11d64afd9331f833cb56e8cb1edd7519
3
+ size 44395136
DIHARD III/ls_eend_dih3_step.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fileFormatVersion": "1.0.0",
3
+ "itemInfoEntries": {
4
+ "558EA27C-8294-42D9-A499-AAC37BC78889": {
5
+ "author": "com.apple.CoreML",
6
+ "description": "CoreML Model Weights",
7
+ "name": "weights",
8
+ "path": "com.apple.CoreML/weights"
9
+ },
10
+ "571CA3B6-D04D-4E55-B668-FBECBE7C9802": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Specification",
13
+ "name": "model.mlmodel",
14
+ "path": "com.apple.CoreML/model.mlmodel"
15
+ }
16
+ },
17
+ "rootModelIdentifier": "571CA3B6-D04D-4E55-B668-FBECBE7C9802"
18
+ }