aoiandroid alexwengg commited on
Commit
c6dad3a
·
0 Parent(s):

Duplicate from FluidInference/ls-eend-coreml

Browse files

Co-authored-by: Alex Weng <alexwengg@users.noreply.huggingface.co>

This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .gitattributes +35 -0
  2. AMI/.DS_Store +0 -0
  3. AMI/ls_eend_ami_step.json +79 -0
  4. AMI/ls_eend_ami_step.mlmodelc/analytics/coremldata.bin +3 -0
  5. AMI/ls_eend_ami_step.mlmodelc/coremldata.bin +3 -0
  6. AMI/ls_eend_ami_step.mlmodelc/metadata.json +224 -0
  7. AMI/ls_eend_ami_step.mlmodelc/model.mil +0 -0
  8. AMI/ls_eend_ami_step.mlmodelc/weights/weight.bin +3 -0
  9. AMI/ls_eend_ami_step.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
  10. AMI/ls_eend_ami_step.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
  11. AMI/ls_eend_ami_step.mlpackage/Manifest.json +18 -0
  12. CALLHOME/.DS_Store +0 -0
  13. CALLHOME/ls_eend_callhome_step.json +79 -0
  14. CALLHOME/ls_eend_callhome_step.mlmodelc/analytics/coremldata.bin +3 -0
  15. CALLHOME/ls_eend_callhome_step.mlmodelc/coremldata.bin +3 -0
  16. CALLHOME/ls_eend_callhome_step.mlmodelc/metadata.json +224 -0
  17. CALLHOME/ls_eend_callhome_step.mlmodelc/model.mil +0 -0
  18. CALLHOME/ls_eend_callhome_step.mlmodelc/weights/weight.bin +3 -0
  19. CALLHOME/ls_eend_callhome_step.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
  20. CALLHOME/ls_eend_callhome_step.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
  21. CALLHOME/ls_eend_callhome_step.mlpackage/Manifest.json +18 -0
  22. DIHARD II/.DS_Store +0 -0
  23. DIHARD II/ls_eend_dih2_step.json +79 -0
  24. DIHARD II/ls_eend_dih2_step.mlmodelc/analytics/coremldata.bin +3 -0
  25. DIHARD II/ls_eend_dih2_step.mlmodelc/coremldata.bin +3 -0
  26. DIHARD II/ls_eend_dih2_step.mlmodelc/metadata.json +224 -0
  27. DIHARD II/ls_eend_dih2_step.mlmodelc/model.mil +0 -0
  28. DIHARD II/ls_eend_dih2_step.mlmodelc/weights/weight.bin +3 -0
  29. DIHARD II/ls_eend_dih2_step.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
  30. DIHARD II/ls_eend_dih2_step.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
  31. DIHARD II/ls_eend_dih2_step.mlpackage/Manifest.json +18 -0
  32. DIHARD III/.DS_Store +0 -0
  33. DIHARD III/ls_eend_dih3_step.json +79 -0
  34. DIHARD III/ls_eend_dih3_step.mlmodelc/analytics/coremldata.bin +3 -0
  35. DIHARD III/ls_eend_dih3_step.mlmodelc/coremldata.bin +3 -0
  36. DIHARD III/ls_eend_dih3_step.mlmodelc/metadata.json +224 -0
  37. DIHARD III/ls_eend_dih3_step.mlmodelc/model.mil +0 -0
  38. DIHARD III/ls_eend_dih3_step.mlmodelc/weights/weight.bin +3 -0
  39. DIHARD III/ls_eend_dih3_step.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
  40. DIHARD III/ls_eend_dih3_step.mlpackage/Data/com.apple.CoreML/weights/weight.bin +3 -0
  41. DIHARD III/ls_eend_dih3_step.mlpackage/Manifest.json +18 -0
  42. LICENSE +21 -0
  43. README.md +188 -0
  44. config.json +1 -0
  45. optimized/ami/100ms/ls_eend_ami_100ms.mlmodelc/analytics/coremldata.bin +3 -0
  46. optimized/ami/100ms/ls_eend_ami_100ms.mlmodelc/coremldata.bin +3 -0
  47. optimized/ami/100ms/ls_eend_ami_100ms.mlmodelc/metadata.json +218 -0
  48. optimized/ami/100ms/ls_eend_ami_100ms.mlmodelc/model.mil +0 -0
  49. optimized/ami/100ms/ls_eend_ami_100ms.mlmodelc/weights/weight.bin +3 -0
  50. optimized/ami/100ms/ls_eend_ami_100ms.mlpackage/Data/com.apple.CoreML/model.mlmodel +3 -0
.gitattributes ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ *.7z filter=lfs diff=lfs merge=lfs -text
2
+ *.arrow filter=lfs diff=lfs merge=lfs -text
3
+ *.bin filter=lfs diff=lfs merge=lfs -text
4
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
5
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
6
+ *.ftz filter=lfs diff=lfs merge=lfs -text
7
+ *.gz filter=lfs diff=lfs merge=lfs -text
8
+ *.h5 filter=lfs diff=lfs merge=lfs -text
9
+ *.joblib filter=lfs diff=lfs merge=lfs -text
10
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
11
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
12
+ *.model filter=lfs diff=lfs merge=lfs -text
13
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
14
+ *.npy filter=lfs diff=lfs merge=lfs -text
15
+ *.npz filter=lfs diff=lfs merge=lfs -text
16
+ *.onnx filter=lfs diff=lfs merge=lfs -text
17
+ *.ot filter=lfs diff=lfs merge=lfs -text
18
+ *.parquet filter=lfs diff=lfs merge=lfs -text
19
+ *.pb filter=lfs diff=lfs merge=lfs -text
20
+ *.pickle filter=lfs diff=lfs merge=lfs -text
21
+ *.pkl filter=lfs diff=lfs merge=lfs -text
22
+ *.pt filter=lfs diff=lfs merge=lfs -text
23
+ *.pth filter=lfs diff=lfs merge=lfs -text
24
+ *.rar filter=lfs diff=lfs merge=lfs -text
25
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
26
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
27
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
28
+ *.tar filter=lfs diff=lfs merge=lfs -text
29
+ *.tflite filter=lfs diff=lfs merge=lfs -text
30
+ *.tgz filter=lfs diff=lfs merge=lfs -text
31
+ *.wasm filter=lfs diff=lfs merge=lfs -text
32
+ *.xz filter=lfs diff=lfs merge=lfs -text
33
+ *.zip filter=lfs diff=lfs merge=lfs -text
34
+ *.zst filter=lfs diff=lfs merge=lfs -text
35
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
AMI/.DS_Store ADDED
Binary file (6.15 kB). View file
 
AMI/ls_eend_ami_step.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "compute_precision": "float32",
3
+ "context_recp": 7,
4
+ "conv_delay": 9,
5
+ "decoder_layers": 2,
6
+ "encoder_conv_cache_len": 15,
7
+ "encoder_dim": 256,
8
+ "encoder_layers": 4,
9
+ "feat_type": "logmel23_cummn",
10
+ "frame_hz": 10.0,
11
+ "full_output_dim": 6,
12
+ "head_dim": 64,
13
+ "hop_length": 80,
14
+ "input_dim": 345,
15
+ "key_dim": 64,
16
+ "max_nspks": 6,
17
+ "max_speakers": 4,
18
+ "mixed_fp16_exclude_markers": [
19
+ "model.dec.",
20
+ "dec_ret",
21
+ "candidate_dec",
22
+ "attractor",
23
+ "full_logits",
24
+ "decode",
25
+ "convert"
26
+ ],
27
+ "mixed_fp16_include_markers": [
28
+ "model.enc.",
29
+ "model.cnn.",
30
+ "enc_ret_",
31
+ "enc_conv_cache"
32
+ ],
33
+ "n_fft": 1024,
34
+ "n_mels": 23,
35
+ "num_heads": 4,
36
+ "real_output_dim": 4,
37
+ "sample_rate": 8000,
38
+ "state_shapes": {
39
+ "dec_ret_kv": [
40
+ 2,
41
+ 6,
42
+ 4,
43
+ 64,
44
+ 64
45
+ ],
46
+ "dec_ret_scale": [
47
+ 2,
48
+ 6,
49
+ 4
50
+ ],
51
+ "enc_conv_cache": [
52
+ 4,
53
+ 1,
54
+ 15,
55
+ 256
56
+ ],
57
+ "enc_ret_kv": [
58
+ 4,
59
+ 1,
60
+ 4,
61
+ 64,
62
+ 64
63
+ ],
64
+ "enc_ret_scale": [
65
+ 4,
66
+ 1,
67
+ 4
68
+ ],
69
+ "top_buffer": [
70
+ 1,
71
+ 19,
72
+ 256
73
+ ]
74
+ },
75
+ "subsampling": 10,
76
+ "target_sample_rate": 8000,
77
+ "top_buffer_len": 19,
78
+ "win_length": 200
79
+ }
AMI/ls_eend_ami_step.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c29e4e605586aca4f45ad01254b2fe365180fb9a42ac6c2fa65f114cc603c27
3
+ size 243
AMI/ls_eend_ami_step.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e88c8cc32b10c236622478631ba236544a578cab395119d1db21efc7e9de08b0
3
+ size 742
AMI/ls_eend_ami_step.mlmodelc/metadata.json ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Float32",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float32",
10
+ "formattedType" : "MultiArray (Float32 1 × 1 × 6)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 1, 6]",
13
+ "name" : "full_logits",
14
+ "type" : "MultiArray"
15
+ },
16
+ {
17
+ "hasShapeFlexibility" : "0",
18
+ "isOptional" : "0",
19
+ "dataType" : "Float32",
20
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4 × 64 × 64)",
21
+ "shortDescription" : "",
22
+ "shape" : "[4, 1, 4, 64, 64]",
23
+ "name" : "enc_ret_kv_out",
24
+ "type" : "MultiArray"
25
+ },
26
+ {
27
+ "hasShapeFlexibility" : "0",
28
+ "isOptional" : "0",
29
+ "dataType" : "Float32",
30
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4)",
31
+ "shortDescription" : "",
32
+ "shape" : "[4, 1, 4]",
33
+ "name" : "enc_ret_scale_out",
34
+ "type" : "MultiArray"
35
+ },
36
+ {
37
+ "hasShapeFlexibility" : "0",
38
+ "isOptional" : "0",
39
+ "dataType" : "Float32",
40
+ "formattedType" : "MultiArray (Float32 4 × 1 × 15 × 256)",
41
+ "shortDescription" : "",
42
+ "shape" : "[4, 1, 15, 256]",
43
+ "name" : "enc_conv_cache_out",
44
+ "type" : "MultiArray"
45
+ },
46
+ {
47
+ "hasShapeFlexibility" : "0",
48
+ "isOptional" : "0",
49
+ "dataType" : "Float32",
50
+ "formattedType" : "MultiArray (Float32 2 × 6 × 4 × 64 × 64)",
51
+ "shortDescription" : "",
52
+ "shape" : "[2, 6, 4, 64, 64]",
53
+ "name" : "dec_ret_kv_out",
54
+ "type" : "MultiArray"
55
+ },
56
+ {
57
+ "hasShapeFlexibility" : "0",
58
+ "isOptional" : "0",
59
+ "dataType" : "Float32",
60
+ "formattedType" : "MultiArray (Float32 2 × 6 × 4)",
61
+ "shortDescription" : "",
62
+ "shape" : "[2, 6, 4]",
63
+ "name" : "dec_ret_scale_out",
64
+ "type" : "MultiArray"
65
+ },
66
+ {
67
+ "hasShapeFlexibility" : "0",
68
+ "isOptional" : "0",
69
+ "dataType" : "Float32",
70
+ "formattedType" : "MultiArray (Float32 1 × 19 × 256)",
71
+ "shortDescription" : "",
72
+ "shape" : "[1, 19, 256]",
73
+ "name" : "top_buffer_out",
74
+ "type" : "MultiArray"
75
+ }
76
+ ],
77
+ "modelParameters" : [
78
+
79
+ ],
80
+ "specificationVersion" : 9,
81
+ "mlProgramOperationTypeHistogram" : {
82
+ "Ios18.expandDims" : 29,
83
+ "Ios18.mul" : 60,
84
+ "Ios18.softmax" : 2,
85
+ "Ios18.matmul" : 5,
86
+ "Ios18.realDiv" : 14,
87
+ "Ios18.sigmoid" : 4,
88
+ "Split" : 4,
89
+ "Tile" : 1,
90
+ "Ios18.add" : 48,
91
+ "Ios16.reduceSum" : 6,
92
+ "Ios18.layerNorm" : 33,
93
+ "Ios18.reshape" : 44,
94
+ "Ios18.maximum" : 2,
95
+ "Ios18.linear" : 60,
96
+ "Ios18.conv" : 13,
97
+ "Ios18.concat" : 6,
98
+ "Ios18.sub" : 14,
99
+ "Ios18.silu" : 18,
100
+ "Ios18.transpose" : 42,
101
+ "Ios18.sqrt" : 12,
102
+ "Ios18.relu" : 2,
103
+ "Stack" : 5,
104
+ "Ios18.sliceByIndex" : 21,
105
+ "Ios18.squeeze" : 1,
106
+ "Ios16.reduceL2Norm" : 2
107
+ },
108
+ "computePrecision" : "Mixed (Float32, Int32)",
109
+ "isUpdatable" : "0",
110
+ "stateSchema" : [
111
+
112
+ ],
113
+ "availability" : {
114
+ "macOS" : "15.0",
115
+ "tvOS" : "18.0",
116
+ "visionOS" : "2.0",
117
+ "watchOS" : "11.0",
118
+ "iOS" : "18.0",
119
+ "macCatalyst" : "18.0"
120
+ },
121
+ "modelType" : {
122
+ "name" : "MLModelType_mlProgram"
123
+ },
124
+ "userDefinedMetadata" : {
125
+ "com.github.apple.coremltools.source_dialect" : "TorchScript",
126
+ "com.github.apple.coremltools.source" : "torch==1.13.0",
127
+ "com.github.apple.coremltools.version" : "8.3.0"
128
+ },
129
+ "inputSchema" : [
130
+ {
131
+ "hasShapeFlexibility" : "0",
132
+ "isOptional" : "0",
133
+ "dataType" : "Float32",
134
+ "formattedType" : "MultiArray (Float32 1 × 1 × 345)",
135
+ "shortDescription" : "",
136
+ "shape" : "[1, 1, 345]",
137
+ "name" : "frame",
138
+ "type" : "MultiArray"
139
+ },
140
+ {
141
+ "hasShapeFlexibility" : "0",
142
+ "isOptional" : "0",
143
+ "dataType" : "Float32",
144
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4 × 64 × 64)",
145
+ "shortDescription" : "",
146
+ "shape" : "[4, 1, 4, 64, 64]",
147
+ "name" : "enc_ret_kv",
148
+ "type" : "MultiArray"
149
+ },
150
+ {
151
+ "hasShapeFlexibility" : "0",
152
+ "isOptional" : "0",
153
+ "dataType" : "Float32",
154
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4)",
155
+ "shortDescription" : "",
156
+ "shape" : "[4, 1, 4]",
157
+ "name" : "enc_ret_scale",
158
+ "type" : "MultiArray"
159
+ },
160
+ {
161
+ "hasShapeFlexibility" : "0",
162
+ "isOptional" : "0",
163
+ "dataType" : "Float32",
164
+ "formattedType" : "MultiArray (Float32 4 × 1 × 15 × 256)",
165
+ "shortDescription" : "",
166
+ "shape" : "[4, 1, 15, 256]",
167
+ "name" : "enc_conv_cache",
168
+ "type" : "MultiArray"
169
+ },
170
+ {
171
+ "hasShapeFlexibility" : "0",
172
+ "isOptional" : "0",
173
+ "dataType" : "Float32",
174
+ "formattedType" : "MultiArray (Float32 2 × 6 × 4 × 64 × 64)",
175
+ "shortDescription" : "",
176
+ "shape" : "[2, 6, 4, 64, 64]",
177
+ "name" : "dec_ret_kv",
178
+ "type" : "MultiArray"
179
+ },
180
+ {
181
+ "hasShapeFlexibility" : "0",
182
+ "isOptional" : "0",
183
+ "dataType" : "Float32",
184
+ "formattedType" : "MultiArray (Float32 2 × 6 × 4)",
185
+ "shortDescription" : "",
186
+ "shape" : "[2, 6, 4]",
187
+ "name" : "dec_ret_scale",
188
+ "type" : "MultiArray"
189
+ },
190
+ {
191
+ "hasShapeFlexibility" : "0",
192
+ "isOptional" : "0",
193
+ "dataType" : "Float32",
194
+ "formattedType" : "MultiArray (Float32 1 × 19 × 256)",
195
+ "shortDescription" : "",
196
+ "shape" : "[1, 19, 256]",
197
+ "name" : "top_buffer",
198
+ "type" : "MultiArray"
199
+ },
200
+ {
201
+ "hasShapeFlexibility" : "0",
202
+ "isOptional" : "0",
203
+ "dataType" : "Float32",
204
+ "formattedType" : "MultiArray (Float32 1)",
205
+ "shortDescription" : "",
206
+ "shape" : "[1]",
207
+ "name" : "ingest",
208
+ "type" : "MultiArray"
209
+ },
210
+ {
211
+ "hasShapeFlexibility" : "0",
212
+ "isOptional" : "0",
213
+ "dataType" : "Float32",
214
+ "formattedType" : "MultiArray (Float32 1)",
215
+ "shortDescription" : "",
216
+ "shape" : "[1]",
217
+ "name" : "decode",
218
+ "type" : "MultiArray"
219
+ }
220
+ ],
221
+ "generatedClassName" : "ls_eend_ami_step",
222
+ "method" : "predict"
223
+ }
224
+ ]
AMI/ls_eend_ami_step.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
AMI/ls_eend_ami_step.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56e27813e10448eb4454b17574becc305015ad657ce6cd3896044b7c8b95bcc3
3
+ size 44388992
AMI/ls_eend_ami_step.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:954289d9924242a1d00b0db4421bdd866b6dfbd73ef5c6d327f2eeec95f9ed41
3
+ size 168873
AMI/ls_eend_ami_step.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:56e27813e10448eb4454b17574becc305015ad657ce6cd3896044b7c8b95bcc3
3
+ size 44388992
AMI/ls_eend_ami_step.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fileFormatVersion": "1.0.0",
3
+ "itemInfoEntries": {
4
+ "067F46DA-51CD-4543-8F1B-B1F48617AE5E": {
5
+ "author": "com.apple.CoreML",
6
+ "description": "CoreML Model Weights",
7
+ "name": "weights",
8
+ "path": "com.apple.CoreML/weights"
9
+ },
10
+ "E609CD24-18D6-48CF-886E-13B634913B46": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Specification",
13
+ "name": "model.mlmodel",
14
+ "path": "com.apple.CoreML/model.mlmodel"
15
+ }
16
+ },
17
+ "rootModelIdentifier": "E609CD24-18D6-48CF-886E-13B634913B46"
18
+ }
CALLHOME/.DS_Store ADDED
Binary file (6.15 kB). View file
 
CALLHOME/ls_eend_callhome_step.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "compute_precision": "float32",
3
+ "context_recp": 7,
4
+ "conv_delay": 9,
5
+ "decoder_layers": 2,
6
+ "encoder_conv_cache_len": 15,
7
+ "encoder_dim": 256,
8
+ "encoder_layers": 4,
9
+ "feat_type": "logmel23_cummn",
10
+ "frame_hz": 10.0,
11
+ "full_output_dim": 9,
12
+ "head_dim": 64,
13
+ "hop_length": 80,
14
+ "input_dim": 345,
15
+ "key_dim": 64,
16
+ "max_nspks": 9,
17
+ "max_speakers": 7,
18
+ "mixed_fp16_exclude_markers": [
19
+ "model.dec.",
20
+ "dec_ret",
21
+ "candidate_dec",
22
+ "attractor",
23
+ "full_logits",
24
+ "decode",
25
+ "convert"
26
+ ],
27
+ "mixed_fp16_include_markers": [
28
+ "model.enc.",
29
+ "model.cnn.",
30
+ "enc_ret_",
31
+ "enc_conv_cache"
32
+ ],
33
+ "n_fft": 1024,
34
+ "n_mels": 23,
35
+ "num_heads": 4,
36
+ "real_output_dim": 7,
37
+ "sample_rate": 8000,
38
+ "state_shapes": {
39
+ "dec_ret_kv": [
40
+ 2,
41
+ 9,
42
+ 4,
43
+ 64,
44
+ 64
45
+ ],
46
+ "dec_ret_scale": [
47
+ 2,
48
+ 9,
49
+ 4
50
+ ],
51
+ "enc_conv_cache": [
52
+ 4,
53
+ 1,
54
+ 15,
55
+ 256
56
+ ],
57
+ "enc_ret_kv": [
58
+ 4,
59
+ 1,
60
+ 4,
61
+ 64,
62
+ 64
63
+ ],
64
+ "enc_ret_scale": [
65
+ 4,
66
+ 1,
67
+ 4
68
+ ],
69
+ "top_buffer": [
70
+ 1,
71
+ 19,
72
+ 256
73
+ ]
74
+ },
75
+ "subsampling": 10,
76
+ "target_sample_rate": 8000,
77
+ "top_buffer_len": 19,
78
+ "win_length": 200
79
+ }
CALLHOME/ls_eend_callhome_step.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29d845d6bc108c1cc29a328350926dcbcb127b6d3fdbea46114e2421afa21722
3
+ size 243
CALLHOME/ls_eend_callhome_step.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b93e7cbca1ed52e213164f7922a6eb0d7d9708060bb25686a005a5cab3d6f07a
3
+ size 742
CALLHOME/ls_eend_callhome_step.mlmodelc/metadata.json ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Float32",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float32",
10
+ "formattedType" : "MultiArray (Float32 1 × 1 × 9)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 1, 9]",
13
+ "name" : "full_logits",
14
+ "type" : "MultiArray"
15
+ },
16
+ {
17
+ "hasShapeFlexibility" : "0",
18
+ "isOptional" : "0",
19
+ "dataType" : "Float32",
20
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4 × 64 × 64)",
21
+ "shortDescription" : "",
22
+ "shape" : "[4, 1, 4, 64, 64]",
23
+ "name" : "enc_ret_kv_out",
24
+ "type" : "MultiArray"
25
+ },
26
+ {
27
+ "hasShapeFlexibility" : "0",
28
+ "isOptional" : "0",
29
+ "dataType" : "Float32",
30
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4)",
31
+ "shortDescription" : "",
32
+ "shape" : "[4, 1, 4]",
33
+ "name" : "enc_ret_scale_out",
34
+ "type" : "MultiArray"
35
+ },
36
+ {
37
+ "hasShapeFlexibility" : "0",
38
+ "isOptional" : "0",
39
+ "dataType" : "Float32",
40
+ "formattedType" : "MultiArray (Float32 4 × 1 × 15 × 256)",
41
+ "shortDescription" : "",
42
+ "shape" : "[4, 1, 15, 256]",
43
+ "name" : "enc_conv_cache_out",
44
+ "type" : "MultiArray"
45
+ },
46
+ {
47
+ "hasShapeFlexibility" : "0",
48
+ "isOptional" : "0",
49
+ "dataType" : "Float32",
50
+ "formattedType" : "MultiArray (Float32 2 × 9 × 4 × 64 × 64)",
51
+ "shortDescription" : "",
52
+ "shape" : "[2, 9, 4, 64, 64]",
53
+ "name" : "dec_ret_kv_out",
54
+ "type" : "MultiArray"
55
+ },
56
+ {
57
+ "hasShapeFlexibility" : "0",
58
+ "isOptional" : "0",
59
+ "dataType" : "Float32",
60
+ "formattedType" : "MultiArray (Float32 2 × 9 × 4)",
61
+ "shortDescription" : "",
62
+ "shape" : "[2, 9, 4]",
63
+ "name" : "dec_ret_scale_out",
64
+ "type" : "MultiArray"
65
+ },
66
+ {
67
+ "hasShapeFlexibility" : "0",
68
+ "isOptional" : "0",
69
+ "dataType" : "Float32",
70
+ "formattedType" : "MultiArray (Float32 1 × 19 × 256)",
71
+ "shortDescription" : "",
72
+ "shape" : "[1, 19, 256]",
73
+ "name" : "top_buffer_out",
74
+ "type" : "MultiArray"
75
+ }
76
+ ],
77
+ "modelParameters" : [
78
+
79
+ ],
80
+ "specificationVersion" : 9,
81
+ "mlProgramOperationTypeHistogram" : {
82
+ "Ios18.expandDims" : 29,
83
+ "Ios18.mul" : 60,
84
+ "Ios18.softmax" : 2,
85
+ "Ios18.matmul" : 5,
86
+ "Ios18.realDiv" : 14,
87
+ "Ios18.sigmoid" : 4,
88
+ "Split" : 4,
89
+ "Tile" : 1,
90
+ "Ios18.add" : 48,
91
+ "Ios16.reduceSum" : 6,
92
+ "Ios18.layerNorm" : 33,
93
+ "Ios18.reshape" : 44,
94
+ "Ios18.maximum" : 2,
95
+ "Ios18.linear" : 60,
96
+ "Ios18.conv" : 13,
97
+ "Ios18.concat" : 6,
98
+ "Ios18.sub" : 14,
99
+ "Ios18.silu" : 18,
100
+ "Ios18.transpose" : 42,
101
+ "Ios18.sqrt" : 12,
102
+ "Ios18.relu" : 2,
103
+ "Stack" : 5,
104
+ "Ios18.sliceByIndex" : 21,
105
+ "Ios18.squeeze" : 1,
106
+ "Ios16.reduceL2Norm" : 2
107
+ },
108
+ "computePrecision" : "Mixed (Float32, Int32)",
109
+ "isUpdatable" : "0",
110
+ "stateSchema" : [
111
+
112
+ ],
113
+ "availability" : {
114
+ "macOS" : "15.0",
115
+ "tvOS" : "18.0",
116
+ "visionOS" : "2.0",
117
+ "watchOS" : "11.0",
118
+ "iOS" : "18.0",
119
+ "macCatalyst" : "18.0"
120
+ },
121
+ "modelType" : {
122
+ "name" : "MLModelType_mlProgram"
123
+ },
124
+ "userDefinedMetadata" : {
125
+ "com.github.apple.coremltools.version" : "8.3.0",
126
+ "com.github.apple.coremltools.source" : "torch==1.13.0",
127
+ "com.github.apple.coremltools.source_dialect" : "TorchScript"
128
+ },
129
+ "inputSchema" : [
130
+ {
131
+ "hasShapeFlexibility" : "0",
132
+ "isOptional" : "0",
133
+ "dataType" : "Float32",
134
+ "formattedType" : "MultiArray (Float32 1 × 1 × 345)",
135
+ "shortDescription" : "",
136
+ "shape" : "[1, 1, 345]",
137
+ "name" : "frame",
138
+ "type" : "MultiArray"
139
+ },
140
+ {
141
+ "hasShapeFlexibility" : "0",
142
+ "isOptional" : "0",
143
+ "dataType" : "Float32",
144
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4 × 64 × 64)",
145
+ "shortDescription" : "",
146
+ "shape" : "[4, 1, 4, 64, 64]",
147
+ "name" : "enc_ret_kv",
148
+ "type" : "MultiArray"
149
+ },
150
+ {
151
+ "hasShapeFlexibility" : "0",
152
+ "isOptional" : "0",
153
+ "dataType" : "Float32",
154
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4)",
155
+ "shortDescription" : "",
156
+ "shape" : "[4, 1, 4]",
157
+ "name" : "enc_ret_scale",
158
+ "type" : "MultiArray"
159
+ },
160
+ {
161
+ "hasShapeFlexibility" : "0",
162
+ "isOptional" : "0",
163
+ "dataType" : "Float32",
164
+ "formattedType" : "MultiArray (Float32 4 × 1 × 15 × 256)",
165
+ "shortDescription" : "",
166
+ "shape" : "[4, 1, 15, 256]",
167
+ "name" : "enc_conv_cache",
168
+ "type" : "MultiArray"
169
+ },
170
+ {
171
+ "hasShapeFlexibility" : "0",
172
+ "isOptional" : "0",
173
+ "dataType" : "Float32",
174
+ "formattedType" : "MultiArray (Float32 2 × 9 × 4 × 64 × 64)",
175
+ "shortDescription" : "",
176
+ "shape" : "[2, 9, 4, 64, 64]",
177
+ "name" : "dec_ret_kv",
178
+ "type" : "MultiArray"
179
+ },
180
+ {
181
+ "hasShapeFlexibility" : "0",
182
+ "isOptional" : "0",
183
+ "dataType" : "Float32",
184
+ "formattedType" : "MultiArray (Float32 2 × 9 × 4)",
185
+ "shortDescription" : "",
186
+ "shape" : "[2, 9, 4]",
187
+ "name" : "dec_ret_scale",
188
+ "type" : "MultiArray"
189
+ },
190
+ {
191
+ "hasShapeFlexibility" : "0",
192
+ "isOptional" : "0",
193
+ "dataType" : "Float32",
194
+ "formattedType" : "MultiArray (Float32 1 × 19 × 256)",
195
+ "shortDescription" : "",
196
+ "shape" : "[1, 19, 256]",
197
+ "name" : "top_buffer",
198
+ "type" : "MultiArray"
199
+ },
200
+ {
201
+ "hasShapeFlexibility" : "0",
202
+ "isOptional" : "0",
203
+ "dataType" : "Float32",
204
+ "formattedType" : "MultiArray (Float32 1)",
205
+ "shortDescription" : "",
206
+ "shape" : "[1]",
207
+ "name" : "ingest",
208
+ "type" : "MultiArray"
209
+ },
210
+ {
211
+ "hasShapeFlexibility" : "0",
212
+ "isOptional" : "0",
213
+ "dataType" : "Float32",
214
+ "formattedType" : "MultiArray (Float32 1)",
215
+ "shortDescription" : "",
216
+ "shape" : "[1]",
217
+ "name" : "decode",
218
+ "type" : "MultiArray"
219
+ }
220
+ ],
221
+ "generatedClassName" : "ls_eend_callhome_step",
222
+ "method" : "predict"
223
+ }
224
+ ]
CALLHOME/ls_eend_callhome_step.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
CALLHOME/ls_eend_callhome_step.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:771f7359f23837443571e2d726d830f221b402d70ef631eb02d54a7e9a954849
3
+ size 44392064
CALLHOME/ls_eend_callhome_step.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de6c0df092b3822346da0d2c8f3a1fb81d90305ff22029737f5ebd2a4870c97b
3
+ size 168873
CALLHOME/ls_eend_callhome_step.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:771f7359f23837443571e2d726d830f221b402d70ef631eb02d54a7e9a954849
3
+ size 44392064
CALLHOME/ls_eend_callhome_step.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fileFormatVersion": "1.0.0",
3
+ "itemInfoEntries": {
4
+ "16D2CD1C-2109-40F0-8885-182FAD0FDC95": {
5
+ "author": "com.apple.CoreML",
6
+ "description": "CoreML Model Specification",
7
+ "name": "model.mlmodel",
8
+ "path": "com.apple.CoreML/model.mlmodel"
9
+ },
10
+ "57EC3F2B-2646-49D3-AEAC-0B15C73EDE52": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Weights",
13
+ "name": "weights",
14
+ "path": "com.apple.CoreML/weights"
15
+ }
16
+ },
17
+ "rootModelIdentifier": "16D2CD1C-2109-40F0-8885-182FAD0FDC95"
18
+ }
DIHARD II/.DS_Store ADDED
Binary file (6.15 kB). View file
 
DIHARD II/ls_eend_dih2_step.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "compute_precision": "float32",
3
+ "context_recp": 7,
4
+ "conv_delay": 9,
5
+ "decoder_layers": 2,
6
+ "encoder_conv_cache_len": 15,
7
+ "encoder_dim": 256,
8
+ "encoder_layers": 4,
9
+ "feat_type": "logmel23_cummn",
10
+ "frame_hz": 10.0,
11
+ "full_output_dim": 12,
12
+ "head_dim": 64,
13
+ "hop_length": 80,
14
+ "input_dim": 345,
15
+ "key_dim": 64,
16
+ "max_nspks": 12,
17
+ "max_speakers": 10,
18
+ "mixed_fp16_exclude_markers": [
19
+ "model.dec.",
20
+ "dec_ret",
21
+ "candidate_dec",
22
+ "attractor",
23
+ "full_logits",
24
+ "decode",
25
+ "convert"
26
+ ],
27
+ "mixed_fp16_include_markers": [
28
+ "model.enc.",
29
+ "model.cnn.",
30
+ "enc_ret_",
31
+ "enc_conv_cache"
32
+ ],
33
+ "n_fft": 1024,
34
+ "n_mels": 23,
35
+ "num_heads": 4,
36
+ "real_output_dim": 10,
37
+ "sample_rate": 8000,
38
+ "state_shapes": {
39
+ "dec_ret_kv": [
40
+ 2,
41
+ 12,
42
+ 4,
43
+ 64,
44
+ 64
45
+ ],
46
+ "dec_ret_scale": [
47
+ 2,
48
+ 12,
49
+ 4
50
+ ],
51
+ "enc_conv_cache": [
52
+ 4,
53
+ 1,
54
+ 15,
55
+ 256
56
+ ],
57
+ "enc_ret_kv": [
58
+ 4,
59
+ 1,
60
+ 4,
61
+ 64,
62
+ 64
63
+ ],
64
+ "enc_ret_scale": [
65
+ 4,
66
+ 1,
67
+ 4
68
+ ],
69
+ "top_buffer": [
70
+ 1,
71
+ 19,
72
+ 256
73
+ ]
74
+ },
75
+ "subsampling": 10,
76
+ "target_sample_rate": 8000,
77
+ "top_buffer_len": 19,
78
+ "win_length": 200
79
+ }
DIHARD II/ls_eend_dih2_step.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86d79d7e9f2bc30200422f9ec4e3e10d341c7cc4683a7a9a57adb1c887154326
3
+ size 243
DIHARD II/ls_eend_dih2_step.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75fdd2dc79af0fac42dfa410fa8c542fea7ed32addb93a12c165c9ec77f2c431
3
+ size 742
DIHARD II/ls_eend_dih2_step.mlmodelc/metadata.json ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Float32",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float32",
10
+ "formattedType" : "MultiArray (Float32 1 × 1 × 12)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 1, 12]",
13
+ "name" : "full_logits",
14
+ "type" : "MultiArray"
15
+ },
16
+ {
17
+ "hasShapeFlexibility" : "0",
18
+ "isOptional" : "0",
19
+ "dataType" : "Float32",
20
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4 × 64 × 64)",
21
+ "shortDescription" : "",
22
+ "shape" : "[4, 1, 4, 64, 64]",
23
+ "name" : "enc_ret_kv_out",
24
+ "type" : "MultiArray"
25
+ },
26
+ {
27
+ "hasShapeFlexibility" : "0",
28
+ "isOptional" : "0",
29
+ "dataType" : "Float32",
30
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4)",
31
+ "shortDescription" : "",
32
+ "shape" : "[4, 1, 4]",
33
+ "name" : "enc_ret_scale_out",
34
+ "type" : "MultiArray"
35
+ },
36
+ {
37
+ "hasShapeFlexibility" : "0",
38
+ "isOptional" : "0",
39
+ "dataType" : "Float32",
40
+ "formattedType" : "MultiArray (Float32 4 × 1 × 15 × 256)",
41
+ "shortDescription" : "",
42
+ "shape" : "[4, 1, 15, 256]",
43
+ "name" : "enc_conv_cache_out",
44
+ "type" : "MultiArray"
45
+ },
46
+ {
47
+ "hasShapeFlexibility" : "0",
48
+ "isOptional" : "0",
49
+ "dataType" : "Float32",
50
+ "formattedType" : "MultiArray (Float32 2 × 12 × 4 × 64 × 64)",
51
+ "shortDescription" : "",
52
+ "shape" : "[2, 12, 4, 64, 64]",
53
+ "name" : "dec_ret_kv_out",
54
+ "type" : "MultiArray"
55
+ },
56
+ {
57
+ "hasShapeFlexibility" : "0",
58
+ "isOptional" : "0",
59
+ "dataType" : "Float32",
60
+ "formattedType" : "MultiArray (Float32 2 × 12 × 4)",
61
+ "shortDescription" : "",
62
+ "shape" : "[2, 12, 4]",
63
+ "name" : "dec_ret_scale_out",
64
+ "type" : "MultiArray"
65
+ },
66
+ {
67
+ "hasShapeFlexibility" : "0",
68
+ "isOptional" : "0",
69
+ "dataType" : "Float32",
70
+ "formattedType" : "MultiArray (Float32 1 × 19 × 256)",
71
+ "shortDescription" : "",
72
+ "shape" : "[1, 19, 256]",
73
+ "name" : "top_buffer_out",
74
+ "type" : "MultiArray"
75
+ }
76
+ ],
77
+ "modelParameters" : [
78
+
79
+ ],
80
+ "specificationVersion" : 9,
81
+ "mlProgramOperationTypeHistogram" : {
82
+ "Ios18.expandDims" : 29,
83
+ "Ios18.mul" : 60,
84
+ "Ios18.softmax" : 2,
85
+ "Ios18.matmul" : 5,
86
+ "Ios18.realDiv" : 14,
87
+ "Ios18.sigmoid" : 4,
88
+ "Split" : 4,
89
+ "Tile" : 1,
90
+ "Ios18.add" : 48,
91
+ "Ios16.reduceSum" : 6,
92
+ "Ios18.layerNorm" : 33,
93
+ "Ios18.reshape" : 44,
94
+ "Ios18.maximum" : 2,
95
+ "Ios18.linear" : 60,
96
+ "Ios18.conv" : 13,
97
+ "Ios18.concat" : 6,
98
+ "Ios18.sub" : 14,
99
+ "Ios18.silu" : 18,
100
+ "Ios18.transpose" : 42,
101
+ "Ios18.sqrt" : 12,
102
+ "Ios18.relu" : 2,
103
+ "Stack" : 5,
104
+ "Ios18.sliceByIndex" : 21,
105
+ "Ios18.squeeze" : 1,
106
+ "Ios16.reduceL2Norm" : 2
107
+ },
108
+ "computePrecision" : "Mixed (Float32, Int32)",
109
+ "isUpdatable" : "0",
110
+ "stateSchema" : [
111
+
112
+ ],
113
+ "availability" : {
114
+ "macOS" : "15.0",
115
+ "tvOS" : "18.0",
116
+ "visionOS" : "2.0",
117
+ "watchOS" : "11.0",
118
+ "iOS" : "18.0",
119
+ "macCatalyst" : "18.0"
120
+ },
121
+ "modelType" : {
122
+ "name" : "MLModelType_mlProgram"
123
+ },
124
+ "userDefinedMetadata" : {
125
+ "com.github.apple.coremltools.version" : "8.3.0",
126
+ "com.github.apple.coremltools.source" : "torch==1.13.0",
127
+ "com.github.apple.coremltools.source_dialect" : "TorchScript"
128
+ },
129
+ "inputSchema" : [
130
+ {
131
+ "hasShapeFlexibility" : "0",
132
+ "isOptional" : "0",
133
+ "dataType" : "Float32",
134
+ "formattedType" : "MultiArray (Float32 1 × 1 × 345)",
135
+ "shortDescription" : "",
136
+ "shape" : "[1, 1, 345]",
137
+ "name" : "frame",
138
+ "type" : "MultiArray"
139
+ },
140
+ {
141
+ "hasShapeFlexibility" : "0",
142
+ "isOptional" : "0",
143
+ "dataType" : "Float32",
144
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4 × 64 × 64)",
145
+ "shortDescription" : "",
146
+ "shape" : "[4, 1, 4, 64, 64]",
147
+ "name" : "enc_ret_kv",
148
+ "type" : "MultiArray"
149
+ },
150
+ {
151
+ "hasShapeFlexibility" : "0",
152
+ "isOptional" : "0",
153
+ "dataType" : "Float32",
154
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4)",
155
+ "shortDescription" : "",
156
+ "shape" : "[4, 1, 4]",
157
+ "name" : "enc_ret_scale",
158
+ "type" : "MultiArray"
159
+ },
160
+ {
161
+ "hasShapeFlexibility" : "0",
162
+ "isOptional" : "0",
163
+ "dataType" : "Float32",
164
+ "formattedType" : "MultiArray (Float32 4 × 1 × 15 × 256)",
165
+ "shortDescription" : "",
166
+ "shape" : "[4, 1, 15, 256]",
167
+ "name" : "enc_conv_cache",
168
+ "type" : "MultiArray"
169
+ },
170
+ {
171
+ "hasShapeFlexibility" : "0",
172
+ "isOptional" : "0",
173
+ "dataType" : "Float32",
174
+ "formattedType" : "MultiArray (Float32 2 × 12 × 4 × 64 × 64)",
175
+ "shortDescription" : "",
176
+ "shape" : "[2, 12, 4, 64, 64]",
177
+ "name" : "dec_ret_kv",
178
+ "type" : "MultiArray"
179
+ },
180
+ {
181
+ "hasShapeFlexibility" : "0",
182
+ "isOptional" : "0",
183
+ "dataType" : "Float32",
184
+ "formattedType" : "MultiArray (Float32 2 × 12 × 4)",
185
+ "shortDescription" : "",
186
+ "shape" : "[2, 12, 4]",
187
+ "name" : "dec_ret_scale",
188
+ "type" : "MultiArray"
189
+ },
190
+ {
191
+ "hasShapeFlexibility" : "0",
192
+ "isOptional" : "0",
193
+ "dataType" : "Float32",
194
+ "formattedType" : "MultiArray (Float32 1 × 19 × 256)",
195
+ "shortDescription" : "",
196
+ "shape" : "[1, 19, 256]",
197
+ "name" : "top_buffer",
198
+ "type" : "MultiArray"
199
+ },
200
+ {
201
+ "hasShapeFlexibility" : "0",
202
+ "isOptional" : "0",
203
+ "dataType" : "Float32",
204
+ "formattedType" : "MultiArray (Float32 1)",
205
+ "shortDescription" : "",
206
+ "shape" : "[1]",
207
+ "name" : "ingest",
208
+ "type" : "MultiArray"
209
+ },
210
+ {
211
+ "hasShapeFlexibility" : "0",
212
+ "isOptional" : "0",
213
+ "dataType" : "Float32",
214
+ "formattedType" : "MultiArray (Float32 1)",
215
+ "shortDescription" : "",
216
+ "shape" : "[1]",
217
+ "name" : "decode",
218
+ "type" : "MultiArray"
219
+ }
220
+ ],
221
+ "generatedClassName" : "ls_eend_dih2_step",
222
+ "method" : "predict"
223
+ }
224
+ ]
DIHARD II/ls_eend_dih2_step.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
DIHARD II/ls_eend_dih2_step.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d30b3ca77b8c6fb34d398d93064c5378d7ae6623c05512be554ca18d5b5eb886
3
+ size 44395136
DIHARD II/ls_eend_dih2_step.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdcd6e15d0978052bd162c71e15025e9288f0e7d08f80c2afdc70f5592a1c5a0
3
+ size 168873
DIHARD II/ls_eend_dih2_step.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d30b3ca77b8c6fb34d398d93064c5378d7ae6623c05512be554ca18d5b5eb886
3
+ size 44395136
DIHARD II/ls_eend_dih2_step.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fileFormatVersion": "1.0.0",
3
+ "itemInfoEntries": {
4
+ "2F7A4BA5-180F-4E22-8D25-55A5EA5707B1": {
5
+ "author": "com.apple.CoreML",
6
+ "description": "CoreML Model Specification",
7
+ "name": "model.mlmodel",
8
+ "path": "com.apple.CoreML/model.mlmodel"
9
+ },
10
+ "491A6A2C-B8BC-41B0-B513-3D277C9464F2": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Weights",
13
+ "name": "weights",
14
+ "path": "com.apple.CoreML/weights"
15
+ }
16
+ },
17
+ "rootModelIdentifier": "2F7A4BA5-180F-4E22-8D25-55A5EA5707B1"
18
+ }
DIHARD III/.DS_Store ADDED
Binary file (6.15 kB). View file
 
DIHARD III/ls_eend_dih3_step.json ADDED
@@ -0,0 +1,79 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "compute_precision": "float32",
3
+ "context_recp": 7,
4
+ "conv_delay": 9,
5
+ "decoder_layers": 2,
6
+ "encoder_conv_cache_len": 15,
7
+ "encoder_dim": 256,
8
+ "encoder_layers": 4,
9
+ "feat_type": "logmel23_cummn",
10
+ "frame_hz": 10.0,
11
+ "full_output_dim": 12,
12
+ "head_dim": 64,
13
+ "hop_length": 80,
14
+ "input_dim": 345,
15
+ "key_dim": 64,
16
+ "max_nspks": 12,
17
+ "max_speakers": 10,
18
+ "mixed_fp16_exclude_markers": [
19
+ "model.dec.",
20
+ "dec_ret",
21
+ "candidate_dec",
22
+ "attractor",
23
+ "full_logits",
24
+ "decode",
25
+ "convert"
26
+ ],
27
+ "mixed_fp16_include_markers": [
28
+ "model.enc.",
29
+ "model.cnn.",
30
+ "enc_ret_",
31
+ "enc_conv_cache"
32
+ ],
33
+ "n_fft": 1024,
34
+ "n_mels": 23,
35
+ "num_heads": 4,
36
+ "real_output_dim": 10,
37
+ "sample_rate": 8000,
38
+ "state_shapes": {
39
+ "dec_ret_kv": [
40
+ 2,
41
+ 12,
42
+ 4,
43
+ 64,
44
+ 64
45
+ ],
46
+ "dec_ret_scale": [
47
+ 2,
48
+ 12,
49
+ 4
50
+ ],
51
+ "enc_conv_cache": [
52
+ 4,
53
+ 1,
54
+ 15,
55
+ 256
56
+ ],
57
+ "enc_ret_kv": [
58
+ 4,
59
+ 1,
60
+ 4,
61
+ 64,
62
+ 64
63
+ ],
64
+ "enc_ret_scale": [
65
+ 4,
66
+ 1,
67
+ 4
68
+ ],
69
+ "top_buffer": [
70
+ 1,
71
+ 19,
72
+ 256
73
+ ]
74
+ },
75
+ "subsampling": 10,
76
+ "target_sample_rate": 8000,
77
+ "top_buffer_len": 19,
78
+ "win_length": 200
79
+ }
DIHARD III/ls_eend_dih3_step.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86d79d7e9f2bc30200422f9ec4e3e10d341c7cc4683a7a9a57adb1c887154326
3
+ size 243
DIHARD III/ls_eend_dih3_step.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82d59494af55a0183c02bb3e2cd6e16c43c7521a45f26cabf86ce65ef7ce7079
3
+ size 742
DIHARD III/ls_eend_dih3_step.mlmodelc/metadata.json ADDED
@@ -0,0 +1,224 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "storagePrecision" : "Float32",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float32",
10
+ "formattedType" : "MultiArray (Float32 1 × 1 × 12)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 1, 12]",
13
+ "name" : "full_logits",
14
+ "type" : "MultiArray"
15
+ },
16
+ {
17
+ "hasShapeFlexibility" : "0",
18
+ "isOptional" : "0",
19
+ "dataType" : "Float32",
20
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4 × 64 × 64)",
21
+ "shortDescription" : "",
22
+ "shape" : "[4, 1, 4, 64, 64]",
23
+ "name" : "enc_ret_kv_out",
24
+ "type" : "MultiArray"
25
+ },
26
+ {
27
+ "hasShapeFlexibility" : "0",
28
+ "isOptional" : "0",
29
+ "dataType" : "Float32",
30
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4)",
31
+ "shortDescription" : "",
32
+ "shape" : "[4, 1, 4]",
33
+ "name" : "enc_ret_scale_out",
34
+ "type" : "MultiArray"
35
+ },
36
+ {
37
+ "hasShapeFlexibility" : "0",
38
+ "isOptional" : "0",
39
+ "dataType" : "Float32",
40
+ "formattedType" : "MultiArray (Float32 4 × 1 × 15 × 256)",
41
+ "shortDescription" : "",
42
+ "shape" : "[4, 1, 15, 256]",
43
+ "name" : "enc_conv_cache_out",
44
+ "type" : "MultiArray"
45
+ },
46
+ {
47
+ "hasShapeFlexibility" : "0",
48
+ "isOptional" : "0",
49
+ "dataType" : "Float32",
50
+ "formattedType" : "MultiArray (Float32 2 × 12 × 4 × 64 × 64)",
51
+ "shortDescription" : "",
52
+ "shape" : "[2, 12, 4, 64, 64]",
53
+ "name" : "dec_ret_kv_out",
54
+ "type" : "MultiArray"
55
+ },
56
+ {
57
+ "hasShapeFlexibility" : "0",
58
+ "isOptional" : "0",
59
+ "dataType" : "Float32",
60
+ "formattedType" : "MultiArray (Float32 2 × 12 × 4)",
61
+ "shortDescription" : "",
62
+ "shape" : "[2, 12, 4]",
63
+ "name" : "dec_ret_scale_out",
64
+ "type" : "MultiArray"
65
+ },
66
+ {
67
+ "hasShapeFlexibility" : "0",
68
+ "isOptional" : "0",
69
+ "dataType" : "Float32",
70
+ "formattedType" : "MultiArray (Float32 1 × 19 × 256)",
71
+ "shortDescription" : "",
72
+ "shape" : "[1, 19, 256]",
73
+ "name" : "top_buffer_out",
74
+ "type" : "MultiArray"
75
+ }
76
+ ],
77
+ "modelParameters" : [
78
+
79
+ ],
80
+ "specificationVersion" : 9,
81
+ "mlProgramOperationTypeHistogram" : {
82
+ "Ios18.expandDims" : 29,
83
+ "Ios18.mul" : 60,
84
+ "Ios18.softmax" : 2,
85
+ "Ios18.matmul" : 5,
86
+ "Ios18.realDiv" : 14,
87
+ "Ios18.sigmoid" : 4,
88
+ "Split" : 4,
89
+ "Tile" : 1,
90
+ "Ios18.add" : 48,
91
+ "Ios16.reduceSum" : 6,
92
+ "Ios18.layerNorm" : 33,
93
+ "Ios18.reshape" : 44,
94
+ "Ios18.maximum" : 2,
95
+ "Ios18.linear" : 60,
96
+ "Ios18.conv" : 13,
97
+ "Ios18.concat" : 6,
98
+ "Ios18.sub" : 14,
99
+ "Ios18.silu" : 18,
100
+ "Ios18.transpose" : 42,
101
+ "Ios18.sqrt" : 12,
102
+ "Ios18.relu" : 2,
103
+ "Stack" : 5,
104
+ "Ios18.sliceByIndex" : 21,
105
+ "Ios18.squeeze" : 1,
106
+ "Ios16.reduceL2Norm" : 2
107
+ },
108
+ "computePrecision" : "Mixed (Float32, Int32)",
109
+ "isUpdatable" : "0",
110
+ "stateSchema" : [
111
+
112
+ ],
113
+ "availability" : {
114
+ "macOS" : "15.0",
115
+ "tvOS" : "18.0",
116
+ "visionOS" : "2.0",
117
+ "watchOS" : "11.0",
118
+ "iOS" : "18.0",
119
+ "macCatalyst" : "18.0"
120
+ },
121
+ "modelType" : {
122
+ "name" : "MLModelType_mlProgram"
123
+ },
124
+ "userDefinedMetadata" : {
125
+ "com.github.apple.coremltools.source_dialect" : "TorchScript",
126
+ "com.github.apple.coremltools.version" : "8.3.0",
127
+ "com.github.apple.coremltools.source" : "torch==1.13.0"
128
+ },
129
+ "inputSchema" : [
130
+ {
131
+ "hasShapeFlexibility" : "0",
132
+ "isOptional" : "0",
133
+ "dataType" : "Float32",
134
+ "formattedType" : "MultiArray (Float32 1 × 1 × 345)",
135
+ "shortDescription" : "",
136
+ "shape" : "[1, 1, 345]",
137
+ "name" : "frame",
138
+ "type" : "MultiArray"
139
+ },
140
+ {
141
+ "hasShapeFlexibility" : "0",
142
+ "isOptional" : "0",
143
+ "dataType" : "Float32",
144
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4 × 64 × 64)",
145
+ "shortDescription" : "",
146
+ "shape" : "[4, 1, 4, 64, 64]",
147
+ "name" : "enc_ret_kv",
148
+ "type" : "MultiArray"
149
+ },
150
+ {
151
+ "hasShapeFlexibility" : "0",
152
+ "isOptional" : "0",
153
+ "dataType" : "Float32",
154
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4)",
155
+ "shortDescription" : "",
156
+ "shape" : "[4, 1, 4]",
157
+ "name" : "enc_ret_scale",
158
+ "type" : "MultiArray"
159
+ },
160
+ {
161
+ "hasShapeFlexibility" : "0",
162
+ "isOptional" : "0",
163
+ "dataType" : "Float32",
164
+ "formattedType" : "MultiArray (Float32 4 × 1 × 15 × 256)",
165
+ "shortDescription" : "",
166
+ "shape" : "[4, 1, 15, 256]",
167
+ "name" : "enc_conv_cache",
168
+ "type" : "MultiArray"
169
+ },
170
+ {
171
+ "hasShapeFlexibility" : "0",
172
+ "isOptional" : "0",
173
+ "dataType" : "Float32",
174
+ "formattedType" : "MultiArray (Float32 2 × 12 × 4 × 64 × 64)",
175
+ "shortDescription" : "",
176
+ "shape" : "[2, 12, 4, 64, 64]",
177
+ "name" : "dec_ret_kv",
178
+ "type" : "MultiArray"
179
+ },
180
+ {
181
+ "hasShapeFlexibility" : "0",
182
+ "isOptional" : "0",
183
+ "dataType" : "Float32",
184
+ "formattedType" : "MultiArray (Float32 2 × 12 × 4)",
185
+ "shortDescription" : "",
186
+ "shape" : "[2, 12, 4]",
187
+ "name" : "dec_ret_scale",
188
+ "type" : "MultiArray"
189
+ },
190
+ {
191
+ "hasShapeFlexibility" : "0",
192
+ "isOptional" : "0",
193
+ "dataType" : "Float32",
194
+ "formattedType" : "MultiArray (Float32 1 × 19 × 256)",
195
+ "shortDescription" : "",
196
+ "shape" : "[1, 19, 256]",
197
+ "name" : "top_buffer",
198
+ "type" : "MultiArray"
199
+ },
200
+ {
201
+ "hasShapeFlexibility" : "0",
202
+ "isOptional" : "0",
203
+ "dataType" : "Float32",
204
+ "formattedType" : "MultiArray (Float32 1)",
205
+ "shortDescription" : "",
206
+ "shape" : "[1]",
207
+ "name" : "ingest",
208
+ "type" : "MultiArray"
209
+ },
210
+ {
211
+ "hasShapeFlexibility" : "0",
212
+ "isOptional" : "0",
213
+ "dataType" : "Float32",
214
+ "formattedType" : "MultiArray (Float32 1)",
215
+ "shortDescription" : "",
216
+ "shape" : "[1]",
217
+ "name" : "decode",
218
+ "type" : "MultiArray"
219
+ }
220
+ ],
221
+ "generatedClassName" : "ls_eend_dih3_step",
222
+ "method" : "predict"
223
+ }
224
+ ]
DIHARD III/ls_eend_dih3_step.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
DIHARD III/ls_eend_dih3_step.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:565eb30c4d7f5f9fa55451b4fe06ac1e11d64afd9331f833cb56e8cb1edd7519
3
+ size 44395136
DIHARD III/ls_eend_dih3_step.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bdcd6e15d0978052bd162c71e15025e9288f0e7d08f80c2afdc70f5592a1c5a0
3
+ size 168873
DIHARD III/ls_eend_dih3_step.mlpackage/Data/com.apple.CoreML/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:565eb30c4d7f5f9fa55451b4fe06ac1e11d64afd9331f833cb56e8cb1edd7519
3
+ size 44395136
DIHARD III/ls_eend_dih3_step.mlpackage/Manifest.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "fileFormatVersion": "1.0.0",
3
+ "itemInfoEntries": {
4
+ "558EA27C-8294-42D9-A499-AAC37BC78889": {
5
+ "author": "com.apple.CoreML",
6
+ "description": "CoreML Model Weights",
7
+ "name": "weights",
8
+ "path": "com.apple.CoreML/weights"
9
+ },
10
+ "571CA3B6-D04D-4E55-B668-FBECBE7C9802": {
11
+ "author": "com.apple.CoreML",
12
+ "description": "CoreML Model Specification",
13
+ "name": "model.mlmodel",
14
+ "path": "com.apple.CoreML/model.mlmodel"
15
+ }
16
+ },
17
+ "rootModelIdentifier": "571CA3B6-D04D-4E55-B668-FBECBE7C9802"
18
+ }
LICENSE ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ MIT License
2
+
3
+ Copyright (c) 2023 Audio-WestlakeU
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
README.md ADDED
@@ -0,0 +1,188 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ language:
3
+ - en
4
+ license: mit
5
+ library_name: coreml
6
+ pipeline_tag: audio-classification
7
+ tags:
8
+ - speaker-diarization
9
+ - diarization
10
+ - coreml
11
+ - apple
12
+ - streaming
13
+ - audio
14
+ - ls-eend
15
+ - eend
16
+ pretty_name: LS-EEND CoreML Models
17
+ model-index:
18
+ - name: LS-EEND CoreML Models
19
+ results: []
20
+ ---
21
+
22
+ # LS-EEND CoreML Models
23
+
24
+ CoreML exports of LS-EEND, a long-form streaming end-to-end neural diarization model with online attractor extraction.
25
+
26
+ This repository contains non-quantized CoreML step models for four LS-EEND variants:
27
+
28
+ - `AMI`
29
+ - `CALLHOME`
30
+ - `DIHARD II`
31
+ - `DIHARD III`
32
+
33
+ These models are intended for stateful streaming inference. Each package runs one LS-EEND step at a time with explicit recurrent/cache tensors, rather than processing an entire utterance in a single call.
34
+
35
+ ## Included files
36
+
37
+ Each variant directory contains:
38
+
39
+ - `*.mlpackage`: the CoreML model package
40
+ - `*.json`: metadata needed by the runtime
41
+ - `*.mlmodelc`: a compiled CoreML bundle generated locally for convenience
42
+
43
+ Variant directories:
44
+
45
+ - `AMI/`
46
+ - `CALLHOME/`
47
+ - `DIHARD II/`
48
+ - `DIHARD III/`
49
+
50
+ ## Variants
51
+
52
+ | Variant | Package | Configured max speakers | Model output capacity |
53
+ | --- | --- | ---: | ---: |
54
+ | AMI | `AMI/ls_eend_ami_step.mlpackage` | 4 | 6 |
55
+ | CALLHOME | `CALLHOME/ls_eend_callhome_step.mlpackage` | 7 | 9 |
56
+ | DIHARD II | `DIHARD II/ls_eend_dih2_step.mlpackage` | 10 | 12 |
57
+ | DIHARD III | `DIHARD III/ls_eend_dih3_step.mlpackage` | 10 | 12 |
58
+
59
+ The metadata JSON distinguishes between:
60
+
61
+ - `max_speakers`: the dataset/config speaker setting from the LS-EEND infer YAML
62
+ - `max_nspks`: the exported model's full decode/output capacity
63
+
64
+ ## Frontend and runtime assumptions
65
+
66
+ All four non-quantized exports in this repo use the same frontend settings:
67
+
68
+ - sample rate: `8000 Hz`
69
+ - window length: `200` samples
70
+ - hop length: `80` samples
71
+ - FFT size: `1024`
72
+ - mel bins: `23`
73
+ - context receptive field: `7`
74
+ - subsampling: `10`
75
+ - feature type: `logmel23_cummn`
76
+ - output frame rate: `10 Hz`
77
+ - compute precision: `float32`
78
+
79
+ These are step-wise streaming models. A runtime must maintain and feed the recurrent state tensors between calls:
80
+
81
+ - `enc_ret_kv`
82
+ - `enc_ret_scale`
83
+ - `enc_conv_cache`
84
+ - `dec_ret_kv`
85
+ - `dec_ret_scale`
86
+ - `top_buffer`
87
+
88
+ The CoreML inputs and outputs follow the LS-EEND step export used by the reference Python and Swift runtimes.
89
+
90
+ ## Intended usage
91
+
92
+ Use these packages with a runtime that:
93
+
94
+ 1. Resamples audio to mono `8 kHz`
95
+ 2. Extracts LS-EEND features with the settings above
96
+ 3. Preserves model state across step calls
97
+ 4. Uses `ingest`/`decode` control inputs to handle the encoder delay and final tail flush
98
+ 5. Applies postprocessing such as sigmoid, thresholding, optional median filtering, and RTTM conversion outside the CoreML graph
99
+
100
+ This repository is not a drop-in replacement for generic Hugging Face `transformers` inference. It is meant for custom CoreML runtimes, such as:
101
+
102
+ - the Python LS-EEND CoreML runtime from the FS-EEND project
103
+ - the Swift/macOS runtime used for the LS-EEND CoreML microphone demo
104
+
105
+ ## Minimal metadata example
106
+
107
+ Each variant ships a sidecar JSON with fields like:
108
+
109
+ ```json
110
+ {
111
+ "sample_rate": 8000,
112
+ "win_length": 200,
113
+ "hop_length": 80,
114
+ "n_fft": 1024,
115
+ "n_mels": 23,
116
+ "context_recp": 7,
117
+ "subsampling": 10,
118
+ "feat_type": "logmel23_cummn",
119
+ "frame_hz": 10.0,
120
+ "max_speakers": 10,
121
+ "max_nspks": 12
122
+ }
123
+ ```
124
+
125
+ Check the variant-specific `*.json` file for the exact state tensor shapes and output dimensions.
126
+
127
+ ## Credits
128
+
129
+ - **Base model**: [LS-EEND](https://github.com/Audio-WestlakeU/FS-EEND) by Di Liang & Xiaofei Li (Westlake University). Paper: [LS-EEND: Long-Form Streaming End-to-End Neural Diarization with Online Attractor Extraction](https://arxiv.org/abs/2410.06670) (IEEE TASLP 2025). The original model is not hosted on HuggingFace; pretrained weights are available on [GitHub](https://github.com/Audio-WestlakeU/FS-EEND).
130
+ - **CoreML conversion**: [@GradientDescent2718](https://huggingface.co/GradientDescent2718). Original repo: [GradientDescent2718/ls-eend-coreml](https://huggingface.co/GradientDescent2718/ls-eend-coreml).
131
+
132
+ ## Source project
133
+
134
+ These CoreML exports were produced from the LS-EEND code in the FS-EEND repository:
135
+
136
+ - GitHub: [Audio-WestlakeU/FS-EEND](https://github.com/Audio-WestlakeU/FS-EEND)
137
+
138
+ The export path is based on the LS-EEND CoreML step exporter and variant batch exporter in that project.
139
+
140
+ ## Training and evaluation context
141
+
142
+ From the source project, the reported real-world diarization error rates are:
143
+
144
+ | Dataset | DER (%) |
145
+ | --- | ---: |
146
+ | CALLHOME | 12.11 |
147
+ | DIHARD II | 27.58 |
148
+ | DIHARD III | 19.61 |
149
+ | AMI Dev | 20.97 |
150
+ | AMI Eval | 20.76 |
151
+
152
+ These numbers come from the upstream LS-EEND project README and reflect the original training/evaluation setup, not a Hugging Face evaluation pipeline.
153
+
154
+ ## Limitations
155
+
156
+ - These models are exported for Apple CoreML runtimes, not for PyTorch or ONNX consumers.
157
+ - They are stateful streaming step models, so they require a custom driver loop.
158
+ - They assume an 8 kHz LS-EEND frontend and will not produce matching results if you use a different spectrogram pipeline.
159
+ - Speaker identities are output as activity tracks/slots and still require downstream diarization postprocessing and speaker-slot alignment where appropriate.
160
+
161
+ ## License and dataset constraints
162
+
163
+ The upstream LS-EEND model/codebase used for these CoreML exports is MIT-licensed, and this repository is published as MIT accordingly.
164
+
165
+ The underlying evaluation and fine-tuning datasets still have their own access and usage terms:
166
+
167
+ - AMI
168
+ - CALLHOME
169
+ - DIHARD II
170
+ - DIHARD III
171
+
172
+ This repository redistributes CoreML exports of the LS-EEND model variants. Dataset licensing and access requirements remain governed by the original dataset providers.
173
+
174
+ ## Citation
175
+
176
+ If you use LS-EEND, cite the original paper:
177
+
178
+ ```bibtex
179
+ @ARTICLE{11122273,
180
+ author={Liang, Di and Li, Xiaofei},
181
+ journal={IEEE Transactions on Audio, Speech and Language Processing},
182
+ title={LS-EEND: Long-Form Streaming End-to-End Neural Diarization With Online Attractor Extraction},
183
+ year={2025},
184
+ volume={33},
185
+ pages={3568-3581},
186
+ doi={10.1109/TASLPRO.2025.3597446}
187
+ }
188
+ ```
config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {}
optimized/ami/100ms/ls_eend_ami_100ms.mlmodelc/analytics/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:acd29bd7bc0274452e77f5735e705d136916e5bd978ca14d6a383c7a2018618a
3
+ size 243
optimized/ami/100ms/ls_eend_ami_100ms.mlmodelc/coremldata.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2c45f751d1fdb81520abae505c4a969a6b817d816487ce6e901e0e02e60a5f0b
3
+ size 1395
optimized/ami/100ms/ls_eend_ami_100ms.mlmodelc/metadata.json ADDED
@@ -0,0 +1,218 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ [
2
+ {
3
+ "metadataOutputVersion" : "3.0",
4
+ "shortDescription" : "LS-EEND AMI streaming diarizer (pipeline, T=1, max_speakers=4, layout=raw_mel, cu=all)",
5
+ "outputSchema" : [
6
+ {
7
+ "hasShapeFlexibility" : "0",
8
+ "isOptional" : "0",
9
+ "dataType" : "Float32",
10
+ "formattedType" : "MultiArray (Float32 1 × 1 × 4)",
11
+ "shortDescription" : "",
12
+ "shape" : "[1, 1, 4]",
13
+ "name" : "probs",
14
+ "type" : "MultiArray"
15
+ },
16
+ {
17
+ "hasShapeFlexibility" : "0",
18
+ "isOptional" : "0",
19
+ "dataType" : "Float32",
20
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4 × 64 × 64)",
21
+ "shortDescription" : "",
22
+ "shape" : "[4, 1, 4, 64, 64]",
23
+ "name" : "enc_kv_new",
24
+ "type" : "MultiArray"
25
+ },
26
+ {
27
+ "hasShapeFlexibility" : "0",
28
+ "isOptional" : "0",
29
+ "dataType" : "Float32",
30
+ "formattedType" : "MultiArray (Float32 4 × 1)",
31
+ "shortDescription" : "",
32
+ "shape" : "[4, 1]",
33
+ "name" : "enc_scale_new",
34
+ "type" : "MultiArray"
35
+ },
36
+ {
37
+ "hasShapeFlexibility" : "0",
38
+ "isOptional" : "0",
39
+ "dataType" : "Float32",
40
+ "formattedType" : "MultiArray (Float32 4 × 1 × 16 × 256)",
41
+ "shortDescription" : "",
42
+ "shape" : "[4, 1, 16, 256]",
43
+ "name" : "enc_conv_cache_new",
44
+ "type" : "MultiArray"
45
+ },
46
+ {
47
+ "hasShapeFlexibility" : "0",
48
+ "isOptional" : "0",
49
+ "dataType" : "Float32",
50
+ "formattedType" : "MultiArray (Float32 1 × 256 × 18)",
51
+ "shortDescription" : "",
52
+ "shape" : "[1, 256, 18]",
53
+ "name" : "cnn_window_new",
54
+ "type" : "MultiArray"
55
+ },
56
+ {
57
+ "hasShapeFlexibility" : "0",
58
+ "isOptional" : "0",
59
+ "dataType" : "Float32",
60
+ "formattedType" : "MultiArray (Float32 2 × 6 × 4 × 64 × 64)",
61
+ "shortDescription" : "",
62
+ "shape" : "[2, 6, 4, 64, 64]",
63
+ "name" : "dec_kv_new",
64
+ "type" : "MultiArray"
65
+ },
66
+ {
67
+ "hasShapeFlexibility" : "0",
68
+ "isOptional" : "0",
69
+ "dataType" : "Float32",
70
+ "formattedType" : "MultiArray (Float32 2 × 1)",
71
+ "shortDescription" : "",
72
+ "shape" : "[2, 1]",
73
+ "name" : "dec_scale_new",
74
+ "type" : "MultiArray"
75
+ }
76
+ ],
77
+ "storagePrecision" : "Float32",
78
+ "modelParameters" : [
79
+
80
+ ],
81
+ "specificationVersion" : 8,
82
+ "mlProgramOperationTypeHistogram" : {
83
+ "Ios16.reduceL2Norm" : 2,
84
+ "Ios17.reshape" : 67,
85
+ "Ios16.softmax" : 2,
86
+ "Ios17.matmul" : 29,
87
+ "Ios17.transpose" : 57,
88
+ "Split" : 4,
89
+ "Ios17.expandDims" : 4,
90
+ "Ios17.add" : 46,
91
+ "Ios16.sigmoid" : 5,
92
+ "Ios17.sliceByIndex" : 36,
93
+ "Tile" : 1,
94
+ "Ios16.reduceSum" : 1,
95
+ "Ios17.squeeze" : 2,
96
+ "Ios17.layerNorm" : 33,
97
+ "Ios17.batchNorm" : 4,
98
+ "Ios17.sqrt" : 18,
99
+ "Ios17.conv" : 13,
100
+ "Ios17.clip" : 6,
101
+ "Ios16.silu" : 18,
102
+ "Ios17.realDiv" : 20,
103
+ "Ios17.linear" : 56,
104
+ "Stack" : 5,
105
+ "Ios17.concat" : 10,
106
+ "Ios16.relu" : 2,
107
+ "Ios16.cumsum" : 1,
108
+ "Ios17.mul" : 46
109
+ },
110
+ "computePrecision" : "Mixed (Float32, Int32)",
111
+ "isUpdatable" : "0",
112
+ "stateSchema" : [
113
+
114
+ ],
115
+ "availability" : {
116
+ "macOS" : "14.0",
117
+ "tvOS" : "17.0",
118
+ "visionOS" : "1.0",
119
+ "watchOS" : "10.0",
120
+ "iOS" : "17.0",
121
+ "macCatalyst" : "17.0"
122
+ },
123
+ "modelType" : {
124
+ "name" : "MLModelType_mlProgram"
125
+ },
126
+ "inputSchema" : [
127
+ {
128
+ "hasShapeFlexibility" : "0",
129
+ "isOptional" : "0",
130
+ "dataType" : "Float32",
131
+ "formattedType" : "MultiArray (Float32 1 × 15 × 23)",
132
+ "shortDescription" : "",
133
+ "shape" : "[1, 15, 23]",
134
+ "name" : "features",
135
+ "type" : "MultiArray"
136
+ },
137
+ {
138
+ "hasShapeFlexibility" : "0",
139
+ "isOptional" : "0",
140
+ "dataType" : "Float32",
141
+ "formattedType" : "MultiArray (Float32 4 × 1 × 4 × 64 × 64)",
142
+ "shortDescription" : "",
143
+ "shape" : "[4, 1, 4, 64, 64]",
144
+ "name" : "enc_kv",
145
+ "type" : "MultiArray"
146
+ },
147
+ {
148
+ "hasShapeFlexibility" : "0",
149
+ "isOptional" : "0",
150
+ "dataType" : "Float32",
151
+ "formattedType" : "MultiArray (Float32 4 × 1)",
152
+ "shortDescription" : "",
153
+ "shape" : "[4, 1]",
154
+ "name" : "enc_scale",
155
+ "type" : "MultiArray"
156
+ },
157
+ {
158
+ "hasShapeFlexibility" : "0",
159
+ "isOptional" : "0",
160
+ "dataType" : "Float32",
161
+ "formattedType" : "MultiArray (Float32 4 × 1 × 16 × 256)",
162
+ "shortDescription" : "",
163
+ "shape" : "[4, 1, 16, 256]",
164
+ "name" : "enc_conv_cache",
165
+ "type" : "MultiArray"
166
+ },
167
+ {
168
+ "hasShapeFlexibility" : "0",
169
+ "isOptional" : "0",
170
+ "dataType" : "Float32",
171
+ "formattedType" : "MultiArray (Float32 1 × 256 × 18)",
172
+ "shortDescription" : "",
173
+ "shape" : "[1, 256, 18]",
174
+ "name" : "cnn_window",
175
+ "type" : "MultiArray"
176
+ },
177
+ {
178
+ "hasShapeFlexibility" : "0",
179
+ "isOptional" : "0",
180
+ "dataType" : "Float32",
181
+ "formattedType" : "MultiArray (Float32 2 × 6 × 4 × 64 × 64)",
182
+ "shortDescription" : "",
183
+ "shape" : "[2, 6, 4, 64, 64]",
184
+ "name" : "dec_kv",
185
+ "type" : "MultiArray"
186
+ },
187
+ {
188
+ "hasShapeFlexibility" : "0",
189
+ "isOptional" : "0",
190
+ "dataType" : "Float32",
191
+ "formattedType" : "MultiArray (Float32 2 × 1)",
192
+ "shortDescription" : "",
193
+ "shape" : "[2, 1]",
194
+ "name" : "dec_scale",
195
+ "type" : "MultiArray"
196
+ },
197
+ {
198
+ "hasShapeFlexibility" : "0",
199
+ "isOptional" : "0",
200
+ "dataType" : "Float32",
201
+ "formattedType" : "MultiArray (Float32 1)",
202
+ "shortDescription" : "",
203
+ "shape" : "[1]",
204
+ "name" : "valid_mask",
205
+ "type" : "MultiArray"
206
+ }
207
+ ],
208
+ "userDefinedMetadata" : {
209
+ "com.github.apple.coremltools.conversion_date" : "2026-04-18",
210
+ "config" : "{\"model_name\": \"ami\", \"model_label\": \"AMI\", \"variant\": \"pipeline\", \"chunk_size\": 1, \"step_duration_ms\": 100, \"frame_duration_ms\": 100, \"frame_duration_seconds\": 0.1, \"max_speakers\": 4, \"max_nspks\": 6, \"n_units\": 256, \"n_heads\": 4, \"enc_n_layers\": 4, \"dec_n_layers\": 2, \"conv_kernel_size\": 16, \"conv_delay\": 9, \"sample_rate\": 8000, \"win_length\": 200, \"hop_length\": 80, \"n_mels\": 23, \"context_size\": 7, \"subsampling\": 10, \"feat_type\": \"logmel23_cummn\", \"pure_roll\": true, \"input_layout\": \"raw_mel\", \"compute_units_export\": \"all\", \"raw_mel_length\": 15}",
211
+ "com.github.apple.coremltools.source" : "torch==2.6.0",
212
+ "com.github.apple.coremltools.version" : "9.0",
213
+ "com.github.apple.coremltools.source_dialect" : "TorchScript"
214
+ },
215
+ "generatedClassName" : "ls_eend_ami_100ms",
216
+ "method" : "predict"
217
+ }
218
+ ]
optimized/ami/100ms/ls_eend_ami_100ms.mlmodelc/model.mil ADDED
The diff for this file is too large to render. See raw diff
 
optimized/ami/100ms/ls_eend_ami_100ms.mlmodelc/weights/weight.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f77a11836cfdeff2a24070c9eb01c8a3146d23216ebd63a5f02bf154a5e95aa7
3
+ size 44401536
optimized/ami/100ms/ls_eend_ami_100ms.mlpackage/Data/com.apple.CoreML/model.mlmodel ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc177110db14e37e532be41bb47accb450f1ce89bb7876b675b2d4a25d797e1e
3
+ size 175266