Xenova HF staff committed on
Commit
58d08a9
1 Parent(s): cdaf8fe

Upload folder using huggingface_hub

Browse files
.gitattributes CHANGED
@@ -33,3 +33,6 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ onnx/decoder_model.onnx_data filter=lfs diff=lfs merge=lfs -text
37
+ onnx/decoder_model_merged.onnx_data filter=lfs diff=lfs merge=lfs -text
38
+ onnx/decoder_with_past_model.onnx_data filter=lfs diff=lfs merge=lfs -text
config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "./moondream2-text_model",
3
+ "architectures": [
4
+ "PhiForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 1,
8
+ "embd_pdrop": 0.0,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu_new",
11
+ "hidden_size": 2048,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 8192,
14
+ "layer_norm_eps": 1e-05,
15
+ "max_position_embeddings": 2048,
16
+ "model_type": "phi",
17
+ "num_attention_heads": 32,
18
+ "num_hidden_layers": 24,
19
+ "num_key_value_heads": 32,
20
+ "partial_rotary_factor": 0.5,
21
+ "qk_layernorm": false,
22
+ "resid_pdrop": 0.0,
23
+ "rope_scaling": null,
24
+ "rope_theta": 10000.0,
25
+ "tie_word_embeddings": false,
26
+ "transformers_version": "4.38.2",
27
+ "use_cache": true,
28
+ "vocab_size": 51200
29
+ }
generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "eos_token_id": 2,
5
+ "transformers_version": "4.38.2"
6
+ }
onnx/decoder_model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66e3a2eece79e2986c9061c6ba3b548c959d21acff08abd8dd279ff0650a6ea1
3
+ size 1332769
onnx/decoder_model.onnx_data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3eb2cb6ac64b581d8b3cefb3f941194fb19fa1fd90687405a21b87ce1ab51a8a
3
+ size 5251522560
onnx/decoder_model_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca0b8e6b5aa27524a08702d7111050c5a2d880245c171d9c4382f47aaf857007
3
+ size 740058641
onnx/decoder_model_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c868b4c46feb8e29b1eb3150e6216cff6f31a58c26e87cc79ba9afc890f61ae6
3
+ size 1314802563
onnx/decoder_model_merged.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8d2d10e2714ea2855de5aba2ab6b2f72401e27e249b1631ab79a703739a1ea2a
3
+ size 2683038
onnx/decoder_model_merged.onnx_data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3eb2cb6ac64b581d8b3cefb3f941194fb19fa1fd90687405a21b87ce1ab51a8a
3
+ size 5251522560
onnx/decoder_model_merged_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9a7ef8141d935d5b55c5b942346be9693f659593931f4c6bb7522f13b7f8e827
3
+ size 741428457
onnx/decoder_model_merged_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f60146bf85d9fa669fa6ea5ab6a47461ad7bbc5ccbc79794b6f5298d86e8d855
3
+ size 1316563302
onnx/decoder_model_merged_q4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d49207b7a5dba74e08ac780c216ba1618070ddbe9cd78639d71721758a0a66bf
3
+ size 823477041
onnx/decoder_model_merged_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f60146bf85d9fa669fa6ea5ab6a47461ad7bbc5ccbc79794b6f5298d86e8d855
3
+ size 1316563302
onnx/decoder_model_merged_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8067e5e7f201435304a139fe9c18901c881b4700e46b5cf775158fdef1c4a88a
3
+ size 1316563370
onnx/decoder_model_q4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6b2847e6f2bd670eb4bb8435d15cc18869a1538b2993777487ec5267ac62fac0
3
+ size 822108530
onnx/decoder_model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c868b4c46feb8e29b1eb3150e6216cff6f31a58c26e87cc79ba9afc890f61ae6
3
+ size 1314802563
onnx/decoder_model_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32f40e751fa6b87242f423c4adc308361500a31ecf519a2ca893589fb883b68b
3
+ size 1314802631
onnx/decoder_with_past_model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e62ed7b277d41f43ac3c3e2613afed8d07b2a34743a3c476e7fb2dc5df44ea31
3
+ size 1355086
onnx/decoder_with_past_model.onnx_data ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3eb2cb6ac64b581d8b3cefb3f941194fb19fa1fd90687405a21b87ce1ab51a8a
3
+ size 5251522560
onnx/decoder_with_past_model_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f882ae5a7cf9edb6c310cbc2d475c7157bcc7f26cacbe6e447639e256324069
3
+ size 740079468
onnx/decoder_with_past_model_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aaf4beaa2ececc67d56a22f3d09408b144556934baa919945c07c40a8dbb469f
3
+ size 1314829011
onnx/decoder_with_past_model_q4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a1fb50282c8d42cfeb2fba02bc93660656dbffe63150b83824de91e17c9fe7d
3
+ size 822129357
onnx/decoder_with_past_model_quantized.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aaf4beaa2ececc67d56a22f3d09408b144556934baa919945c07c40a8dbb469f
3
+ size 1314829011
onnx/decoder_with_past_model_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecd25b5d7d466dfff9e34d34eea5e9a98320f77f14ca76b9056da08c5325deb2
3
+ size 1314829079
quantize_config.json ADDED
@@ -0,0 +1,329 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "q8": {
3
+ "per_model_config": {
4
+ "decoder_with_past_model": {
5
+ "op_types": [
6
+ "Add",
7
+ "Cast",
8
+ "Concat",
9
+ "Constant",
10
+ "ConstantOfShape",
11
+ "Div",
12
+ "Equal",
13
+ "Expand",
14
+ "Gather",
15
+ "Identity",
16
+ "Less",
17
+ "MatMul",
18
+ "Mul",
19
+ "Neg",
20
+ "Pow",
21
+ "Range",
22
+ "ReduceMean",
23
+ "Reshape",
24
+ "Shape",
25
+ "Slice",
26
+ "Softmax",
27
+ "Sqrt",
28
+ "Squeeze",
29
+ "Sub",
30
+ "Tanh",
31
+ "Transpose",
32
+ "Unsqueeze",
33
+ "Where"
34
+ ],
35
+ "weight_type": "QInt8"
36
+ },
37
+ "decoder_model": {
38
+ "op_types": [
39
+ "Add",
40
+ "Cast",
41
+ "Concat",
42
+ "Constant",
43
+ "ConstantOfShape",
44
+ "Div",
45
+ "Equal",
46
+ "Expand",
47
+ "Gather",
48
+ "Identity",
49
+ "Less",
50
+ "MatMul",
51
+ "Mul",
52
+ "Neg",
53
+ "Pow",
54
+ "Range",
55
+ "ReduceMean",
56
+ "Reshape",
57
+ "Shape",
58
+ "Slice",
59
+ "Softmax",
60
+ "Sqrt",
61
+ "Squeeze",
62
+ "Sub",
63
+ "Tanh",
64
+ "Transpose",
65
+ "Unsqueeze",
66
+ "Where"
67
+ ],
68
+ "weight_type": "QInt8"
69
+ },
70
+ "decoder_model_merged": {
71
+ "op_types": [
72
+ "Add",
73
+ "Cast",
74
+ "Concat",
75
+ "Constant",
76
+ "ConstantOfShape",
77
+ "Div",
78
+ "Equal",
79
+ "Expand",
80
+ "Gather",
81
+ "Identity",
82
+ "If",
83
+ "Less",
84
+ "MatMul",
85
+ "Mul",
86
+ "Neg",
87
+ "Pow",
88
+ "Range",
89
+ "ReduceMean",
90
+ "Reshape",
91
+ "Shape",
92
+ "Slice",
93
+ "Softmax",
94
+ "Sqrt",
95
+ "Squeeze",
96
+ "Sub",
97
+ "Tanh",
98
+ "Transpose",
99
+ "Unsqueeze",
100
+ "Where"
101
+ ],
102
+ "weight_type": "QInt8"
103
+ }
104
+ },
105
+ "per_channel": false,
106
+ "reduce_range": false
107
+ },
108
+ "int8": {
109
+ "per_model_config": {
110
+ "decoder_with_past_model": {
111
+ "op_types": [
112
+ "Add",
113
+ "Cast",
114
+ "Concat",
115
+ "Constant",
116
+ "ConstantOfShape",
117
+ "Div",
118
+ "Equal",
119
+ "Expand",
120
+ "Gather",
121
+ "Identity",
122
+ "Less",
123
+ "MatMul",
124
+ "Mul",
125
+ "Neg",
126
+ "Pow",
127
+ "Range",
128
+ "ReduceMean",
129
+ "Reshape",
130
+ "Shape",
131
+ "Slice",
132
+ "Softmax",
133
+ "Sqrt",
134
+ "Squeeze",
135
+ "Sub",
136
+ "Tanh",
137
+ "Transpose",
138
+ "Unsqueeze",
139
+ "Where"
140
+ ],
141
+ "weight_type": "QInt8"
142
+ },
143
+ "decoder_model": {
144
+ "op_types": [
145
+ "Add",
146
+ "Cast",
147
+ "Concat",
148
+ "Constant",
149
+ "ConstantOfShape",
150
+ "Div",
151
+ "Equal",
152
+ "Expand",
153
+ "Gather",
154
+ "Identity",
155
+ "Less",
156
+ "MatMul",
157
+ "Mul",
158
+ "Neg",
159
+ "Pow",
160
+ "Range",
161
+ "ReduceMean",
162
+ "Reshape",
163
+ "Shape",
164
+ "Slice",
165
+ "Softmax",
166
+ "Sqrt",
167
+ "Squeeze",
168
+ "Sub",
169
+ "Tanh",
170
+ "Transpose",
171
+ "Unsqueeze",
172
+ "Where"
173
+ ],
174
+ "weight_type": "QInt8"
175
+ },
176
+ "decoder_model_merged": {
177
+ "op_types": [
178
+ "Add",
179
+ "Cast",
180
+ "Concat",
181
+ "Constant",
182
+ "ConstantOfShape",
183
+ "Div",
184
+ "Equal",
185
+ "Expand",
186
+ "Gather",
187
+ "Identity",
188
+ "If",
189
+ "Less",
190
+ "MatMul",
191
+ "Mul",
192
+ "Neg",
193
+ "Pow",
194
+ "Range",
195
+ "ReduceMean",
196
+ "Reshape",
197
+ "Shape",
198
+ "Slice",
199
+ "Softmax",
200
+ "Sqrt",
201
+ "Squeeze",
202
+ "Sub",
203
+ "Tanh",
204
+ "Transpose",
205
+ "Unsqueeze",
206
+ "Where"
207
+ ],
208
+ "weight_type": "QInt8"
209
+ }
210
+ },
211
+ "per_channel": false,
212
+ "reduce_range": false
213
+ },
214
+ "uint8": {
215
+ "per_model_config": {
216
+ "decoder_with_past_model": {
217
+ "op_types": [
218
+ "Add",
219
+ "Cast",
220
+ "Concat",
221
+ "Constant",
222
+ "ConstantOfShape",
223
+ "Div",
224
+ "Equal",
225
+ "Expand",
226
+ "Gather",
227
+ "Identity",
228
+ "Less",
229
+ "MatMul",
230
+ "Mul",
231
+ "Neg",
232
+ "Pow",
233
+ "Range",
234
+ "ReduceMean",
235
+ "Reshape",
236
+ "Shape",
237
+ "Slice",
238
+ "Softmax",
239
+ "Sqrt",
240
+ "Squeeze",
241
+ "Sub",
242
+ "Tanh",
243
+ "Transpose",
244
+ "Unsqueeze",
245
+ "Where"
246
+ ],
247
+ "weight_type": "QUInt8"
248
+ },
249
+ "decoder_model": {
250
+ "op_types": [
251
+ "Add",
252
+ "Cast",
253
+ "Concat",
254
+ "Constant",
255
+ "ConstantOfShape",
256
+ "Div",
257
+ "Equal",
258
+ "Expand",
259
+ "Gather",
260
+ "Identity",
261
+ "Less",
262
+ "MatMul",
263
+ "Mul",
264
+ "Neg",
265
+ "Pow",
266
+ "Range",
267
+ "ReduceMean",
268
+ "Reshape",
269
+ "Shape",
270
+ "Slice",
271
+ "Softmax",
272
+ "Sqrt",
273
+ "Squeeze",
274
+ "Sub",
275
+ "Tanh",
276
+ "Transpose",
277
+ "Unsqueeze",
278
+ "Where"
279
+ ],
280
+ "weight_type": "QUInt8"
281
+ },
282
+ "decoder_model_merged": {
283
+ "op_types": [
284
+ "Add",
285
+ "Cast",
286
+ "Concat",
287
+ "Constant",
288
+ "ConstantOfShape",
289
+ "Div",
290
+ "Equal",
291
+ "Expand",
292
+ "Gather",
293
+ "Identity",
294
+ "If",
295
+ "Less",
296
+ "MatMul",
297
+ "Mul",
298
+ "Neg",
299
+ "Pow",
300
+ "Range",
301
+ "ReduceMean",
302
+ "Reshape",
303
+ "Shape",
304
+ "Slice",
305
+ "Softmax",
306
+ "Sqrt",
307
+ "Squeeze",
308
+ "Sub",
309
+ "Tanh",
310
+ "Transpose",
311
+ "Unsqueeze",
312
+ "Where"
313
+ ],
314
+ "weight_type": "QUInt8"
315
+ }
316
+ },
317
+ "per_channel": false,
318
+ "reduce_range": false
319
+ },
320
+ "q4": {
321
+ "block_size": 32,
322
+ "is_symmetric": true,
323
+ "accuracy_level": null
324
+ },
325
+ "bnb4": {
326
+ "block_size": 64,
327
+ "quant_type": 1
328
+ }
329
+ }