khu committed on
Commit
d297277
·
unverified ·
1 Parent(s): 7375c6b
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "avsolatorio/GIST-small-Embedding-v0",
3
  "architectures": [
4
  "BertModel"
5
  ],
@@ -21,6 +21,7 @@
21
  "model_type": "bert",
22
  "num_attention_heads": 12,
23
  "num_hidden_layers": 12,
 
24
  "pad_token_id": 0,
25
  "position_embedding_type": "absolute",
26
  "transformers_version": "4.38.1",
 
1
  {
2
+ "_name_or_path": "avsolatorio/NoInstruct-small-Embedding-v0",
3
  "architectures": [
4
  "BertModel"
5
  ],
 
21
  "model_type": "bert",
22
  "num_attention_heads": 12,
23
  "num_hidden_layers": 12,
24
+ "output_hidden_states": true,
25
  "pad_token_id": 0,
26
  "position_embedding_type": "absolute",
27
  "transformers_version": "4.38.1",
onnx/model.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8c95502dc23d4ffb50a81d3838f721abde71509982d720d355ce52c0cdfe2dfe
3
  size 133093492
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:37a0a5eb063d3479ad8f0d764cb99cc71fd51b12b0c0d2b85b58c5423f821d3d
3
  size 133093492
onnx/model_bnb4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b169590fdbc586696c2bf29346a51b8dffa2e1196f878aa4b115eb2e486a6790
3
+ size 60113951
onnx/model_fp16.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:195acee1af16e6f3ee605d2a71b4c900e2cee6fac8eb58fa36bd25b1cf228387
3
+ size 66749214
onnx/model_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43f4bed7552ac961cb329c5fbbc47dbddb2135fe7ae5c5f67d30f91acc608c6f
3
+ size 34015111
onnx/model_q4.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffd7cce04f628df93f7f6fc4c12e7eef2ff3e78459f80b77e62e02fde5d8524b
3
+ size 61440599
onnx/model_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:45b972073570f3af92b90b708ec580d63cca87654e2d5e7ccb2cf560aa881bf3
3
- size 34014427
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43f4bed7552ac961cb329c5fbbc47dbddb2135fe7ae5c5f67d30f91acc608c6f
3
+ size 34015111
onnx/model_uint8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b4e77809f071f798de78dd9358164415043afa2ce72c4c95f51ca2203444612
3
+ size 34015108
quantize_config.json CHANGED
@@ -1,30 +1,102 @@
1
  {
2
- "per_channel": true,
3
- "reduce_range": true,
4
- "per_model_config": {
5
- "model": {
6
- "op_types": [
7
- "Transpose",
8
- "Add",
9
- "Concat",
10
- "Shape",
11
- "Mul",
12
- "Reshape",
13
- "Gather",
14
- "Div",
15
- "Constant",
16
- "Sub",
17
- "Pow",
18
- "Slice",
19
- "Sqrt",
20
- "Erf",
21
- "Cast",
22
- "Softmax",
23
- "Unsqueeze",
24
- "ReduceMean",
25
- "MatMul"
26
- ],
27
- "weight_type": "QInt8"
28
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29
  }
30
  }
 
1
  {
2
+ "fp16": {},
3
+ "q8": {
4
+ "per_model_config": {
5
+ "model": {
6
+ "op_types": [
7
+ "Add",
8
+ "Cast",
9
+ "Concat",
10
+ "Constant",
11
+ "Div",
12
+ "Erf",
13
+ "Gather",
14
+ "MatMul",
15
+ "Mul",
16
+ "Pow",
17
+ "ReduceMean",
18
+ "Reshape",
19
+ "Shape",
20
+ "Slice",
21
+ "Softmax",
22
+ "Sqrt",
23
+ "Sub",
24
+ "Transpose",
25
+ "Unsqueeze"
26
+ ],
27
+ "weight_type": "QInt8"
28
+ }
29
+ },
30
+ "per_channel": true,
31
+ "reduce_range": true
32
+ },
33
+ "int8": {
34
+ "per_model_config": {
35
+ "model": {
36
+ "op_types": [
37
+ "Add",
38
+ "Cast",
39
+ "Concat",
40
+ "Constant",
41
+ "Div",
42
+ "Erf",
43
+ "Gather",
44
+ "MatMul",
45
+ "Mul",
46
+ "Pow",
47
+ "ReduceMean",
48
+ "Reshape",
49
+ "Shape",
50
+ "Slice",
51
+ "Softmax",
52
+ "Sqrt",
53
+ "Sub",
54
+ "Transpose",
55
+ "Unsqueeze"
56
+ ],
57
+ "weight_type": "QInt8"
58
+ }
59
+ },
60
+ "per_channel": true,
61
+ "reduce_range": true
62
+ },
63
+ "uint8": {
64
+ "per_model_config": {
65
+ "model": {
66
+ "op_types": [
67
+ "Add",
68
+ "Cast",
69
+ "Concat",
70
+ "Constant",
71
+ "Div",
72
+ "Erf",
73
+ "Gather",
74
+ "MatMul",
75
+ "Mul",
76
+ "Pow",
77
+ "ReduceMean",
78
+ "Reshape",
79
+ "Shape",
80
+ "Slice",
81
+ "Softmax",
82
+ "Sqrt",
83
+ "Sub",
84
+ "Transpose",
85
+ "Unsqueeze"
86
+ ],
87
+ "weight_type": "QUInt8"
88
+ }
89
+ },
90
+ "per_channel": true,
91
+ "reduce_range": true
92
+ },
93
+ "q4": {
94
+ "block_size": 32,
95
+ "is_symmetric": true,
96
+ "accuracy_level": null
97
+ },
98
+ "bnb4": {
99
+ "block_size": 64,
100
+ "quant_type": 1
101
  }
102
  }