steerapi committed on
Commit
d786ec5
1 Parent(s): c384d8b

Upload folder using huggingface_hub

Browse files
.ipynb_checkpoints/config-checkpoint.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "./models/TheBloke/Llama-2-7b-chat-fp16-w8-g128",
3
+ "architectures": [
4
+ "LlamaForCausalLM"
5
+ ],
6
+ "bos_token_id": 1,
7
+ "eos_token_id": 2,
8
+ "hidden_act": "silu",
9
+ "hidden_size": 4096,
10
+ "initializer_range": 0.02,
11
+ "intermediate_size": 11008,
12
+ "max_position_embeddings": 4096,
13
+ "model_type": "llama",
14
+ "num_attention_heads": 32,
15
+ "num_hidden_layers": 32,
16
+ "num_key_value_heads": 32,
17
+ "pad_token_id": 0,
18
+ "pretraining_tp": 1,
19
+ "rms_norm_eps": 1e-06,
20
+ "rope_scaling": null,
21
+ "tie_word_embeddings": false,
22
+ "transformers_version": "4.31.0",
23
+ "use_cache": true,
24
+ "vocab_size": 32000
25
+ }
onnx/decoder_model_merged_quantized.onnx CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0e70270bfcc9239918ab41968230e6c96b228c6c8d1fdb0bdb09881631eadf4a
3
- size 332371926
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8e5997534698ab4dcebafbc8267016e17e6ce336592dba2d82cd8be61bd50d7
3
+ size 330767818
onnx/decoder_model_merged_quantized.onnx_data CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6752bb9b7e955f3098598555e3716f5da689ae2ee8c416b85b37ef873dddbc6
3
  size 6739214336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1166a5e8849121216988329ee41544c2e6c31d3c9e570f44825c8974ecd04ddb
3
  size 6739214336
onnx/quantize_config.json CHANGED
@@ -5,35 +5,35 @@
5
  "per_model_config": {
6
  "decoder_model_merged": {
7
  "op_types": [
 
 
 
8
  "Div",
9
- "Less",
10
- "Cast",
11
- "Add",
12
- "Unsqueeze",
13
- "Reshape",
14
- "Shape",
15
- "Range",
16
  "Concat",
17
- "Where",
18
- "Identity",
19
- "MatMul",
20
  "Equal",
21
- "Softmax",
22
- "ConstantOfShape",
23
- "Neg",
24
- "Slice",
25
  "Sqrt",
26
- "Expand",
27
- "Sigmoid",
28
  "Transpose",
29
  "ReduceMean",
30
- "Mul",
31
  "Squeeze",
32
  "Sub",
33
- "Pow",
 
 
 
 
34
  "Gather",
35
- "If",
36
- "Constant"
 
 
 
 
 
 
 
37
  ],
38
  "weight_type": "QInt8"
39
  }
 
5
  "per_model_config": {
6
  "decoder_model_merged": {
7
  "op_types": [
8
+ "Where",
9
+ "Softmax",
10
+ "Pow",
11
  "Div",
12
+ "If",
 
 
 
 
 
 
13
  "Concat",
 
 
 
14
  "Equal",
15
+ "Cast",
 
 
 
16
  "Sqrt",
 
 
17
  "Transpose",
18
  "ReduceMean",
19
+ "Range",
20
  "Squeeze",
21
  "Sub",
22
+ "Identity",
23
+ "Shape",
24
+ "MatMul",
25
+ "Unsqueeze",
26
+ "Sigmoid",
27
  "Gather",
28
+ "ConstantOfShape",
29
+ "Expand",
30
+ "Add",
31
+ "Neg",
32
+ "Constant",
33
+ "Mul",
34
+ "Reshape",
35
+ "Less",
36
+ "Slice"
37
  ],
38
  "weight_type": "QInt8"
39
  }