Zhiyu Cheng
committed on
Commit
•
b2cbce0
1
Parent(s):
5512a28
add model checkpoints and config files
Browse files
This view is limited to 50 files because it contains too many changes.
See raw diff
- config.json +39 -0
- generation_config.json +12 -0
- hf_quant_config.json +10 -0
- model-00001-of-00086.safetensors +3 -0
- model-00002-of-00086.safetensors +3 -0
- model-00003-of-00086.safetensors +3 -0
- model-00004-of-00086.safetensors +3 -0
- model-00005-of-00086.safetensors +3 -0
- model-00006-of-00086.safetensors +3 -0
- model-00007-of-00086.safetensors +3 -0
- model-00008-of-00086.safetensors +3 -0
- model-00009-of-00086.safetensors +3 -0
- model-00010-of-00086.safetensors +3 -0
- model-00011-of-00086.safetensors +3 -0
- model-00012-of-00086.safetensors +3 -0
- model-00013-of-00086.safetensors +3 -0
- model-00014-of-00086.safetensors +3 -0
- model-00015-of-00086.safetensors +3 -0
- model-00016-of-00086.safetensors +3 -0
- model-00017-of-00086.safetensors +3 -0
- model-00018-of-00086.safetensors +3 -0
- model-00019-of-00086.safetensors +3 -0
- model-00020-of-00086.safetensors +3 -0
- model-00021-of-00086.safetensors +3 -0
- model-00022-of-00086.safetensors +3 -0
- model-00023-of-00086.safetensors +3 -0
- model-00024-of-00086.safetensors +3 -0
- model-00025-of-00086.safetensors +3 -0
- model-00026-of-00086.safetensors +3 -0
- model-00027-of-00086.safetensors +3 -0
- model-00028-of-00086.safetensors +3 -0
- model-00029-of-00086.safetensors +3 -0
- model-00030-of-00086.safetensors +3 -0
- model-00031-of-00086.safetensors +3 -0
- model-00032-of-00086.safetensors +3 -0
- model-00033-of-00086.safetensors +3 -0
- model-00034-of-00086.safetensors +3 -0
- model-00035-of-00086.safetensors +3 -0
- model-00036-of-00086.safetensors +3 -0
- model-00037-of-00086.safetensors +3 -0
- model-00038-of-00086.safetensors +3 -0
- model-00039-of-00086.safetensors +3 -0
- model-00040-of-00086.safetensors +3 -0
- model-00041-of-00086.safetensors +3 -0
- model-00042-of-00086.safetensors +3 -0
- model-00043-of-00086.safetensors +3 -0
- model-00044-of-00086.safetensors +3 -0
- model-00045-of-00086.safetensors +3 -0
- model-00046-of-00086.safetensors +3 -0
- model-00047-of-00086.safetensors +3 -0
config.json
ADDED
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "/opt/llama3.1/Meta-Llama-3.1-405B-Instruct/",
|
3 |
+
"architectures": [
|
4 |
+
"LlamaForCausalLM"
|
5 |
+
],
|
6 |
+
"attention_bias": false,
|
7 |
+
"attention_dropout": 0.0,
|
8 |
+
"bos_token_id": 128000,
|
9 |
+
"eos_token_id": [
|
10 |
+
128001,
|
11 |
+
128008,
|
12 |
+
128009
|
13 |
+
],
|
14 |
+
"hidden_act": "silu",
|
15 |
+
"hidden_size": 16384,
|
16 |
+
"initializer_range": 0.02,
|
17 |
+
"intermediate_size": 53248,
|
18 |
+
"max_position_embeddings": 131072,
|
19 |
+
"mlp_bias": false,
|
20 |
+
"model_type": "llama",
|
21 |
+
"num_attention_heads": 128,
|
22 |
+
"num_hidden_layers": 126,
|
23 |
+
"num_key_value_heads": 8,
|
24 |
+
"pretraining_tp": 1,
|
25 |
+
"rms_norm_eps": 1e-05,
|
26 |
+
"rope_scaling": {
|
27 |
+
"factor": 8.0,
|
28 |
+
"high_freq_factor": 4.0,
|
29 |
+
"low_freq_factor": 1.0,
|
30 |
+
"original_max_position_embeddings": 8192,
|
31 |
+
"rope_type": "llama3"
|
32 |
+
},
|
33 |
+
"rope_theta": 500000.0,
|
34 |
+
"tie_word_embeddings": false,
|
35 |
+
"torch_dtype": "bfloat16",
|
36 |
+
"transformers_version": "4.43.4",
|
37 |
+
"use_cache": true,
|
38 |
+
"vocab_size": 128256
|
39 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"bos_token_id": 128000,
|
3 |
+
"do_sample": true,
|
4 |
+
"eos_token_id": [
|
5 |
+
128001,
|
6 |
+
128008,
|
7 |
+
128009
|
8 |
+
],
|
9 |
+
"temperature": 0.6,
|
10 |
+
"top_p": 0.9,
|
11 |
+
"transformers_version": "4.43.4"
|
12 |
+
}
|
hf_quant_config.json
ADDED
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"producer": {
|
3 |
+
"name": "modelopt",
|
4 |
+
"version": "0.17.0"
|
5 |
+
},
|
6 |
+
"quantization": {
|
7 |
+
"quant_algo": "FP8",
|
8 |
+
"kv_cache_quant_algo": null
|
9 |
+
}
|
10 |
+
}
|
model-00001-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:abc799746b86eb40c887f3b66c1f1df1b66c8925dcd6ce372cc21193c6a54fad
|
3 |
+
size 4773119376
|
model-00002-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3917a62ef49efce3331d1621f6409c5466b17fbe757cc00bdefafd2180dc8236
|
3 |
+
size 4932570128
|
model-00003-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a46ee529d528b6dfec63878fb7e65398fc8887ce3249504031757193c5cde7d2
|
3 |
+
size 4630646976
|
model-00004-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7db50ebf9f0f94dede49b1b4f100cf93766f512fb3d457d85eec3f629e50319b
|
3 |
+
size 4932570128
|
model-00005-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4a377187b3de8e1c296b21fefb4b0e88fa9e7d0eb4cb94f2862065384c3cccb5
|
3 |
+
size 4630646976
|
model-00006-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cab4217ce8fa627e56f1c60cf4e6184c495c0be8fa18eb960cedd7a9788daeed
|
3 |
+
size 4932570128
|
model-00007-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd2a4ad3ee10bede80e5e8f3340f4a063a0d45f02a5c0c2cd77ed3708ed89f30
|
3 |
+
size 4630646976
|
model-00008-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:341eeb2f36f63dcd86cd081bd6b1de8355a52eecd3facddb355b5b9d2576d3dc
|
3 |
+
size 4932570144
|
model-00009-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b32ee8cba42ec8696b9de04a206aca20d5ef7f2c77b56e804162a5c0cb4e938
|
3 |
+
size 4630647016
|
model-00010-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4413283a100aa3138b9fddd530c585fe3a881cce69df617ebfbc6808d71a1d62
|
3 |
+
size 4932570160
|
model-00011-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0f3df58f7599098b9d444c9e0ed1119509adcb11fba4aae66a2bd5c4ef336c32
|
3 |
+
size 4630647016
|
model-00012-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d7e2cf786c5195d3a59b33faf8e73fb39bb691957fab0d9811e35517e998f086
|
3 |
+
size 4932570160
|
model-00013-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d729c1da0bc71319f56a0dbafc6d2d7f1a0bf25f3d47dc69298a32bdcfa55b4f
|
3 |
+
size 4630647016
|
model-00014-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:89cefccb0925f1b897097ac951c3c5d546453413d1747e18fb2377753e6da8b9
|
3 |
+
size 4932570160
|
model-00015-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ab1e9fabbe6a02391b5e614dcfbaf30cabf5c27b973cc3692fc73f17d378d214
|
3 |
+
size 4630647016
|
model-00016-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:99e71eacfd0a418738f1e2e12bbd533058f9c8ef9c7fe6d58315284254abe476
|
3 |
+
size 4932570160
|
model-00017-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3bc77d66e6ede3e0e10b225b732d7a52f53f4124eb49201698ba5741404b164a
|
3 |
+
size 4630647016
|
model-00018-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a63590a0766446e17835a4b3d3407688b0c2c795f9eabc09097bdb0274acd42d
|
3 |
+
size 4932570160
|
model-00019-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:44b7f24cc86576cc9b2ba0b6e9d06f77bfd2d99f34b9a08e139b047f1c916a48
|
3 |
+
size 4630647016
|
model-00020-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cbf3b420b673a88769315d8ce192807bc38dbc849c3ae54567d620aaec93b7ed
|
3 |
+
size 4932570160
|
model-00021-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fa037061ff66d64fa0a3152c32bf2452cac8a87848a99fa50b1b894d6806d9af
|
3 |
+
size 4630647016
|
model-00022-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:48e9e21b78f808c4726a896d3791a26c787c053bc2182e109ce5b3b2c5e81ca1
|
3 |
+
size 4932570160
|
model-00023-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:325fc87cb641c0f8632170b4d2ecbb8a790fa42cf4d52c416d86c5389049b949
|
3 |
+
size 4630647016
|
model-00024-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c8faaeba1c8166d9ab926d84a05672e6adeb687e03b20724e929945d51f01ca9
|
3 |
+
size 4932570160
|
model-00025-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4709e6e48efaf8c73933f06ac7e90f689ef07b80811707291e241499139dca17
|
3 |
+
size 4630647016
|
model-00026-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7d29e50dd7fb1d407180bf86bc641a216a77b4be74af130bb930449e3c634347
|
3 |
+
size 4932570160
|
model-00027-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6175ba8c92031a46d81ab5b815724110f155fa6efdf00e162e58b5b92dc04a5f
|
3 |
+
size 4630647016
|
model-00028-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0b08da7afe4edfc8863829ece066ad2306d31e9190cae1f7498e0ada93a0efdc
|
3 |
+
size 4932570160
|
model-00029-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:375bc82d48d042ab84f72e6c986b954b865286b15b5f364b0c23c1526d7105ab
|
3 |
+
size 4630647016
|
model-00030-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:abdea2093d079a3f01f2dc2815412fb8fb58b3932448c10a5dbef3325b71d4c4
|
3 |
+
size 4932570160
|
model-00031-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:7f83ffc139abca27d301931d476347e484375f4100e0d98ca68b8cb86e52a65d
|
3 |
+
size 4630647016
|
model-00032-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:71e78f25c7b2d4db79f65373d597970173872b9a3808401b61d3b28701166244
|
3 |
+
size 4932570160
|
model-00033-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fbb68bc5ef9951cf2e0182a0b5cbc3e3773de794614cd869cda015fd4c386678
|
3 |
+
size 4630647016
|
model-00034-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:964c58979eb119eb30d052da0251c14138629d12522cb664475c354cff1852d9
|
3 |
+
size 4932570160
|
model-00035-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e2b5aa300bab7e85a480d0a7f847816a458398da82a4f8a5e512b19564809e3a
|
3 |
+
size 4630647016
|
model-00036-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bd9195bea0a5992ddce286acfcc2ee9de6859ae8faaaf9176fcb854c89803248
|
3 |
+
size 4932570160
|
model-00037-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:67915574bf7aad4ee6299f33e7706ab40e0437e3014ea6c7a42eadcf582099cc
|
3 |
+
size 4630647016
|
model-00038-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dd5cdcddffcd6b4dde66e5fe7070e8a8447981471baf345fa1db79b080d8e521
|
3 |
+
size 4932570160
|
model-00039-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:027128272cf78c68ce3261cd8f7834f3e52e1d12bd7793fe2f3f10e33f629d3a
|
3 |
+
size 4630647016
|
model-00040-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33fd2044a3e4a6f5557953fe7683d8fdbd37a83d56568e5aacf2435d7e8d3a5b
|
3 |
+
size 4932570160
|
model-00041-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:94fb40bbc0a21692d63a2b59a3648cf5d380232e53f38860a28f5d907fc4c6a9
|
3 |
+
size 4630647016
|
model-00042-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bc8e474d63ebeb8a0704470bd45a83c4d6d3214e189b250e0c010ebcc2363ec1
|
3 |
+
size 4932570160
|
model-00043-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:42ddf7231acf5bc45f939b6aeba554cb6fbe4753f3ac815df5c9b50f06484e11
|
3 |
+
size 4630647016
|
model-00044-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:25b2f8a47a783918462e4ba4547bf98645dad8203fd22b5f78b773d82d4bba3d
|
3 |
+
size 4932570160
|
model-00045-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fadbc818e2f55c0cc1750e64502436aa7965682fe80db297a83861eda1bcd2a4
|
3 |
+
size 4630647016
|
model-00046-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0c02fe1c87881687b3b0927cfbc1ec41edf3c4f7cf87f4db7661a3f89fc7ebc0
|
3 |
+
size 4932570160
|
model-00047-of-00086.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:21f03e7a296a843b26247ec41217d6d5ce9e6abcc251fbeae5d66519eeb8cb4b
|
3 |
+
size 4630647016
|