Zhiyu Cheng committed on
Commit
b2cbce0
1 Parent(s): 5512a28

add model checkpoints and config files

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. config.json +39 -0
  2. generation_config.json +12 -0
  3. hf_quant_config.json +10 -0
  4. model-00001-of-00086.safetensors +3 -0
  5. model-00002-of-00086.safetensors +3 -0
  6. model-00003-of-00086.safetensors +3 -0
  7. model-00004-of-00086.safetensors +3 -0
  8. model-00005-of-00086.safetensors +3 -0
  9. model-00006-of-00086.safetensors +3 -0
  10. model-00007-of-00086.safetensors +3 -0
  11. model-00008-of-00086.safetensors +3 -0
  12. model-00009-of-00086.safetensors +3 -0
  13. model-00010-of-00086.safetensors +3 -0
  14. model-00011-of-00086.safetensors +3 -0
  15. model-00012-of-00086.safetensors +3 -0
  16. model-00013-of-00086.safetensors +3 -0
  17. model-00014-of-00086.safetensors +3 -0
  18. model-00015-of-00086.safetensors +3 -0
  19. model-00016-of-00086.safetensors +3 -0
  20. model-00017-of-00086.safetensors +3 -0
  21. model-00018-of-00086.safetensors +3 -0
  22. model-00019-of-00086.safetensors +3 -0
  23. model-00020-of-00086.safetensors +3 -0
  24. model-00021-of-00086.safetensors +3 -0
  25. model-00022-of-00086.safetensors +3 -0
  26. model-00023-of-00086.safetensors +3 -0
  27. model-00024-of-00086.safetensors +3 -0
  28. model-00025-of-00086.safetensors +3 -0
  29. model-00026-of-00086.safetensors +3 -0
  30. model-00027-of-00086.safetensors +3 -0
  31. model-00028-of-00086.safetensors +3 -0
  32. model-00029-of-00086.safetensors +3 -0
  33. model-00030-of-00086.safetensors +3 -0
  34. model-00031-of-00086.safetensors +3 -0
  35. model-00032-of-00086.safetensors +3 -0
  36. model-00033-of-00086.safetensors +3 -0
  37. model-00034-of-00086.safetensors +3 -0
  38. model-00035-of-00086.safetensors +3 -0
  39. model-00036-of-00086.safetensors +3 -0
  40. model-00037-of-00086.safetensors +3 -0
  41. model-00038-of-00086.safetensors +3 -0
  42. model-00039-of-00086.safetensors +3 -0
  43. model-00040-of-00086.safetensors +3 -0
  44. model-00041-of-00086.safetensors +3 -0
  45. model-00042-of-00086.safetensors +3 -0
  46. model-00043-of-00086.safetensors +3 -0
  47. model-00044-of-00086.safetensors +3 -0
  48. model-00045-of-00086.safetensors +3 -0
  49. model-00046-of-00086.safetensors +3 -0
  50. model-00047-of-00086.safetensors +3 -0
config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/opt/llama3.1/Meta-Llama-3.1-405B-Instruct/",
3
+ "architectures": [
4
+ "LlamaForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 128000,
9
+ "eos_token_id": [
10
+ 128001,
11
+ 128008,
12
+ 128009
13
+ ],
14
+ "hidden_act": "silu",
15
+ "hidden_size": 16384,
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 53248,
18
+ "max_position_embeddings": 131072,
19
+ "mlp_bias": false,
20
+ "model_type": "llama",
21
+ "num_attention_heads": 128,
22
+ "num_hidden_layers": 126,
23
+ "num_key_value_heads": 8,
24
+ "pretraining_tp": 1,
25
+ "rms_norm_eps": 1e-05,
26
+ "rope_scaling": {
27
+ "factor": 8.0,
28
+ "high_freq_factor": 4.0,
29
+ "low_freq_factor": 1.0,
30
+ "original_max_position_embeddings": 8192,
31
+ "rope_type": "llama3"
32
+ },
33
+ "rope_theta": 500000.0,
34
+ "tie_word_embeddings": false,
35
+ "torch_dtype": "bfloat16",
36
+ "transformers_version": "4.43.4",
37
+ "use_cache": true,
38
+ "vocab_size": 128256
39
+ }
generation_config.json ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 128000,
3
+ "do_sample": true,
4
+ "eos_token_id": [
5
+ 128001,
6
+ 128008,
7
+ 128009
8
+ ],
9
+ "temperature": 0.6,
10
+ "top_p": 0.9,
11
+ "transformers_version": "4.43.4"
12
+ }
hf_quant_config.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "producer": {
3
+ "name": "modelopt",
4
+ "version": "0.17.0"
5
+ },
6
+ "quantization": {
7
+ "quant_algo": "FP8",
8
+ "kv_cache_quant_algo": null
9
+ }
10
+ }
model-00001-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abc799746b86eb40c887f3b66c1f1df1b66c8925dcd6ce372cc21193c6a54fad
3
+ size 4773119376
model-00002-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3917a62ef49efce3331d1621f6409c5466b17fbe757cc00bdefafd2180dc8236
3
+ size 4932570128
model-00003-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a46ee529d528b6dfec63878fb7e65398fc8887ce3249504031757193c5cde7d2
3
+ size 4630646976
model-00004-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7db50ebf9f0f94dede49b1b4f100cf93766f512fb3d457d85eec3f629e50319b
3
+ size 4932570128
model-00005-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4a377187b3de8e1c296b21fefb4b0e88fa9e7d0eb4cb94f2862065384c3cccb5
3
+ size 4630646976
model-00006-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cab4217ce8fa627e56f1c60cf4e6184c495c0be8fa18eb960cedd7a9788daeed
3
+ size 4932570128
model-00007-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd2a4ad3ee10bede80e5e8f3340f4a063a0d45f02a5c0c2cd77ed3708ed89f30
3
+ size 4630646976
model-00008-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:341eeb2f36f63dcd86cd081bd6b1de8355a52eecd3facddb355b5b9d2576d3dc
3
+ size 4932570144
model-00009-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b32ee8cba42ec8696b9de04a206aca20d5ef7f2c77b56e804162a5c0cb4e938
3
+ size 4630647016
model-00010-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4413283a100aa3138b9fddd530c585fe3a881cce69df617ebfbc6808d71a1d62
3
+ size 4932570160
model-00011-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f3df58f7599098b9d444c9e0ed1119509adcb11fba4aae66a2bd5c4ef336c32
3
+ size 4630647016
model-00012-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7e2cf786c5195d3a59b33faf8e73fb39bb691957fab0d9811e35517e998f086
3
+ size 4932570160
model-00013-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d729c1da0bc71319f56a0dbafc6d2d7f1a0bf25f3d47dc69298a32bdcfa55b4f
3
+ size 4630647016
model-00014-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89cefccb0925f1b897097ac951c3c5d546453413d1747e18fb2377753e6da8b9
3
+ size 4932570160
model-00015-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab1e9fabbe6a02391b5e614dcfbaf30cabf5c27b973cc3692fc73f17d378d214
3
+ size 4630647016
model-00016-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:99e71eacfd0a418738f1e2e12bbd533058f9c8ef9c7fe6d58315284254abe476
3
+ size 4932570160
model-00017-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bc77d66e6ede3e0e10b225b732d7a52f53f4124eb49201698ba5741404b164a
3
+ size 4630647016
model-00018-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a63590a0766446e17835a4b3d3407688b0c2c795f9eabc09097bdb0274acd42d
3
+ size 4932570160
model-00019-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:44b7f24cc86576cc9b2ba0b6e9d06f77bfd2d99f34b9a08e139b047f1c916a48
3
+ size 4630647016
model-00020-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cbf3b420b673a88769315d8ce192807bc38dbc849c3ae54567d620aaec93b7ed
3
+ size 4932570160
model-00021-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa037061ff66d64fa0a3152c32bf2452cac8a87848a99fa50b1b894d6806d9af
3
+ size 4630647016
model-00022-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:48e9e21b78f808c4726a896d3791a26c787c053bc2182e109ce5b3b2c5e81ca1
3
+ size 4932570160
model-00023-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:325fc87cb641c0f8632170b4d2ecbb8a790fa42cf4d52c416d86c5389049b949
3
+ size 4630647016
model-00024-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c8faaeba1c8166d9ab926d84a05672e6adeb687e03b20724e929945d51f01ca9
3
+ size 4932570160
model-00025-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4709e6e48efaf8c73933f06ac7e90f689ef07b80811707291e241499139dca17
3
+ size 4630647016
model-00026-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d29e50dd7fb1d407180bf86bc641a216a77b4be74af130bb930449e3c634347
3
+ size 4932570160
model-00027-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6175ba8c92031a46d81ab5b815724110f155fa6efdf00e162e58b5b92dc04a5f
3
+ size 4630647016
model-00028-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b08da7afe4edfc8863829ece066ad2306d31e9190cae1f7498e0ada93a0efdc
3
+ size 4932570160
model-00029-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:375bc82d48d042ab84f72e6c986b954b865286b15b5f364b0c23c1526d7105ab
3
+ size 4630647016
model-00030-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:abdea2093d079a3f01f2dc2815412fb8fb58b3932448c10a5dbef3325b71d4c4
3
+ size 4932570160
model-00031-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7f83ffc139abca27d301931d476347e484375f4100e0d98ca68b8cb86e52a65d
3
+ size 4630647016
model-00032-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71e78f25c7b2d4db79f65373d597970173872b9a3808401b61d3b28701166244
3
+ size 4932570160
model-00033-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fbb68bc5ef9951cf2e0182a0b5cbc3e3773de794614cd869cda015fd4c386678
3
+ size 4630647016
model-00034-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:964c58979eb119eb30d052da0251c14138629d12522cb664475c354cff1852d9
3
+ size 4932570160
model-00035-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2b5aa300bab7e85a480d0a7f847816a458398da82a4f8a5e512b19564809e3a
3
+ size 4630647016
model-00036-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd9195bea0a5992ddce286acfcc2ee9de6859ae8faaaf9176fcb854c89803248
3
+ size 4932570160
model-00037-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67915574bf7aad4ee6299f33e7706ab40e0437e3014ea6c7a42eadcf582099cc
3
+ size 4630647016
model-00038-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd5cdcddffcd6b4dde66e5fe7070e8a8447981471baf345fa1db79b080d8e521
3
+ size 4932570160
model-00039-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:027128272cf78c68ce3261cd8f7834f3e52e1d12bd7793fe2f3f10e33f629d3a
3
+ size 4630647016
model-00040-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:33fd2044a3e4a6f5557953fe7683d8fdbd37a83d56568e5aacf2435d7e8d3a5b
3
+ size 4932570160
model-00041-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94fb40bbc0a21692d63a2b59a3648cf5d380232e53f38860a28f5d907fc4c6a9
3
+ size 4630647016
model-00042-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc8e474d63ebeb8a0704470bd45a83c4d6d3214e189b250e0c010ebcc2363ec1
3
+ size 4932570160
model-00043-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:42ddf7231acf5bc45f939b6aeba554cb6fbe4753f3ac815df5c9b50f06484e11
3
+ size 4630647016
model-00044-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25b2f8a47a783918462e4ba4547bf98645dad8203fd22b5f78b773d82d4bba3d
3
+ size 4932570160
model-00045-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fadbc818e2f55c0cc1750e64502436aa7965682fe80db297a83861eda1bcd2a4
3
+ size 4630647016
model-00046-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c02fe1c87881687b3b0927cfbc1ec41edf3c4f7cf87f4db7661a3f89fc7ebc0
3
+ size 4932570160
model-00047-of-00086.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21f03e7a296a843b26247ec41217d6d5ce9e6abcc251fbeae5d66519eeb8cb4b
3
+ size 4630647016