giannisan committed on
Commit
0a6e3a9
·
verified ·
1 Parent(s): 4f574ab

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. .ipynb_checkpoints/config-checkpoint.json +39 -0
  2. README.md +38 -0
  3. config.json +39 -0
  4. mergekit_config.yml +10 -0
  5. model-00001-of-00287.safetensors +3 -0
  6. model-00002-of-00287.safetensors +3 -0
  7. model-00003-of-00287.safetensors +3 -0
  8. model-00004-of-00287.safetensors +3 -0
  9. model-00005-of-00287.safetensors +3 -0
  10. model-00006-of-00287.safetensors +3 -0
  11. model-00007-of-00287.safetensors +3 -0
  12. model-00008-of-00287.safetensors +3 -0
  13. model-00009-of-00287.safetensors +3 -0
  14. model-00010-of-00287.safetensors +3 -0
  15. model-00011-of-00287.safetensors +3 -0
  16. model-00012-of-00287.safetensors +3 -0
  17. model-00013-of-00287.safetensors +3 -0
  18. model-00014-of-00287.safetensors +3 -0
  19. model-00015-of-00287.safetensors +3 -0
  20. model-00016-of-00287.safetensors +3 -0
  21. model-00017-of-00287.safetensors +3 -0
  22. model-00018-of-00287.safetensors +3 -0
  23. model-00019-of-00287.safetensors +3 -0
  24. model-00020-of-00287.safetensors +3 -0
  25. model-00021-of-00287.safetensors +3 -0
  26. model-00022-of-00287.safetensors +3 -0
  27. model-00023-of-00287.safetensors +3 -0
  28. model-00024-of-00287.safetensors +3 -0
  29. model-00025-of-00287.safetensors +3 -0
  30. model-00026-of-00287.safetensors +3 -0
  31. model-00027-of-00287.safetensors +3 -0
  32. model-00028-of-00287.safetensors +3 -0
  33. model-00029-of-00287.safetensors +3 -0
  34. model-00030-of-00287.safetensors +3 -0
  35. model-00031-of-00287.safetensors +3 -0
  36. model-00032-of-00287.safetensors +3 -0
  37. model-00033-of-00287.safetensors +3 -0
  38. model-00034-of-00287.safetensors +3 -0
  39. model-00035-of-00287.safetensors +3 -0
  40. model-00036-of-00287.safetensors +3 -0
  41. model-00037-of-00287.safetensors +3 -0
  42. model-00038-of-00287.safetensors +3 -0
  43. model-00039-of-00287.safetensors +3 -0
  44. model-00040-of-00287.safetensors +3 -0
  45. model-00041-of-00287.safetensors +3 -0
  46. model-00042-of-00287.safetensors +3 -0
  47. model-00043-of-00287.safetensors +3 -0
  48. model-00044-of-00287.safetensors +3 -0
  49. model-00045-of-00287.safetensors +3 -0
  50. model-00046-of-00287.safetensors +3 -0
.ipynb_checkpoints/config-checkpoint.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "../llama-3.1-405B",
3
+ "architectures": [
4
+ "LlamaForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 128000,
9
+ "eos_token_id": [
10
+ 128001,
11
+ 128008,
12
+ 128009
13
+ ],
14
+ "hidden_act": "silu",
15
+ "hidden_size": 16384,
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 53248,
18
+ "max_position_embeddings": 131072,
19
+ "mlp_bias": false,
20
+ "model_type": "llama",
21
+ "num_attention_heads": 128,
22
+ "num_hidden_layers": 188,
23
+ "num_key_value_heads": 8,
24
+ "pretraining_tp": 1,
25
+ "rms_norm_eps": 1e-05,
26
+ "rope_scaling": {
27
+ "factor": 8.0,
28
+ "high_freq_factor": 4.0,
29
+ "low_freq_factor": 1.0,
30
+ "original_max_position_embeddings": 8192,
31
+ "rope_type": "llama3"
32
+ },
33
+ "rope_theta": 500000.0,
34
+ "tie_word_embeddings": false,
35
+ "torch_dtype": "bfloat16",
36
+ "transformers_version": "4.44.2",
37
+ "use_cache": true,
38
+ "vocab_size": 128256
39
+ }
README.md ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model: []
3
+ library_name: transformers
4
+ tags:
5
+ - mergekit
6
+ - merge
7
+
8
+ ---
9
+ # llama3.1-405B-upscaled
10
+
11
+ This is a merge of pre-trained language models created using [mergekit](https://github.com/cg123/mergekit).
12
+
13
+ ## Merge Details
14
+ ### Merge Method
15
+
16
+ This model was merged using the passthrough merge method.
17
+
18
+ ### Models Merged
19
+
20
+ The following models were included in the merge:
21
+ * ../llama-3.1-405B
22
+
23
+ ### Configuration
24
+
25
+ The following YAML configuration was used to produce this model:
26
+
27
+ ```yaml
28
+ slices:
29
+ - sources:
30
+   - model: ../llama-3.1-405B
31
+     layer_range: [0, 94]
32
+ - sources:
33
+   - model: ../llama-3.1-405B
34
+     layer_range: [32, 126]
35
+ merge_method: passthrough
36
+ dtype: bfloat16
37
+
38
+ ```
config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "../llama-3.1-405B",
3
+ "architectures": [
4
+ "LlamaForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 128000,
9
+ "eos_token_id": [
10
+ 128001,
11
+ 128008,
12
+ 128009
13
+ ],
14
+ "hidden_act": "silu",
15
+ "hidden_size": 16384,
16
+ "initializer_range": 0.02,
17
+ "intermediate_size": 53248,
18
+ "max_position_embeddings": 131072,
19
+ "mlp_bias": false,
20
+ "model_type": "llama",
21
+ "num_attention_heads": 128,
22
+ "num_hidden_layers": 188,
23
+ "num_key_value_heads": 8,
24
+ "pretraining_tp": 1,
25
+ "rms_norm_eps": 1e-05,
26
+ "rope_scaling": {
27
+ "factor": 8.0,
28
+ "high_freq_factor": 4.0,
29
+ "low_freq_factor": 1.0,
30
+ "original_max_position_embeddings": 8192,
31
+ "rope_type": "llama3"
32
+ },
33
+ "rope_theta": 500000.0,
34
+ "tie_word_embeddings": false,
35
+ "torch_dtype": "bfloat16",
36
+ "transformers_version": "4.44.2",
37
+ "use_cache": true,
38
+ "vocab_size": 128256
39
+ }
mergekit_config.yml ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ slices:
2
+ - sources:
3
+   - model: ../llama-3.1-405B
4
+     layer_range: [0, 94]
5
+ - sources:
6
+   - model: ../llama-3.1-405B
7
+     layer_range: [32, 126]
8
+ merge_method: passthrough
9
+ dtype: bfloat16
10
+
model-00001-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71b079a02069ebe5a5f4f8135815f136afb1d6fc4423620dcb2a14fa399cdf12
3
+ size 4202692736
model-00002-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bbb90d23098d70c7ce46a8a10ef3c0279d83831022e8f8da53f67874f1a84a66
3
+ size 4202725632
model-00003-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c07820a3c436a61f69fe4e2b8abdffc7bce4a11c44f8105297d5b03a5dae092d
3
+ size 3489661192
model-00004-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cddf5fe2f2493fa499e5109d93200f3ab893a60393fa1ee2c5bd906a91e81979
3
+ size 4630578120
model-00005-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67ba574d49e9abc4ce0a2be8f1a2714021147eb2bc685837f419617b9563a47a
3
+ size 4630578128
model-00006-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:053a525027d39c68ba7714be3ee8115f5034d181f07834c5e4dfed07623f15d7
3
+ size 3489661200
model-00007-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d207b05cc5d2231c4945da4a7e442c62f530e9b9e83a2dbb3c17258aea0d076c
3
+ size 4630578136
model-00008-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba4b8beea09073c80b03430b36a36ef8ecbd3ced233942bc6556c8b4aad26ff8
3
+ size 4630578136
model-00009-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1d4ec8c1baaccaf0e18204e1c7567b854944dabd7ea28f15c77f6ceab22bb92f
3
+ size 3489661200
model-00010-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b8abe4404535d19c5cb41ae0dd1f2cbf2fdb86148478e4444c8e8c6ab0ba62c
3
+ size 4630578136
model-00011-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c70607553627122d6f8aa68e751aa82f62f1537605f1f811a2ed5e99af6c08b9
3
+ size 4630578136
model-00012-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6fc2d886d358bdf776e84f1422111940353366e9542e7bef85cc5f7ee02aa06
3
+ size 3489661200
model-00013-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:94364adcfc6f1a6443ee61481769c91fd70f5ec4cec9cfec9e6e99cd22560004
3
+ size 4630578136
model-00014-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8beeae4c91d1036c85aed338c34cd46b92215a0c389eb2c3119c6cadf306550
3
+ size 4630578136
model-00015-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fefdde8967ad9682976a1015274f530faf6a5a95f9a70ee75b2bb98bedf9a6b7
3
+ size 3489661200
model-00016-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9baf5b29284d241a83b22f099da8a8e71660786ecc72c2539e50c0859ca8331e
3
+ size 4630578136
model-00017-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6a16a6bbfc3010c5a897825b83489da30bb4ccbe7b8b76fe7d788c3257244104
3
+ size 4630578136
model-00018-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c021c9cd563d702c2b662d3f7dc6da76fd0f6f0843fbc93c2caf59af0b85817
3
+ size 3489661200
model-00019-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:142a3039f4551e324c0965d63bc75af83b509ef0881297fe05555b2e0d450c9f
3
+ size 4630578136
model-00020-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7cf0a53706e98176e33d1d1385dcfceeb2343f87a380b4dd1bff25c5e80cf156
3
+ size 4630578136
model-00021-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2adbaebf1886f41bd85955b2ca1fb76f4c2a16332e6a74b44befcb6a07c2c606
3
+ size 3489661200
model-00022-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3dba7531b57647ab164e473188af93aa4cf8912b9dd4f91ac69800ea0a6964b9
3
+ size 4630578128
model-00023-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2ac08499fc375154c23352d3377cea68c30e5665f43f29f9e42e6c70193b8fe0
3
+ size 4630578136
model-00024-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab1b67bbca3a78bcb99f651fb4dddbc4c0ee38448dcf1cd395998193d46a87c4
3
+ size 3489661200
model-00025-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:07e521a275c8064c253cdec25301d43158e8bc40026495b72bc3dc406cc8f66d
3
+ size 4630578136
model-00026-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08d46889b5273ad0a447492524973fab273683e9d81dd6e634403369661edb59
3
+ size 4630578136
model-00027-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:189433dcf3cc66fbe94cd23b7d15920cbfa4fc2879798eb4d5b0a1549550d570
3
+ size 3489661200
model-00028-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43ab5239f834f85cdc6e4e8d728f27d7c14b368107ceaf56c4dfd2f178d6ddc5
3
+ size 4630578136
model-00029-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:47d2f9a0807dc1fa78fdad188d7238f9d3fcb7562918e5faf6597505d50548c5
3
+ size 4630578136
model-00030-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c346fbf14d5872b7d57fdd7f022dba9752389a3bf6d989d92aa4ca4c748da767
3
+ size 3489661200
model-00031-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0f18566f012330964e5e42684b5a10a30ccf1825ba675dfef32b550913bd4bbb
3
+ size 4630578136
model-00032-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a4006b19e4fbd829608b810fcebd64808d5002b24bb351b3d037f461f2af3204
3
+ size 4630578136
model-00033-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a16a7c97101382daa2041da2663def33196b388e13c95c06716976d228f14b2
3
+ size 3489661200
model-00034-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a80ab7d2cf11c32921f1ebf4b9748510760497fead0f6896ad43c222e6a30fc
3
+ size 4630578136
model-00035-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f5798777841c30bb592c77a758cec1a365e12ec39d8ed4aa8c89a25c34a4e8c
3
+ size 4630578136
model-00036-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bee77a27f9fef90303f92f3479c7485dd3e6c6c4f8a385ed73153f8cb0351fcf
3
+ size 3489661200
model-00037-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d6717fa8bed3b5d28c1309be7ab68544a8a79f5cb3c07c328d81ba203049550
3
+ size 4630578136
model-00038-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca9d7f6987d1d63982cb35089c7e1181bae22d4837f8bee59d923649904eff67
3
+ size 4630578128
model-00039-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a4e5febe06fd9aa06cbc2ee8c0df238f668bcf41b6634b480e24622dc2e4330
3
+ size 3489661200
model-00040-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8404c65c08e057b057840e9de4225934c5d8b2fa609ae2d7fd125f77a0dc73dc
3
+ size 4630578136
model-00041-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8db8ffbac09ed221ff12e55ddd46fdd4fbf14863b353d99b66d16c770ace9c56
3
+ size 4630578136
model-00042-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:980309d32c2194add0d3cdac1589e0951e488cf4ca8a43371db3d9e9be9bc8e0
3
+ size 3489661200
model-00043-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:36257e613b4236367d568892b0fb2ca46962e6754a0b3ed789644e3bdd057d7d
3
+ size 4630578136
model-00044-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c0ca6e693e79ba9c382607d24d8bb5faab5425e7cb94d205eecfb3ca8e0cbbca
3
+ size 4630578136
model-00045-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0225cb1274e156b704dc51c78daedbf9acecc0dfc9c6e27ec7effe1f750f89ab
3
+ size 3489661200
model-00046-of-00287.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8c1369bb930e92f7471cabd0bc33df7f6933e42b34b97f660330fd65b46a27ea
3
+ size 4630578136