Upload folder using huggingface_hub
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +0 -0
- nemo/checkpoints/gemma-7b-sql-nemo.nemo +3 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/common.pt +3 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/metadata.json +1 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.final_layernorm.weight/.zarray +14 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.final_layernorm.weight/0 +0 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_0_28.pt +3 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_10_28.pt +3 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_11_28.pt +3 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_12_28.pt +3 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_13_28.pt +3 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_14_28.pt +3 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_15_28.pt +3 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_16_28.pt +3 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_17_28.pt +3 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_18_28.pt +3 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_19_28.pt +3 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_1_28.pt +3 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_20_28.pt +3 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_21_28.pt +3 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_22_28.pt +3 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_23_28.pt +3 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_24_28.pt +3 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_25_28.pt +3 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_26_28.pt +3 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_27_28.pt +3 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_2_28.pt +3 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_3_28.pt +3 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_4_28.pt +3 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_5_28.pt +3 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_6_28.pt +3 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_7_28.pt +3 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_8_28.pt +3 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_9_28.pt +3 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/.zarray +16 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/0.0 +0 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/1.0 +0 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/10.0 +0 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/11.0 +0 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/12.0 +0 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/13.0 +0 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/14.0 +0 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/15.0 +0 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/16.0 +0 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/17.0 +0 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/18.0 +0 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/19.0 +0 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/2.0 +0 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/20.0 +0 -0
- nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/21.0 +0 -0
.gitattributes
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
nemo/checkpoints/gemma-7b-sql-nemo.nemo
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5913b2f1371a31e7a25b5cdc2a1946e4be67daa4591ae06b4def257d474b87a9
|
3 |
+
size 17081016320
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/common.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d358ddab6b28f844cf94b3d0786a5ec3c7fb8b3968d0b59e58a1fed8ce3d16d6
|
3 |
+
size 25175
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/metadata.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"sharded_backend": "zarr", "sharded_backend_version": 1, "common_backend": "torch", "common_backend_version": 1}
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.final_layernorm.weight/.zarray
ADDED
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"chunks": [
|
3 |
+
3072
|
4 |
+
],
|
5 |
+
"compressor": null,
|
6 |
+
"dtype": "bfloat16",
|
7 |
+
"fill_value": null,
|
8 |
+
"filters": null,
|
9 |
+
"order": "C",
|
10 |
+
"shape": [
|
11 |
+
3072
|
12 |
+
],
|
13 |
+
"zarr_format": 2
|
14 |
+
}
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.final_layernorm.weight/0
ADDED
Binary file (6.14 kB). View file
|
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_0_28.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:63f63a650a1e81efb1222bec5ea786efd0bfb9a5e80530f442bf91f9acdbf8df
|
3 |
+
size 1836
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_10_28.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:227c205836c7bbcc512b59dc9008d0c412699bd03df9aebfee0b7bd3c3e329c4
|
3 |
+
size 1840
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_11_28.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b05ac4f60393e0c09711fc249a87bc2e036047e8dcc5d63402503e7d1d662c31
|
3 |
+
size 1840
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_12_28.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:764ad22a31d074c0951a6a5dbd73f033ca459ecbb5ac362236981e8fe12da56c
|
3 |
+
size 1840
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_13_28.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e620f1086ba7f104be16365fc69e8487a32d8bd7acbdf63f87bf802447b6466d
|
3 |
+
size 1840
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_14_28.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6910bdfa718b3600854184a6b1f32a8e9d9be3ce10c17f12fe6db79120786a2f
|
3 |
+
size 1840
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_15_28.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9acd9fb25e6ab8a85b6ab125c958ee5480710dab44592b1fb84a4eb69872a013
|
3 |
+
size 1840
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_16_28.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:885ef049145a29bcdab6fefc355a8e1236bd882c226930cf530a3ba0bd4ee721
|
3 |
+
size 1840
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_17_28.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:22bfd3234ccd2293b4de4762d491d064b46cb4558254220f7634545715025838
|
3 |
+
size 1840
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_18_28.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5c02c0faf91090dbe1822a700e9bc7f91f16e835f7f826eab91a0443d48cac46
|
3 |
+
size 1840
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_19_28.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cf409a5924123568716ca4dcbcd3b5f1ba9624a1713a170c91d025cb035207cc
|
3 |
+
size 1840
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_1_28.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5e8b079d0103f30cd750bc238764389ba0c37424878264066e76bfdbe45c0562
|
3 |
+
size 1836
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_20_28.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2188e59e72780c58a3acd8e6ff7fe03f373d14a42a0e36c69f1583b349cbc4a7
|
3 |
+
size 1840
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_21_28.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a5a40ef6b59bc7e2ac745df57f28cc08ba614e13e73c12c7a16ac9e3bb005a74
|
3 |
+
size 1840
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_22_28.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:eb827b13ea508701f2eb587f7c075983c97a7aedac472cc63f237756c5961c76
|
3 |
+
size 1840
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_23_28.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9a1927306a894bb424ec9e0ce4a672d7afb28d3bb9d5db1c6e1268c2ec58232a
|
3 |
+
size 1840
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_24_28.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:84ab4816acfe4bf4df6814b170665c0b14ff82e64c6a3db350780a5ee58961e1
|
3 |
+
size 1840
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_25_28.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0e0bc28d64fcfc1620f51ba7912eb5f3e6757292e45e4256c2d11914214bbc77
|
3 |
+
size 1840
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_26_28.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a4e987570aaa427c84d57db99f8a2529370b6bb4bb6e61a5c0a280fc3f8b1f3f
|
3 |
+
size 1840
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_27_28.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:addb78803cdfa5fcc25acb716a18e71def373b29af9d89294cc8bdda95b3757d
|
3 |
+
size 1840
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_2_28.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ce21d6653df4f0be7621f065b58aa61c970bef1c6dbbcbf018391ba742f93e4d
|
3 |
+
size 1836
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_3_28.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9e17ee9ca84caa8e84a29eae3086e03cdcd8242fa586638054520904b7d5811a
|
3 |
+
size 1836
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_4_28.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ceab42286b21b2bc4a581e3b7a3cb3de527ddf20deea8354d87bef6ec9b8b648
|
3 |
+
size 1836
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_5_28.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c10d49188641f77ed026550da10a961d3e7096ed1c71fcdaf8c4a4964dd5d8b2
|
3 |
+
size 1836
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_6_28.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:f9b99c08a5951d88fddc30f73527ab22295e4bdc040febc36a47616428f879fc
|
3 |
+
size 1836
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_7_28.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:65b2a3db54c013bd9849efb2db9c10758012e0f5a5d1f31397e97482756600e6
|
3 |
+
size 1836
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_8_28.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d9e62e3c64c74eceaaa2bbd8b35484b88271cfe637474693ce93c978f0bed7ad
|
3 |
+
size 1836
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1._extra_state/shard_9_28.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0069d31c4f93e27495b18776b0c2fe67027ca0e663f5174f085b69c0cd60df36
|
3 |
+
size 1836
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/.zarray
ADDED
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"chunks": [
|
3 |
+
1,
|
4 |
+
3072
|
5 |
+
],
|
6 |
+
"compressor": null,
|
7 |
+
"dtype": "bfloat16",
|
8 |
+
"fill_value": null,
|
9 |
+
"filters": null,
|
10 |
+
"order": "C",
|
11 |
+
"shape": [
|
12 |
+
28,
|
13 |
+
3072
|
14 |
+
],
|
15 |
+
"zarr_format": 2
|
16 |
+
}
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/0.0
ADDED
Binary file (6.14 kB). View file
|
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/1.0
ADDED
Binary file (6.14 kB). View file
|
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/10.0
ADDED
Binary file (6.14 kB). View file
|
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/11.0
ADDED
Binary file (6.14 kB). View file
|
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/12.0
ADDED
Binary file (6.14 kB). View file
|
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/13.0
ADDED
Binary file (6.14 kB). View file
|
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/14.0
ADDED
Binary file (6.14 kB). View file
|
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/15.0
ADDED
Binary file (6.14 kB). View file
|
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/16.0
ADDED
Binary file (6.14 kB). View file
|
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/17.0
ADDED
Binary file (6.14 kB). View file
|
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/18.0
ADDED
Binary file (6.14 kB). View file
|
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/19.0
ADDED
Binary file (6.14 kB). View file
|
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/2.0
ADDED
Binary file (6.14 kB). View file
|
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/20.0
ADDED
Binary file (6.14 kB). View file
|
|
nemo/checkpoints/megatron_gpt_sft--validation_loss=0.000-step=613-consumed_samples=78464-epoch=1-last/model.decoder.layers.mlp.linear_fc1.layer_norm_weight/21.0
ADDED
Binary file (6.14 kB). View file
|
|