Saving weights and log at step 900000
Browse files
- README.md +2 -2
- config.json +2 -0
- flax_model.msgpack +1 -1
- opt_state.msgpack +1 -1
- pytorch_model.bin +1 -1
- runs/events.out.tfevents.1641156371.t1v-n-2f64d7c8-w-0.13342.0.v2 +2 -2
- training_state.json +1 -1
README.md
CHANGED
@@ -20,7 +20,7 @@ Dataset:
|
|
20 |
|
21 |
* [mC4 NL Cleaned](https://huggingface.co/datasets/yhavinga/mc4_nl_cleaned)
|
22 |
* dataset config: tiny (3B tokens)
|
23 |
-
* dataset config:
|
24 |
|
25 |
Tokenizer:
|
26 |
|
@@ -30,7 +30,7 @@ Tokenizer:
|
|
30 |
Training details:
|
31 |
|
32 |
* Trained for 70K steps (batch size 64) to ppl 27 on mc4 nl tiny 1 epoch
|
33 |
-
* Trained for
|
34 |
* Training continuing
|
35 |
* Block size: 512
|
36 |
* Optimizer: adafactor
|
|
|
20 |
|
21 |
* [mC4 NL Cleaned](https://huggingface.co/datasets/yhavinga/mc4_nl_cleaned)
|
22 |
* dataset config: tiny (3B tokens)
|
23 |
+
* dataset config: large (24B tokens)
|
24 |
|
25 |
Tokenizer:
|
26 |
|
|
|
30 |
Training details:
|
31 |
|
32 |
* Trained for 70K steps (batch size 64) to ppl 27 on mc4 nl tiny 1 epoch
|
33 |
+
* Trained for 900K steps (batch size 16) to ppl 16.2 on mc4 nl full
|
34 |
* Training continuing
|
35 |
* Block size: 512
|
36 |
* Optimizer: adafactor
|
config.json
CHANGED
@@ -1,4 +1,5 @@
|
|
1 |
{
|
|
|
2 |
"activation_function": "gelu_new",
|
3 |
"architectures": [
|
4 |
"GPTNeoForCausalLM"
|
@@ -65,6 +66,7 @@
|
|
65 |
}
|
66 |
},
|
67 |
"tokenizer_class": "GPT2Tokenizer",
|
|
|
68 |
"transformers_version": "4.13.0",
|
69 |
"use_cache": true,
|
70 |
"vocab_size": 50257,
|
|
|
1 |
{
|
2 |
+
"_name_or_path": ".",
|
3 |
"activation_function": "gelu_new",
|
4 |
"architectures": [
|
5 |
"GPTNeoForCausalLM"
|
|
|
66 |
}
|
67 |
},
|
68 |
"tokenizer_class": "GPT2Tokenizer",
|
69 |
+
"torch_dtype": "float32",
|
70 |
"transformers_version": "4.13.0",
|
71 |
"use_cache": true,
|
72 |
"vocab_size": 50257,
|
flax_model.msgpack
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5262314590
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9dd15b6b3443195b649c98349863cdb4ea5db416d0af59deb752c4f0cefda8b7
|
3 |
size 5262314590
|
opt_state.msgpack
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5778100
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5b55d3da77e33f432751d34890d4cc45a029eaae21192b2b6edaa17ef14e6bcf
|
3 |
size 5778100
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 5363100545
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3d626da36deceb8dcf070caea44a7d037038c4b1167bd59b28909295bdecb588
|
3 |
size 5363100545
|
runs/events.out.tfevents.1641156371.t1v-n-2f64d7c8-w-0.13342.0.v2
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b4876750cb0514a8349c9db283b9941a0aad2ea9877924149192c49e19162db1
|
3 |
+
size 134442753
|
training_state.json
CHANGED
@@ -1 +1 @@
|
|
1 |
-
{"step":
|
|
|
1 |
+
{"step": 900001}
|