Saving weights and log at step 900000

Files changed (7) hide show

README.md CHANGED Viewed

@@ -20,7 +20,7 @@ Dataset:
 * [mC4 NL Cleaned](https://huggingface.co/datasets/yhavinga/mc4_nl_cleaned)
 * dataset config: tiny (3B tokens)
-* dataset config: full (33B tokens)
 Tokenizer:
@@ -30,7 +30,7 @@ Tokenizer:
 Training details:
 * Trained for 70K steps (batch size 64) to ppl 27 on mc4 nl tiny 1 epoch
-* Trained for 760K steps (batch size 16) to ppl 16.8 on mc4 nl full
 * Training continuing
 * Block size: 512
 * Optimizer: adafactor

 * [mC4 NL Cleaned](https://huggingface.co/datasets/yhavinga/mc4_nl_cleaned)
 * dataset config: tiny (3B tokens)
+* dataset config: large (24B tokens)
 Tokenizer:
 Training details:
 * Trained for 70K steps (batch size 64) to ppl 27 on mc4 nl tiny 1 epoch
+* Trained for 900K steps (batch size 16) to ppl 16.2 on mc4 nl full
 * Training continuing
 * Block size: 512
 * Optimizer: adafactor

config.json CHANGED Viewed

@@ -1,4 +1,5 @@
 {
   "activation_function": "gelu_new",
   "architectures": [
     "GPTNeoForCausalLM"
@@ -65,6 +66,7 @@
     }
   },
   "tokenizer_class": "GPT2Tokenizer",
   "transformers_version": "4.13.0",
   "use_cache": true,
   "vocab_size": 50257,

 {
+  "_name_or_path": ".",
   "activation_function": "gelu_new",
   "architectures": [
     "GPTNeoForCausalLM"
     }
   },
   "tokenizer_class": "GPT2Tokenizer",
+  "torch_dtype": "float32",
   "transformers_version": "4.13.0",
   "use_cache": true,
   "vocab_size": 50257,

flax_model.msgpack CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9219656705501e15f9f93b78df01c8a339552af0685161f55febd8dc2edca3fc
 size 5262314590

 version https://git-lfs.github.com/spec/v1
+oid sha256:9dd15b6b3443195b649c98349863cdb4ea5db416d0af59deb752c4f0cefda8b7
 size 5262314590

opt_state.msgpack CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9415497a6e41b76b0baa60a31beb021fe2a13f11f513106d350c89beda73f7f6
 size 5778100

 version https://git-lfs.github.com/spec/v1
+oid sha256:5b55d3da77e33f432751d34890d4cc45a029eaae21192b2b6edaa17ef14e6bcf
 size 5778100

pytorch_model.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:71eac87d5c3e71477204c4b97e36e0beb17c43686afc160ee20955316ab50c80
 size 5363100545

 version https://git-lfs.github.com/spec/v1
+oid sha256:3d626da36deceb8dcf070caea44a7d037038c4b1167bd59b28909295bdecb588
 size 5363100545

runs/events.out.tfevents.1641156371.t1v-n-2f64d7c8-w-0.13342.0.v2 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:909e7ac40e6afe9723bd239188da21469a915ed6c44b30318bdca1e48dd9ba04
-size 114081255

 version https://git-lfs.github.com/spec/v1
+oid sha256:b4876750cb0514a8349c9db283b9941a0aad2ea9877924149192c49e19162db1
+size 134442753

training_state.json CHANGED Viewed

	@@ -1 +1 @@
1	- {"step": 760001}


1	+ {"step": 900001}