yhavinga committed on
Commit
1f59a80
1 Parent(s): b61ea27

Saving weights and log at step 900000

Browse files
README.md CHANGED
@@ -20,7 +20,7 @@ Dataset:
20
 
21
  * [mC4 NL Cleaned](https://huggingface.co/datasets/yhavinga/mc4_nl_cleaned)
22
  * dataset config: tiny (3B tokens)
23
- * dataset config: full (33B tokens)
24
 
25
  Tokenizer:
26
 
@@ -30,7 +30,7 @@ Tokenizer:
30
  Training details:
31
 
32
  * Trained for 70K steps (batch size 64) to ppl 27 on mc4 nl tiny 1 epoch
33
- * Trained for 760K steps (batch size 16) to ppl 16.8 on mc4 nl full
34
  * Training continuing
35
  * Block size: 512
36
  * Optimizer: adafactor
 
20
 
21
  * [mC4 NL Cleaned](https://huggingface.co/datasets/yhavinga/mc4_nl_cleaned)
22
  * dataset config: tiny (3B tokens)
23
+ * dataset config: large (24B tokens)
24
 
25
  Tokenizer:
26
 
 
30
  Training details:
31
 
32
  * Trained for 70K steps (batch size 64) to ppl 27 on mc4 nl tiny 1 epoch
33
+ * Trained for 900K steps (batch size 16) to ppl 16.2 on mc4 nl full
34
  * Training continuing
35
  * Block size: 512
36
  * Optimizer: adafactor
config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "activation_function": "gelu_new",
3
  "architectures": [
4
  "GPTNeoForCausalLM"
@@ -65,6 +66,7 @@
65
  }
66
  },
67
  "tokenizer_class": "GPT2Tokenizer",
 
68
  "transformers_version": "4.13.0",
69
  "use_cache": true,
70
  "vocab_size": 50257,
 
1
  {
2
+ "_name_or_path": ".",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPTNeoForCausalLM"
 
66
  }
67
  },
68
  "tokenizer_class": "GPT2Tokenizer",
69
+ "torch_dtype": "float32",
70
  "transformers_version": "4.13.0",
71
  "use_cache": true,
72
  "vocab_size": 50257,
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9219656705501e15f9f93b78df01c8a339552af0685161f55febd8dc2edca3fc
3
  size 5262314590
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9dd15b6b3443195b649c98349863cdb4ea5db416d0af59deb752c4f0cefda8b7
3
  size 5262314590
opt_state.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9415497a6e41b76b0baa60a31beb021fe2a13f11f513106d350c89beda73f7f6
3
  size 5778100
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5b55d3da77e33f432751d34890d4cc45a029eaae21192b2b6edaa17ef14e6bcf
3
  size 5778100
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71eac87d5c3e71477204c4b97e36e0beb17c43686afc160ee20955316ab50c80
3
  size 5363100545
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d626da36deceb8dcf070caea44a7d037038c4b1167bd59b28909295bdecb588
3
  size 5363100545
runs/events.out.tfevents.1641156371.t1v-n-2f64d7c8-w-0.13342.0.v2 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:909e7ac40e6afe9723bd239188da21469a915ed6c44b30318bdca1e48dd9ba04
3
- size 114081255
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4876750cb0514a8349c9db283b9941a0aad2ea9877924149192c49e19162db1
3
+ size 134442753
training_state.json CHANGED
@@ -1 +1 @@
1
- {"step": 760001}
 
1
+ {"step": 900001}