Norod78 committed on
Commit
62298dd
1 Parent(s): d4c8f27

Testing JAX model training. After a few iterations (still in the first epoch), testing the model save.

Browse files
config.json CHANGED
@@ -1,4 +1,5 @@
1
  {
 
2
  "_num_labels": 1,
3
  "activation_function": "gelu_new",
4
  "architectures": [
@@ -24,6 +25,7 @@
24
  "n_inner": null,
25
  "n_layer": 6,
26
  "n_positions": 1024,
 
27
  "resid_pdrop": 0.1,
28
  "scale_attn_weights": true,
29
  "summary_activation": null,
@@ -37,6 +39,7 @@
37
  "max_length": 50
38
  }
39
  },
 
40
  "transformers_version": "4.9.0.dev0",
41
  "use_cache": true,
42
  "vocab_size": 50257
 
1
  {
2
+ "_name_or_path": "./distilgpt2-pretrained-he",
3
  "_num_labels": 1,
4
  "activation_function": "gelu_new",
5
  "architectures": [
 
25
  "n_inner": null,
26
  "n_layer": 6,
27
  "n_positions": 1024,
28
+ "pad_token_id": 50257,
29
  "resid_pdrop": 0.1,
30
  "scale_attn_weights": true,
31
  "summary_activation": null,
 
39
  "max_length": 50
40
  }
41
  },
42
+ "torch_dtype": "float32",
43
  "transformers_version": "4.9.0.dev0",
44
  "use_cache": true,
45
  "vocab_size": 50257
flax_model.msgpack → pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d0246ab09910a61fafb618e5b1e64abf1e8c8a88e6c984b3505a8aea1e936555
3
- size 248885212
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7853d0c6cbd54e6790f32280ed40bb1747b08ff855aeb4bf6d63e2b8e74ee52f
3
+ size 333973553