Joemgu committed on
Commit
b9a9be5
1 Parent(s): 0b7fc29

Training in progress, step 200

Browse files
.gitattributes CHANGED
@@ -32,5 +32,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
35
- last-checkpoint/tokenizer.json filter=lfs diff=lfs merge=lfs -text
36
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
32
  *.zip filter=lfs diff=lfs merge=lfs -text
33
  *.zst filter=lfs diff=lfs merge=lfs -text
34
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
35
  tokenizer.json filter=lfs diff=lfs merge=lfs -text
config.json CHANGED
@@ -8,7 +8,7 @@
8
  "d_model": 768,
9
  "decoder_start_token_id": 0,
10
  "dense_act_fn": "gelu_new",
11
- "dropout_rate": 0.0,
12
  "encoder_attention_type": "transient-global",
13
  "eos_token_id": 1,
14
  "feed_forward_proj": "gated-gelu",
@@ -42,7 +42,7 @@
42
  },
43
  "tie_word_embeddings": false,
44
  "torch_dtype": "float32",
45
- "transformers_version": "4.30.0",
46
  "use_cache": false,
47
  "vocab_size": 256384
48
  }
 
8
  "d_model": 768,
9
  "decoder_start_token_id": 0,
10
  "dense_act_fn": "gelu_new",
11
+ "dropout_rate": 0.1,
12
  "encoder_attention_type": "transient-global",
13
  "eos_token_id": 1,
14
  "feed_forward_proj": "gated-gelu",
 
42
  },
43
  "tie_word_embeddings": false,
44
  "torch_dtype": "float32",
45
+ "transformers_version": "4.30.2",
46
  "use_cache": false,
47
  "vocab_size": 256384
48
  }
last-checkpoint/config.json CHANGED
@@ -8,7 +8,7 @@
8
  "d_model": 768,
9
  "decoder_start_token_id": 0,
10
  "dense_act_fn": "gelu_new",
11
- "dropout_rate": 0.0,
12
  "encoder_attention_type": "transient-global",
13
  "eos_token_id": 1,
14
  "feed_forward_proj": "gated-gelu",
@@ -42,7 +42,7 @@
42
  },
43
  "tie_word_embeddings": false,
44
  "torch_dtype": "float32",
45
- "transformers_version": "4.30.0",
46
  "use_cache": false,
47
  "vocab_size": 256384
48
  }
 
8
  "d_model": 768,
9
  "decoder_start_token_id": 0,
10
  "dense_act_fn": "gelu_new",
11
+ "dropout_rate": 0.1,
12
  "encoder_attention_type": "transient-global",
13
  "eos_token_id": 1,
14
  "feed_forward_proj": "gated-gelu",
 
42
  },
43
  "tie_word_embeddings": false,
44
  "torch_dtype": "float32",
45
+ "transformers_version": "4.30.2",
46
  "use_cache": false,
47
  "vocab_size": 256384
48
  }
last-checkpoint/generation_config.json CHANGED
@@ -1,6 +1,8 @@
1
  {
 
2
  "decoder_start_token_id": 0,
3
- "max_length": 1024,
4
- "num_beams": 4,
5
- "transformers_version": "4.30.0"
 
6
  }
 
1
  {
2
+ "_from_model_config": true,
3
  "decoder_start_token_id": 0,
4
+ "eos_token_id": 1,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "4.30.2",
7
+ "use_cache": false
8
  }
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6f0b117cb3b5ca9f3104796b19da86da2f7d1e2c40756714f09621415d49b13
3
  size 4736616809
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e1c5ea18bd87fdb611133a2de17cde918710c98486e4c43afa42b6a54bfdd26
3
  size 4736616809
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cdb640acc1ad0b52fbe5849e2fe9ec8023752b0a552442e328c1f4ad282eab9d
3
  size 2368281769
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8e0394726ad0e28741282c318cacf3dc28269f23a93f3ff84875f2503165cc9
3
  size 2368281769
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f471bea3861187dceb51828819a95fc512e9746b6a62f37798a37a6d6a9d142
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1784c9e20ffdc46b706882695c2108245d7626a328b6d70a37d079ad1fbbc989
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b35301bff95034222da17e187f0935a7b363754032e82a831e804e9485f6ec34
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97e255bbc5f7f71168348462c22fbdbbadbc23b19d6869fc621700a4f4ba07b1
3
  size 627
last-checkpoint/tokenizer.json DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:750ffbacc45c2f284f16da1d281fedfe2ed16f956306c0999ccaeb7b08554793
3
- size 16780024
 
 
 
 
last-checkpoint/trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff
 
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0cb1ee3100dd5731f643b0702779cb0f885a239198814a1b5fd38a1b3541b399
3
- size 5371
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81616117252fb3172fd7b27ba4471c6643d6cc085d4f229ee6a2e5de41127204
3
+ size 4091
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cdb640acc1ad0b52fbe5849e2fe9ec8023752b0a552442e328c1f4ad282eab9d
3
  size 2368281769
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8e0394726ad0e28741282c318cacf3dc28269f23a93f3ff84875f2503165cc9
3
  size 2368281769
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0cb1ee3100dd5731f643b0702779cb0f885a239198814a1b5fd38a1b3541b399
3
- size 5371
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81616117252fb3172fd7b27ba4471c6643d6cc085d4f229ee6a2e5de41127204
3
+ size 4091