naman1011 committed on
Commit
c8f42db
·
1 Parent(s): 5e95e51

Training in progress, step 500, checkpoint

Browse files
last-checkpoint/config.json ADDED
@@ -0,0 +1,61 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "t5-small",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "classifier_dropout": 0.0,
7
+ "d_ff": 2048,
8
+ "d_kv": 64,
9
+ "d_model": 512,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "relu",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 1,
14
+ "feed_forward_proj": "relu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": false,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "model_type": "t5",
20
+ "n_positions": 512,
21
+ "num_decoder_layers": 6,
22
+ "num_heads": 8,
23
+ "num_layers": 6,
24
+ "output_past": true,
25
+ "pad_token_id": 0,
26
+ "relative_attention_max_distance": 128,
27
+ "relative_attention_num_buckets": 32,
28
+ "task_specific_params": {
29
+ "summarization": {
30
+ "early_stopping": true,
31
+ "length_penalty": 2.0,
32
+ "max_length": 200,
33
+ "min_length": 30,
34
+ "no_repeat_ngram_size": 3,
35
+ "num_beams": 4,
36
+ "prefix": "summarize: "
37
+ },
38
+ "translation_en_to_de": {
39
+ "early_stopping": true,
40
+ "max_length": 300,
41
+ "num_beams": 4,
42
+ "prefix": "translate English to German: "
43
+ },
44
+ "translation_en_to_fr": {
45
+ "early_stopping": true,
46
+ "max_length": 300,
47
+ "num_beams": 4,
48
+ "prefix": "translate English to French: "
49
+ },
50
+ "translation_en_to_ro": {
51
+ "early_stopping": true,
52
+ "max_length": 300,
53
+ "num_beams": 4,
54
+ "prefix": "translate English to Romanian: "
55
+ }
56
+ },
57
+ "torch_dtype": "float32",
58
+ "transformers_version": "4.33.1",
59
+ "use_cache": true,
60
+ "vocab_size": 32128
61
+ }
last-checkpoint/generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "decoder_start_token_id": 0,
4
+ "eos_token_id": 1,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "4.33.1"
7
+ }
last-checkpoint/global_step500/zero_pp_rank_0_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83e8739b47bd77f214d2265d8e968c48d849b22085a58dab0b7c3ccba438581f
3
+ size 77125
last-checkpoint/global_step500/zero_pp_rank_0_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:599deb700a2f4c61b7adc819874dd469192f52018cb4b875128a64fded13d9a9
3
+ size 181523565
last-checkpoint/global_step500/zero_pp_rank_1_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3929c020a78a4a0cce2c67df9b850d5a86b92225a7630a32fc9483f43ac91f92
3
+ size 76997
last-checkpoint/global_step500/zero_pp_rank_1_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f07594c86e8871c23855f2a19cdc79eb85f0e7d5d721efc1b9881baf6e0eb19b
3
+ size 181523565
last-checkpoint/global_step500/zero_pp_rank_2_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f75706537bfb250662e279566323ce34f592140890e541fbc25136c94fdeb5ad
3
+ size 76997
last-checkpoint/global_step500/zero_pp_rank_2_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0029132f861e09e7eab38a0ce01efab1b1df833de23e0e43184d00cf1fa8c75a
3
+ size 181523565
last-checkpoint/global_step500/zero_pp_rank_3_mp_rank_00_model_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:987560874e3a316caeab90a1bff849cb8577dcb826748a624e94454e6657bc81
3
+ size 76997
last-checkpoint/global_step500/zero_pp_rank_3_mp_rank_00_optim_states.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bca6522f0189356f28382d62016ab596ac3ecc5570fdaf78143aeec62e2855e6
3
+ size 181523565
last-checkpoint/latest CHANGED
@@ -1 +1 @@
1
- global_step2000
 
1
+ global_step500
last-checkpoint/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:481596c94b33296e9ed55da626f3e03b5bea14ef8b76b47930fb60b0bc92309e
3
+ size 242070038
last-checkpoint/rng_state_0.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50c45dee7a55d713aa15eaefbdbd4b45df9e67d02bb6ccefa86e1468c6ea416f
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2c1308702d9b086a5f1f3508e8349ed8fd156bcbf917cc570469986ebf544b9
3
  size 15024
last-checkpoint/rng_state_1.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:abb041f181920f1b36237d3ff78db879a26def694eccb0894945dbd4a97cc65a
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a686bae368655e506dce0c104615593cc3990e826fa497b96df4f213df009708
3
  size 15024
last-checkpoint/rng_state_2.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3399d3d8d8d656a349c7e785e13454283813d091435a0af845c955519e7653c
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f82111dd01fcc7a0012f668d56030cec3e7d214b8fb3347d56acb344101f8d5
3
  size 15024
last-checkpoint/rng_state_3.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b1eea31e444764e3e1f1ec7465ddb8eefb5bbbefde013a11a62c8a1b296356b1
3
  size 15024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5f70bbd4c2166937b01de500ebb8e2d351aea43a0dd44583afdaabc82d411b5
3
  size 15024
last-checkpoint/trainer_state.json CHANGED
@@ -1,31 +1,25 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7818608287724785,
5
  "eval_steps": 500,
6
- "global_step": 2000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.39,
13
- "learning_rate": 0.0001,
14
- "loss": 0.3412,
15
- "step": 1000
16
- },
17
- {
18
- "epoch": 0.78,
19
- "learning_rate": 0.0001,
20
- "loss": 0.1473,
21
- "step": 2000
22
  }
23
  ],
24
- "logging_steps": 1000,
25
- "max_steps": 12790,
26
- "num_train_epochs": 5,
27
- "save_steps": 1000,
28
- "total_flos": 26238293901312.0,
29
  "trial_name": null,
30
  "trial_params": null
31
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.0,
5
  "eval_steps": 500,
6
+ "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 2.0,
13
+ "learning_rate": 0.01,
14
+ "loss": 0.6125,
15
+ "step": 500
 
 
 
 
 
 
16
  }
17
  ],
18
+ "logging_steps": 500,
19
+ "max_steps": 2500,
20
+ "num_train_epochs": 10,
21
+ "save_steps": 500,
22
+ "total_flos": 84516077568.0,
23
  "trial_name": null,
24
  "trial_params": null
25
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e88bb83dac898ccb88b94e8a2bbce48f354e3d93653571d7cd72c8fe00fe3205
3
  size 6840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8639ef87c97e0b1db1451ec2c0e630582811a1a01d4762880d5387664e980641
3
  size 6840