davidmezzetti committed on
Commit
acaa03e
1 Parent(s): 6f09249

Update model

Browse files
Files changed (4) hide show
  1. config.json +2 -1
  2. pytorch_model.bin +2 -2
  3. trainer_state.json +18 -18
  4. training_args.bin +1 -1
config.json CHANGED
@@ -20,6 +20,7 @@
20
  "num_layers": 6,
21
  "output_past": true,
22
  "pad_token_id": 0,
 
23
  "relative_attention_num_buckets": 32,
24
  "task_specific_params": {
25
  "summarization": {
@@ -51,7 +52,7 @@
51
  }
52
  },
53
  "torch_dtype": "float32",
54
- "transformers_version": "4.17.0",
55
  "use_cache": true,
56
  "vocab_size": 32128
57
  }
20
  "num_layers": 6,
21
  "output_past": true,
22
  "pad_token_id": 0,
23
+ "relative_attention_max_distance": 128,
24
  "relative_attention_num_buckets": 32,
25
  "task_specific_params": {
26
  "summarization": {
52
  }
53
  },
54
  "torch_dtype": "float32",
55
+ "transformers_version": "4.18.0",
56
  "use_cache": true,
57
  "vocab_size": 32128
58
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d508d86071ac3db833a5e088b980356c4982644b23a731d31731204756ce7b9d
3
- size 242085627
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2904f5e412ff63d45b4e98976027f57a15f4c1bfc562fde4ca7ab6b075f23d68
3
+ size 242070267
trainer_state.json CHANGED
@@ -2,42 +2,42 @@
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 5.0,
5
- "global_step": 1750,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 1.43,
12
- "learning_rate": 3.571428571428572e-05,
13
- "loss": 0.8859,
14
  "step": 500
15
  },
16
  {
17
- "epoch": 2.86,
18
- "learning_rate": 2.1428571428571428e-05,
19
- "loss": 0.0459,
20
  "step": 1000
21
  },
22
  {
23
- "epoch": 4.29,
24
- "learning_rate": 7.142857142857143e-06,
25
- "loss": 0.0187,
26
  "step": 1500
27
  },
28
  {
29
  "epoch": 5.0,
30
- "step": 1750,
31
- "total_flos": 29806917058560.0,
32
- "train_loss": 0.27371163518088204,
33
- "train_runtime": 133.5222,
34
- "train_samples_per_second": 52.426,
35
- "train_steps_per_second": 13.106
36
  }
37
  ],
38
- "max_steps": 1750,
39
  "num_train_epochs": 5,
40
- "total_flos": 29806917058560.0,
41
  "trial_name": null,
42
  "trial_params": null
43
  }
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 5.0,
5
+ "global_step": 1875,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 1.33,
12
+ "learning_rate": 3.6666666666666666e-05,
13
+ "loss": 0.8661,
14
  "step": 500
15
  },
16
  {
17
+ "epoch": 2.67,
18
+ "learning_rate": 2.3333333333333336e-05,
19
+ "loss": 0.0521,
20
  "step": 1000
21
  },
22
  {
23
+ "epoch": 4.0,
24
+ "learning_rate": 1e-05,
25
+ "loss": 0.0199,
26
  "step": 1500
27
  },
28
  {
29
  "epoch": 5.0,
30
+ "step": 1875,
31
+ "total_flos": 31746111307776.0,
32
+ "train_loss": 0.2530242416381836,
33
+ "train_runtime": 134.3349,
34
+ "train_samples_per_second": 55.831,
35
+ "train_steps_per_second": 13.958
36
  }
37
  ],
38
+ "max_steps": 1875,
39
  "num_train_epochs": 5,
40
+ "total_flos": 31746111307776.0,
41
  "trial_name": null,
42
  "trial_params": null
43
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:322e30da87d91285cc592fef6b8b680764c3d73dd2be1478a6c6fd789346423d
3
  size 2991
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:897f546f75deb840b1810a2128356735036e00fd14c88e92914e4fa4c3e1ee32
3
  size 2991