balaramas committed on
Commit
ea4f3f5
1 Parent(s): fddf3ce

Model save

Browse files
README.md CHANGED
@@ -37,7 +37,7 @@ The following hyperparameters were used during training:
37
  - seed: 42
38
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
39
  - lr_scheduler_type: linear
40
- - num_epochs: 3.0
41
 
42
  ### Training results
43
 
 
37
  - seed: 42
38
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
39
  - lr_scheduler_type: linear
40
+ - num_epochs: 7.0
41
 
42
  ### Training results
43
 
all_results.json CHANGED
@@ -1,18 +1,18 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_gen_len": 78.04093919325707,
4
- "eval_loss": 1.0967354774475098,
5
- "eval_rouge1": 9.6103,
6
- "eval_rouge2": 3.2602,
7
- "eval_rougeL": 9.4833,
8
- "eval_rougeLsum": 9.5204,
9
- "eval_runtime": 305.3578,
10
  "eval_samples": 1661,
11
- "eval_samples_per_second": 5.44,
12
- "eval_steps_per_second": 1.362,
13
- "train_loss": 1.3809210790793738,
14
- "train_runtime": 2386.364,
15
  "train_samples": 14964,
16
- "train_samples_per_second": 6.271,
17
- "train_steps_per_second": 1.568
18
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "eval_gen_len": 81.00240818783865,
4
+ "eval_loss": 1.0256497859954834,
5
+ "eval_rouge1": 11.0948,
6
+ "eval_rouge2": 3.7099,
7
+ "eval_rougeL": 10.8859,
8
+ "eval_rougeLsum": 10.9048,
9
+ "eval_runtime": 310.5094,
10
  "eval_samples": 1661,
11
+ "eval_samples_per_second": 5.349,
12
+ "eval_steps_per_second": 1.34,
13
+ "train_loss": 0.5631158361882754,
14
+ "train_runtime": 5240.7351,
15
  "train_samples": 14964,
16
+ "train_samples_per_second": 8.566,
17
+ "train_steps_per_second": 2.141
18
  }
eval_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 1.0,
3
- "eval_gen_len": 78.04093919325707,
4
- "eval_loss": 1.0967354774475098,
5
- "eval_rouge1": 9.6103,
6
- "eval_rouge2": 3.2602,
7
- "eval_rougeL": 9.4833,
8
- "eval_rougeLsum": 9.5204,
9
- "eval_runtime": 305.3578,
10
  "eval_samples": 1661,
11
- "eval_samples_per_second": 5.44,
12
- "eval_steps_per_second": 1.362
13
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "eval_gen_len": 81.00240818783865,
4
+ "eval_loss": 1.0256497859954834,
5
+ "eval_rouge1": 11.0948,
6
+ "eval_rouge2": 3.7099,
7
+ "eval_rougeL": 10.8859,
8
+ "eval_rougeLsum": 10.9048,
9
+ "eval_runtime": 310.5094,
10
  "eval_samples": 1661,
11
+ "eval_samples_per_second": 5.349,
12
+ "eval_steps_per_second": 1.34
13
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:37846d11c8151954bf5997b55c431a0d3e59a1f8018b9c208ebb8eed603b171c
3
  size 2444578688
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:489ffc4b625aae8b37eacc9372da4e7fb224d61ccfb879c9c242053baecad364
3
  size 2444578688
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 1.0,
3
- "train_loss": 1.3809210790793738,
4
- "train_runtime": 2386.364,
5
  "train_samples": 14964,
6
- "train_samples_per_second": 6.271,
7
- "train_steps_per_second": 1.568
8
  }
 
1
  {
2
+ "epoch": 3.0,
3
+ "train_loss": 0.5631158361882754,
4
+ "train_runtime": 5240.7351,
5
  "train_samples": 14964,
6
+ "train_samples_per_second": 8.566,
7
+ "train_steps_per_second": 2.141
8
  }
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.0,
5
  "eval_steps": 500,
6
- "global_step": 3741,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -51,21 +51,111 @@
51
  "step": 3500
52
  },
53
  {
54
- "epoch": 1.0,
55
- "step": 3741,
56
- "total_flos": 1.70456184471552e+16,
57
- "train_loss": 1.3809210790793738,
58
- "train_runtime": 2386.364,
59
- "train_samples_per_second": 6.271,
60
- "train_steps_per_second": 1.568
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  }
62
  ],
63
  "logging_steps": 500,
64
- "max_steps": 3741,
65
  "num_input_tokens_seen": 0,
66
- "num_train_epochs": 1,
67
  "save_steps": 500,
68
- "total_flos": 1.70456184471552e+16,
69
  "train_batch_size": 4,
70
  "trial_name": null,
71
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
  "eval_steps": 500,
6
+ "global_step": 11223,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
51
  "step": 3500
52
  },
53
  {
54
+ "epoch": 1.07,
55
+ "learning_rate": 3.217945290920431e-05,
56
+ "loss": 1.1143,
57
+ "step": 4000
58
+ },
59
+ {
60
+ "epoch": 1.2,
61
+ "learning_rate": 2.995188452285485e-05,
62
+ "loss": 1.0045,
63
+ "step": 4500
64
+ },
65
+ {
66
+ "epoch": 1.34,
67
+ "learning_rate": 2.772431613650539e-05,
68
+ "loss": 0.9933,
69
+ "step": 5000
70
+ },
71
+ {
72
+ "epoch": 1.47,
73
+ "learning_rate": 2.549674775015593e-05,
74
+ "loss": 0.9654,
75
+ "step": 5500
76
+ },
77
+ {
78
+ "epoch": 1.6,
79
+ "learning_rate": 2.326917936380647e-05,
80
+ "loss": 0.9793,
81
+ "step": 6000
82
+ },
83
+ {
84
+ "epoch": 1.74,
85
+ "learning_rate": 2.104161097745701e-05,
86
+ "loss": 0.979,
87
+ "step": 6500
88
+ },
89
+ {
90
+ "epoch": 1.87,
91
+ "learning_rate": 1.8814042591107548e-05,
92
+ "loss": 0.9534,
93
+ "step": 7000
94
+ },
95
+ {
96
+ "epoch": 2.0,
97
+ "learning_rate": 1.658647420475809e-05,
98
+ "loss": 0.9262,
99
+ "step": 7500
100
+ },
101
+ {
102
+ "epoch": 2.14,
103
+ "learning_rate": 1.4358905818408625e-05,
104
+ "loss": 0.6403,
105
+ "step": 8000
106
+ },
107
+ {
108
+ "epoch": 2.27,
109
+ "learning_rate": 1.2131337432059165e-05,
110
+ "loss": 0.6348,
111
+ "step": 8500
112
+ },
113
+ {
114
+ "epoch": 2.41,
115
+ "learning_rate": 9.903769045709705e-06,
116
+ "loss": 0.6605,
117
+ "step": 9000
118
+ },
119
+ {
120
+ "epoch": 2.54,
121
+ "learning_rate": 7.676200659360243e-06,
122
+ "loss": 0.6282,
123
+ "step": 9500
124
+ },
125
+ {
126
+ "epoch": 2.67,
127
+ "learning_rate": 5.448632273010781e-06,
128
+ "loss": 0.632,
129
+ "step": 10000
130
+ },
131
+ {
132
+ "epoch": 2.81,
133
+ "learning_rate": 3.2210638866613206e-06,
134
+ "loss": 0.6298,
135
+ "step": 10500
136
+ },
137
+ {
138
+ "epoch": 2.94,
139
+ "learning_rate": 9.934955003118596e-07,
140
+ "loss": 0.6261,
141
+ "step": 11000
142
+ },
143
+ {
144
+ "epoch": 3.0,
145
+ "step": 11223,
146
+ "total_flos": 5.117130085328486e+16,
147
+ "train_loss": 0.5631158361882754,
148
+ "train_runtime": 5240.7351,
149
+ "train_samples_per_second": 8.566,
150
+ "train_steps_per_second": 2.141
151
  }
152
  ],
153
  "logging_steps": 500,
154
+ "max_steps": 11223,
155
  "num_input_tokens_seen": 0,
156
+ "num_train_epochs": 3,
157
  "save_steps": 500,
158
+ "total_flos": 5.117130085328486e+16,
159
  "train_batch_size": 4,
160
  "trial_name": null,
161
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9fea473a216aa9aa4b033031c9e8f1485df505e03a5b11e6cee03b585b064ee7
3
  size 5048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:88aa6844593b382092effed6f9be95b0657dbbe8f2c35be0770510ab0bddcbb6
3
  size 5048