edbeeching HF staff commited on
Commit
90e0792
1 Parent(s): 0fe8392

Model save

Browse files
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 0.67,
3
- "eval_loss": 0.9324473142623901,
4
- "eval_runtime": 332.1457,
5
  "eval_samples": 23110,
6
- "eval_samples_per_second": 69.578,
7
- "eval_steps_per_second": 0.545,
8
- "train_loss": 0.9717074483633041,
9
- "train_runtime": 11641.8542,
10
  "train_samples": 207865,
11
- "train_samples_per_second": 17.855,
12
  "train_steps_per_second": 0.035
13
  }
 
1
  {
2
  "epoch": 0.67,
3
+ "eval_loss": 0.932437002658844,
4
+ "eval_runtime": 331.0457,
5
  "eval_samples": 23110,
6
+ "eval_samples_per_second": 69.809,
7
+ "eval_steps_per_second": 0.547,
8
+ "train_loss": 0.9717322877224754,
9
+ "train_runtime": 11645.5121,
10
  "train_samples": 207865,
11
+ "train_samples_per_second": 17.849,
12
  "train_steps_per_second": 0.035
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0.67,
3
- "eval_loss": 0.9324473142623901,
4
- "eval_runtime": 332.1457,
5
  "eval_samples": 23110,
6
- "eval_samples_per_second": 69.578,
7
- "eval_steps_per_second": 0.545
8
  }
 
1
  {
2
  "epoch": 0.67,
3
+ "eval_loss": 0.932437002658844,
4
+ "eval_runtime": 331.0457,
5
  "eval_samples": 23110,
6
+ "eval_samples_per_second": 69.809,
7
+ "eval_steps_per_second": 0.547
8
  }
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:680b68a743e98d28395a9d4de52faa2e9f0903ac1bd00e2da3e3a86cd6d2b30e
3
  size 4943162336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68b1e135ad66cee90fef5c9335e8c80b8e60b16254ff5f4e88d3369ebcce96a4
3
  size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:af63d2ee6754e3ee87e77fe640326da11423269bd2ad9bf2bfebafb1d19ed797
3
  size 4999819336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8df5a386b05f0393662aaa5d39d8dd052a22a366fe6f5cd42a0c7bc940898d6e
3
  size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d849ec49660cbf358f514b3da3e123ef0480f7eecd76da2098129ee9e97cc58b
3
  size 4540516344
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:202c8e90c2ac2219a17c1bce35622a063977658808c635d326433237022d2ea3
3
  size 4540516344
runs/Nov10_00-16-16_ip-26-0-155-187/events.out.tfevents.1699575456.ip-26-0-155-187.230765.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3189372cc8c6459ee460d3d31cbaddfb99a40696a85cc9050a23dea2a8e6a339
3
+ size 13430
runs/Nov10_00-16-16_ip-26-0-155-187/events.out.tfevents.1699587432.ip-26-0-155-187.230765.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2868aa7d42743dcea1cc11cb77830e050620d5fc59cb3c0b74befa9a82dd81e
3
+ size 359
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 0.67,
3
- "train_loss": 0.9717074483633041,
4
- "train_runtime": 11641.8542,
5
  "train_samples": 207865,
6
- "train_samples_per_second": 17.855,
7
  "train_steps_per_second": 0.035
8
  }
 
1
  {
2
  "epoch": 0.67,
3
+ "train_loss": 0.9717322877224754,
4
+ "train_runtime": 11645.5121,
5
  "train_samples": 207865,
6
+ "train_samples_per_second": 17.849,
7
  "train_steps_per_second": 0.035
8
  }
trainer_state.json CHANGED
@@ -17,25 +17,25 @@
17
  {
18
  "epoch": 0.01,
19
  "learning_rate": 1.999251652147735e-05,
20
- "loss": 1.6998,
21
  "step": 5
22
  },
23
  {
24
  "epoch": 0.02,
25
  "learning_rate": 1.997007728639956e-05,
26
- "loss": 1.1491,
27
  "step": 10
28
  },
29
  {
30
  "epoch": 0.04,
31
  "learning_rate": 1.9932715879473385e-05,
32
- "loss": 1.0708,
33
  "step": 15
34
  },
35
  {
36
  "epoch": 0.05,
37
  "learning_rate": 1.9880488219356086e-05,
38
- "loss": 1.0485,
39
  "step": 20
40
  },
41
  {
@@ -59,7 +59,7 @@
59
  {
60
  "epoch": 0.1,
61
  "learning_rate": 1.9524809490566878e-05,
62
- "loss": 0.9806,
63
  "step": 40
64
  },
65
  {
@@ -71,7 +71,7 @@
71
  {
72
  "epoch": 0.12,
73
  "learning_rate": 1.926084840336821e-05,
74
- "loss": 0.9815,
75
  "step": 50
76
  },
77
  {
@@ -113,7 +113,7 @@
113
  {
114
  "epoch": 0.21,
115
  "learning_rate": 1.791386494010081e-05,
116
- "loss": 0.9643,
117
  "step": 85
118
  },
119
  {
@@ -125,7 +125,7 @@
125
  {
126
  "epoch": 0.23,
127
  "learning_rate": 1.7417625312098453e-05,
128
- "loss": 0.9443,
129
  "step": 95
130
  },
131
  {
@@ -143,13 +143,13 @@
143
  {
144
  "epoch": 0.27,
145
  "learning_rate": 1.659103377877423e-05,
146
- "loss": 0.9497,
147
  "step": 110
148
  },
149
  {
150
  "epoch": 0.28,
151
  "learning_rate": 1.629520819706912e-05,
152
- "loss": 0.9465,
153
  "step": 115
154
  },
155
  {
@@ -340,9 +340,9 @@
340
  },
341
  {
342
  "epoch": 0.67,
343
- "eval_loss": 0.9322993755340576,
344
- "eval_runtime": 337.3516,
345
- "eval_samples_per_second": 68.504,
346
  "eval_steps_per_second": 0.537,
347
  "step": 272
348
  },
@@ -350,9 +350,9 @@
350
  "epoch": 0.67,
351
  "step": 272,
352
  "total_flos": 455322233733120.0,
353
- "train_loss": 0.9717074483633041,
354
- "train_runtime": 11641.8542,
355
- "train_samples_per_second": 17.855,
356
  "train_steps_per_second": 0.035
357
  }
358
  ],
 
17
  {
18
  "epoch": 0.01,
19
  "learning_rate": 1.999251652147735e-05,
20
+ "loss": 1.6995,
21
  "step": 5
22
  },
23
  {
24
  "epoch": 0.02,
25
  "learning_rate": 1.997007728639956e-05,
26
+ "loss": 1.1502,
27
  "step": 10
28
  },
29
  {
30
  "epoch": 0.04,
31
  "learning_rate": 1.9932715879473385e-05,
32
+ "loss": 1.0714,
33
  "step": 15
34
  },
35
  {
36
  "epoch": 0.05,
37
  "learning_rate": 1.9880488219356086e-05,
38
+ "loss": 1.0487,
39
  "step": 20
40
  },
41
  {
 
59
  {
60
  "epoch": 0.1,
61
  "learning_rate": 1.9524809490566878e-05,
62
+ "loss": 0.9805,
63
  "step": 40
64
  },
65
  {
 
71
  {
72
  "epoch": 0.12,
73
  "learning_rate": 1.926084840336821e-05,
74
+ "loss": 0.9814,
75
  "step": 50
76
  },
77
  {
 
113
  {
114
  "epoch": 0.21,
115
  "learning_rate": 1.791386494010081e-05,
116
+ "loss": 0.9642,
117
  "step": 85
118
  },
119
  {
 
125
  {
126
  "epoch": 0.23,
127
  "learning_rate": 1.7417625312098453e-05,
128
+ "loss": 0.9444,
129
  "step": 95
130
  },
131
  {
 
143
  {
144
  "epoch": 0.27,
145
  "learning_rate": 1.659103377877423e-05,
146
+ "loss": 0.9498,
147
  "step": 110
148
  },
149
  {
150
  "epoch": 0.28,
151
  "learning_rate": 1.629520819706912e-05,
152
+ "loss": 0.9464,
153
  "step": 115
154
  },
155
  {
 
340
  },
341
  {
342
  "epoch": 0.67,
343
+ "eval_loss": 0.9322898387908936,
344
+ "eval_runtime": 337.0923,
345
+ "eval_samples_per_second": 68.557,
346
  "eval_steps_per_second": 0.537,
347
  "step": 272
348
  },
 
350
  "epoch": 0.67,
351
  "step": 272,
352
  "total_flos": 455322233733120.0,
353
+ "train_loss": 0.9717322877224754,
354
+ "train_runtime": 11645.5121,
355
+ "train_samples_per_second": 17.849,
356
  "train_steps_per_second": 0.035
357
  }
358
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e6f1c9a0c5c8fa59d877590748d687cb1b42797067ac4868f119c9e310152f11
3
  size 5624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c5e5f1c2e60e8566b60a42c429bdeebcc5f5392a53938ef2fe0c39224dde9fc
3
  size 5624