alejndrojavier committed on
Commit
0a9c06b
1 Parent(s): c8b762e

Training in progress, epoch 2

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7a0439899010bb3a86a7f68722d69040ee0edd4cd396a46c51755f94f06e78f4
3
  size 267832560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ffe3afa822d315a1900625a7578ec708aa4cdda2c4865921057dec1afe89fe2
3
  size 267832560
tmp-checkpoint-350/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dc6628694c950dd5fbe46f68ce2987c750081a50bc283657d3212f07f03f972f
3
  size 267832560
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6ffe3afa822d315a1900625a7578ec708aa4cdda2c4865921057dec1afe89fe2
3
  size 267832560
tmp-checkpoint-350/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5877be18420a112359a1a007e8e3b106c6f005a1f1e9c87fd1b5df566d875441
3
  size 535727290
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c18f5e9ccd427ec5bda64d8cbd7a0ccadfe49285f3b28f5d4b60f391d7d0e45
3
  size 535727290
tmp-checkpoint-350/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fe0e7135622ace68d7968efe929220efcc643079f367f734515eda6abfb77956
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5225d9c40c3c71998b118bcf9e76d16d005cdec607bb7aa3c0a29d56cf212f89
3
  size 14244
tmp-checkpoint-350/trainer_state.json CHANGED
@@ -11,63 +11,63 @@
11
  {
12
  "epoch": 0.29,
13
  "learning_rate": 1.0000000000000002e-06,
14
- "loss": 0.5477,
15
  "step": 50
16
  },
17
  {
18
  "epoch": 0.57,
19
  "learning_rate": 2.0000000000000003e-06,
20
- "loss": 0.5383,
21
  "step": 100
22
  },
23
  {
24
  "epoch": 0.86,
25
  "learning_rate": 3e-06,
26
- "loss": 0.5464,
27
  "step": 150
28
  },
29
  {
30
  "epoch": 1.0,
31
  "eval_accuracy": 0.7685714285714286,
32
  "eval_f1": 0.8691437802907915,
33
- "eval_loss": 0.5194858312606812,
34
- "eval_runtime": 10.6792,
35
- "eval_samples_per_second": 65.548,
36
- "eval_steps_per_second": 4.12,
37
  "step": 175
38
  },
39
  {
40
  "epoch": 1.14,
41
  "learning_rate": 4.000000000000001e-06,
42
- "loss": 0.5248,
43
  "step": 200
44
  },
45
  {
46
  "epoch": 1.43,
47
  "learning_rate": 5e-06,
48
- "loss": 0.5475,
49
  "step": 250
50
  },
51
  {
52
  "epoch": 1.71,
53
  "learning_rate": 6e-06,
54
- "loss": 0.5114,
55
  "step": 300
56
  },
57
  {
58
  "epoch": 2.0,
59
  "learning_rate": 7e-06,
60
- "loss": 0.5552,
61
  "step": 350
62
  },
63
  {
64
  "epoch": 2.0,
65
- "eval_accuracy": 0.7685714285714286,
66
- "eval_f1": 0.8691437802907915,
67
- "eval_loss": 0.525272011756897,
68
- "eval_runtime": 15.9825,
69
- "eval_samples_per_second": 43.798,
70
- "eval_steps_per_second": 2.753,
71
  "step": 350
72
  }
73
  ],
@@ -76,7 +76,7 @@
76
  "num_input_tokens_seen": 0,
77
  "num_train_epochs": 3,
78
  "save_steps": 500,
79
- "total_flos": 826596567613440.0,
80
  "train_batch_size": 16,
81
  "trial_name": null,
82
  "trial_params": null
 
11
  {
12
  "epoch": 0.29,
13
  "learning_rate": 1.0000000000000002e-06,
14
+ "loss": 0.5249,
15
  "step": 50
16
  },
17
  {
18
  "epoch": 0.57,
19
  "learning_rate": 2.0000000000000003e-06,
20
+ "loss": 0.5264,
21
  "step": 100
22
  },
23
  {
24
  "epoch": 0.86,
25
  "learning_rate": 3e-06,
26
+ "loss": 0.521,
27
  "step": 150
28
  },
29
  {
30
  "epoch": 1.0,
31
  "eval_accuracy": 0.7685714285714286,
32
  "eval_f1": 0.8691437802907915,
33
+ "eval_loss": 0.5161310434341431,
34
+ "eval_runtime": 11.1188,
35
+ "eval_samples_per_second": 62.957,
36
+ "eval_steps_per_second": 3.957,
37
  "step": 175
38
  },
39
  {
40
  "epoch": 1.14,
41
  "learning_rate": 4.000000000000001e-06,
42
+ "loss": 0.5021,
43
  "step": 200
44
  },
45
  {
46
  "epoch": 1.43,
47
  "learning_rate": 5e-06,
48
+ "loss": 0.5169,
49
  "step": 250
50
  },
51
  {
52
  "epoch": 1.71,
53
  "learning_rate": 6e-06,
54
+ "loss": 0.5231,
55
  "step": 300
56
  },
57
  {
58
  "epoch": 2.0,
59
  "learning_rate": 7e-06,
60
+ "loss": 0.5115,
61
  "step": 350
62
  },
63
  {
64
  "epoch": 2.0,
65
+ "eval_accuracy": 0.7757142857142857,
66
+ "eval_f1": 0.8726682887266829,
67
+ "eval_loss": 0.5628861784934998,
68
+ "eval_runtime": 17.6371,
69
+ "eval_samples_per_second": 39.689,
70
+ "eval_steps_per_second": 2.495,
71
  "step": 350
72
  }
73
  ],
 
76
  "num_input_tokens_seen": 0,
77
  "num_train_epochs": 3,
78
  "save_steps": 500,
79
+ "total_flos": 741817432473600.0,
80
  "train_batch_size": 16,
81
  "trial_name": null,
82
  "trial_params": null