simonmok commited on
Commit
9595574
1 Parent(s): a354e88

Training in progress, step 2226

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:983ee01fd4a13d5b53525ad13e56f7ba0c9935604ee872f9db3a1c44b97fc8bb
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bca72d2dc92e78fa7483fdc3c99254ad247da91be97621b7f3ec3ff082e8e56
3
  size 268290900
run-1/checkpoint-2226/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d82d1662482298b0235cc9e0a73a2edbadda394622ccfe4ebed5350e5ab731c0
3
  size 268290900
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bca72d2dc92e78fa7483fdc3c99254ad247da91be97621b7f3ec3ff082e8e56
3
  size 268290900
run-1/checkpoint-2226/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a62141f53b06718507014cf92f0cfae562e7df1c30dcc5b57c2793fd757c932
3
  size 536643898
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc77bf18a7d7c1e219546b5c50e37a24ad4013822dd9b310b06c18f3179a6301
3
  size 536643898
run-1/checkpoint-2226/trainer_state.json CHANGED
@@ -8,94 +8,115 @@
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
 
 
 
 
 
 
 
11
  {
12
  "epoch": 1.0,
13
- "eval_accuracy": 0.6364516129032258,
14
- "eval_loss": 0.2942647337913513,
15
- "eval_runtime": 5.7495,
16
- "eval_samples_per_second": 539.181,
17
- "eval_steps_per_second": 11.305,
18
  "step": 318
19
  },
20
  {
21
- "epoch": 1.5723270440251573,
22
- "grad_norm": 0.6808456778526306,
23
- "learning_rate": 1.550763701707098e-05,
24
- "loss": 0.4631,
25
- "step": 500
26
  },
27
  {
28
  "epoch": 2.0,
29
- "eval_accuracy": 0.837741935483871,
30
- "eval_loss": 0.12298235297203064,
31
- "eval_runtime": 5.982,
32
- "eval_samples_per_second": 518.218,
33
- "eval_steps_per_second": 10.866,
34
  "step": 636
35
  },
 
 
 
 
 
 
 
36
  {
37
  "epoch": 3.0,
38
- "eval_accuracy": 0.886774193548387,
39
- "eval_loss": 0.07365494966506958,
40
- "eval_runtime": 5.4854,
41
- "eval_samples_per_second": 565.141,
42
- "eval_steps_per_second": 11.85,
43
  "step": 954
44
  },
45
  {
46
- "epoch": 3.1446540880503147,
47
- "grad_norm": 0.7211179137229919,
48
- "learning_rate": 1.101527403414196e-05,
49
- "loss": 0.144,
50
- "step": 1000
51
  },
52
  {
53
  "epoch": 4.0,
54
- "eval_accuracy": 0.9006451612903226,
55
- "eval_loss": 0.05463062971830368,
56
- "eval_runtime": 5.5595,
57
- "eval_samples_per_second": 557.599,
58
- "eval_steps_per_second": 11.692,
59
  "step": 1272
60
  },
61
  {
62
- "epoch": 4.716981132075472,
63
- "grad_norm": 0.4047314524650574,
64
- "learning_rate": 6.522911051212939e-06,
65
- "loss": 0.0842,
66
- "step": 1500
67
  },
68
  {
69
  "epoch": 5.0,
70
- "eval_accuracy": 0.917741935483871,
71
- "eval_loss": 0.044852741062641144,
72
- "eval_runtime": 5.832,
73
- "eval_samples_per_second": 531.549,
74
- "eval_steps_per_second": 11.145,
75
  "step": 1590
76
  },
 
 
 
 
 
 
 
77
  {
78
  "epoch": 6.0,
79
- "eval_accuracy": 0.9193548387096774,
80
- "eval_loss": 0.04065420478582382,
81
- "eval_runtime": 5.4964,
82
- "eval_samples_per_second": 564.004,
83
- "eval_steps_per_second": 11.826,
84
  "step": 1908
85
  },
86
  {
87
- "epoch": 6.289308176100629,
88
- "grad_norm": 0.3561893105506897,
89
- "learning_rate": 2.0305480682839176e-06,
90
- "loss": 0.0669,
91
- "step": 2000
92
  }
93
  ],
94
- "logging_steps": 500,
95
  "max_steps": 2226,
96
  "num_input_tokens_seen": 0,
97
  "num_train_epochs": 7,
98
- "save_steps": 500,
99
  "stateful_callbacks": {
100
  "TrainerControl": {
101
  "args": {
@@ -108,12 +129,12 @@
108
  "attributes": {}
109
  }
110
  },
111
- "total_flos": 520991326672152.0,
112
  "train_batch_size": 48,
113
  "trial_name": null,
114
  "trial_params": {
115
- "alpha": 0.27523519283703446,
116
  "num_train_epochs": 7,
117
- "temperature": 3
118
  }
119
  }
 
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
+ {
12
+ "epoch": 0.9968553459119497,
13
+ "grad_norm": 0.7022855281829834,
14
+ "learning_rate": 1.7151841868823e-05,
15
+ "loss": 0.4035,
16
+ "step": 317
17
+ },
18
  {
19
  "epoch": 1.0,
20
+ "eval_accuracy": 0.59,
21
+ "eval_loss": 0.2041669636964798,
22
+ "eval_runtime": 5.3525,
23
+ "eval_samples_per_second": 579.169,
24
+ "eval_steps_per_second": 12.144,
25
  "step": 318
26
  },
27
  {
28
+ "epoch": 1.9937106918238994,
29
+ "grad_norm": 0.4554065763950348,
30
+ "learning_rate": 1.4303683737646003e-05,
31
+ "loss": 0.1712,
32
+ "step": 634
33
  },
34
  {
35
  "epoch": 2.0,
36
+ "eval_accuracy": 0.8219354838709677,
37
+ "eval_loss": 0.10027384757995605,
38
+ "eval_runtime": 5.4479,
39
+ "eval_samples_per_second": 569.027,
40
+ "eval_steps_per_second": 11.931,
41
  "step": 636
42
  },
43
+ {
44
+ "epoch": 2.990566037735849,
45
+ "grad_norm": 0.49464017152786255,
46
+ "learning_rate": 1.1455525606469004e-05,
47
+ "loss": 0.1083,
48
+ "step": 951
49
+ },
50
  {
51
  "epoch": 3.0,
52
+ "eval_accuracy": 0.8780645161290322,
53
+ "eval_loss": 0.06833519041538239,
54
+ "eval_runtime": 5.4064,
55
+ "eval_samples_per_second": 573.399,
56
+ "eval_steps_per_second": 12.023,
57
  "step": 954
58
  },
59
  {
60
+ "epoch": 3.9874213836477987,
61
+ "grad_norm": 0.3309425115585327,
62
+ "learning_rate": 8.607367475292004e-06,
63
+ "loss": 0.0827,
64
+ "step": 1268
65
  },
66
  {
67
  "epoch": 4.0,
68
+ "eval_accuracy": 0.8935483870967742,
69
+ "eval_loss": 0.053802672773599625,
70
+ "eval_runtime": 5.4522,
71
+ "eval_samples_per_second": 568.582,
72
+ "eval_steps_per_second": 11.922,
73
  "step": 1272
74
  },
75
  {
76
+ "epoch": 4.984276729559748,
77
+ "grad_norm": 0.2727065682411194,
78
+ "learning_rate": 5.759209344115006e-06,
79
+ "loss": 0.0698,
80
+ "step": 1585
81
  },
82
  {
83
  "epoch": 5.0,
84
+ "eval_accuracy": 0.9025806451612903,
85
+ "eval_loss": 0.04560532420873642,
86
+ "eval_runtime": 5.4111,
87
+ "eval_samples_per_second": 572.892,
88
+ "eval_steps_per_second": 12.012,
89
  "step": 1590
90
  },
91
+ {
92
+ "epoch": 5.981132075471698,
93
+ "grad_norm": 0.33385205268859863,
94
+ "learning_rate": 2.911051212938006e-06,
95
+ "loss": 0.063,
96
+ "step": 1902
97
+ },
98
  {
99
  "epoch": 6.0,
100
+ "eval_accuracy": 0.9070967741935484,
101
+ "eval_loss": 0.04146786779165268,
102
+ "eval_runtime": 5.4202,
103
+ "eval_samples_per_second": 571.939,
104
+ "eval_steps_per_second": 11.992,
105
  "step": 1908
106
  },
107
  {
108
+ "epoch": 6.977987421383648,
109
+ "grad_norm": 0.2678754925727844,
110
+ "learning_rate": 6.289308176100629e-08,
111
+ "loss": 0.0595,
112
+ "step": 2219
113
  }
114
  ],
115
+ "logging_steps": 317,
116
  "max_steps": 2226,
117
  "num_input_tokens_seen": 0,
118
  "num_train_epochs": 7,
119
+ "save_steps": 1000000000.0,
120
  "stateful_callbacks": {
121
  "TrainerControl": {
122
  "args": {
 
129
  "attributes": {}
130
  }
131
  },
132
+ "total_flos": 578219881979544.0,
133
  "train_batch_size": 48,
134
  "trial_name": null,
135
  "trial_params": {
136
+ "alpha": 0.04281472072400683,
137
  "num_train_epochs": 7,
138
+ "temperature": 10
139
  }
140
  }
run-1/checkpoint-2226/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c50be35dabf57488910ecef52dcf0c3f00eb115989d551ba66b159364e8ee11
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09cec54d629ea3a2934e8f55ca9c467045baab73f61f5602fb0f2ef26a668bb7
3
  size 5368
runs/Nov27_11-17-59_dd9f37d8f6cd/events.out.tfevents.1732707419.dd9f37d8f6cd.692.2 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69bebb5a2f4d39178a286c1c4cb9da32fc80a628cb5ec041d17d7bcfad08f90b
3
+ size 16661
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f276566b7a36317209ad1b0771b8b02f07cd65c76cc8c09aee24982f6ccda11
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09cec54d629ea3a2934e8f55ca9c467045baab73f61f5602fb0f2ef26a668bb7
3
  size 5368