seanfarrell committed on
Commit
1e89ae9
·
verified ·
1 Parent(s): 1af7039

Upload folder using huggingface_hub

Browse files
Files changed (6) hide show
  1. model.safetensors +1 -1
  2. optimizer.pt +1 -1
  3. rng_state.pth +1 -1
  4. scheduler.pt +1 -1
  5. trainer_state.json +70 -63
  6. training_args.bin +1 -1
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:474b993621dcf0bddfe1abd8c429bde1a78e3cd28d23183ebdc1cfb5f798f9fb
3
  size 430935892
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a65f77de1eb62cec0f629cb19463346793fec80e1454deff0266de4d5ef2c55
3
  size 430935892
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f3373cbddd58f49d7ee1bee615e1e87f0d764904fd125e2f94c5c1ca6b82737
3
  size 861991482
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e1381267deba7fff5c23ccbe019de55935969197d9c8386d0b1e2e7bdb75daf0
3
  size 861991482
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:df8904498d0ea4278d68a668aa846964ce80df1ff9136c37871ee274669dc57f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7d79b618e9c3de1ee0de1ae85c648b3b80a40cb85b291159a42ac076a903c57e
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4e9de563a795946068ee2943aa801ab2757eb65d8f36f8743830ce3a1cd4b5ce
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0e65eb7d8463989e20eaa15c5cb44e7258208d340b08446887fbe12fa3673349
3
  size 1064
trainer_state.json CHANGED
@@ -1,100 +1,107 @@
1
  {
2
- "best_global_step": 609,
3
- "best_metric": 0.010163484141230583,
4
- "best_model_checkpoint": "projects/PetBERT_annonymisation/data/augment/arrow_4/checkpoint-609",
5
  "epoch": 4.0,
6
  "eval_steps": 500,
7
- "global_step": 2436,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 0,
14
- "eval_f1": 0.0125291815966952,
15
- "eval_loss": 2.3777594566345215,
16
- "eval_precision": 0.09023052095482559,
17
- "eval_recall": 0.0881670915774344,
18
- "eval_runtime": 12.1157,
19
- "eval_samples_per_second": 136.764,
20
- "eval_steps_per_second": 4.292,
21
  "step": 0
22
  },
23
  {
24
- "epoch": 0.8210180623973727,
25
- "grad_norm": 0.28877386450767517,
26
- "learning_rate": 4.9959031198686376e-05,
27
  "loss": 0.0484,
28
  "step": 500
29
  },
30
  {
31
  "epoch": 1.0,
32
- "eval_f1": 0.9208551148897809,
33
- "eval_loss": 0.010163484141230583,
34
- "eval_precision": 0.9173087088894764,
35
- "eval_recall": 0.9271621325936177,
36
- "eval_runtime": 12.5384,
37
- "eval_samples_per_second": 132.154,
38
- "eval_steps_per_second": 4.147,
39
- "step": 609
40
  },
41
  {
42
- "epoch": 1.6420361247947455,
43
- "grad_norm": 0.3570762574672699,
44
- "learning_rate": 4.99179802955665e-05,
45
- "loss": 0.0065,
46
  "step": 1000
47
  },
48
  {
49
  "epoch": 2.0,
50
- "eval_f1": 0.9295632654722351,
51
- "eval_loss": 0.010717815719544888,
52
- "eval_precision": 0.9179765798631134,
53
- "eval_recall": 0.9419352898275708,
54
- "eval_runtime": 12.5527,
55
- "eval_samples_per_second": 132.004,
56
- "eval_steps_per_second": 4.143,
57
- "step": 1218
58
  },
59
  {
60
- "epoch": 2.4630541871921183,
61
- "grad_norm": 0.006953490898013115,
62
- "learning_rate": 4.987692939244664e-05,
63
- "loss": 0.0033,
64
  "step": 1500
65
  },
 
 
 
 
 
 
 
66
  {
67
  "epoch": 3.0,
68
- "eval_f1": 0.9048958394614014,
69
- "eval_loss": 0.013671835884451866,
70
- "eval_precision": 0.8760752650167597,
71
- "eval_recall": 0.9419362439194514,
72
- "eval_runtime": 12.5648,
73
- "eval_samples_per_second": 131.877,
74
- "eval_steps_per_second": 4.139,
75
- "step": 1827
76
  },
77
  {
78
- "epoch": 3.284072249589491,
79
- "grad_norm": 0.03194739297032356,
80
- "learning_rate": 4.983587848932677e-05,
81
- "loss": 0.0024,
82
- "step": 2000
83
  },
84
  {
85
  "epoch": 4.0,
86
- "eval_f1": 0.926800098647886,
87
- "eval_loss": 0.013943095691502094,
88
- "eval_precision": 0.9002957711004475,
89
- "eval_recall": 0.9554729790053276,
90
- "eval_runtime": 12.6182,
91
- "eval_samples_per_second": 131.319,
92
- "eval_steps_per_second": 4.121,
93
- "step": 2436
94
  }
95
  ],
96
  "logging_steps": 500,
97
- "max_steps": 609000,
98
  "num_input_tokens_seen": 0,
99
  "num_train_epochs": 1000,
100
  "save_steps": 500,
@@ -105,7 +112,7 @@
105
  "early_stopping_threshold": 0.0
106
  },
107
  "attributes": {
108
- "early_stopping_patience_counter": 3
109
  }
110
  },
111
  "TrainerControl": {
@@ -114,12 +121,12 @@
114
  "should_evaluate": false,
115
  "should_log": false,
116
  "should_save": true,
117
- "should_training_stop": true
118
  },
119
  "attributes": {}
120
  }
121
  },
122
- "total_flos": 2.035249251765043e+16,
123
  "train_batch_size": 32,
124
  "trial_name": null,
125
  "trial_params": null
 
1
  {
2
+ "best_global_step": 1376,
3
+ "best_metric": 0.012005209922790527,
4
+ "best_model_checkpoint": "projects/PetHarbor/data/augment/arrow_v5/model/checkpoint-1376",
5
  "epoch": 4.0,
6
  "eval_steps": 500,
7
+ "global_step": 2752,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
11
  "log_history": [
12
  {
13
  "epoch": 0,
14
+ "eval_f1": 0.011655798995725575,
15
+ "eval_loss": 2.4493634700775146,
16
+ "eval_precision": 0.09168997185092281,
17
+ "eval_recall": 0.05262704226436905,
18
+ "eval_runtime": 12.3437,
19
+ "eval_samples_per_second": 134.238,
20
+ "eval_steps_per_second": 4.213,
21
  "step": 0
22
  },
23
  {
24
+ "epoch": 0.7267441860465116,
25
+ "grad_norm": 0.2848288416862488,
26
+ "learning_rate": 4.996373546511628e-05,
27
  "loss": 0.0484,
28
  "step": 500
29
  },
30
  {
31
  "epoch": 1.0,
32
+ "eval_f1": 0.8927075594912149,
33
+ "eval_loss": 0.013240625150501728,
34
+ "eval_precision": 0.8758418145856814,
35
+ "eval_recall": 0.9170635030650058,
36
+ "eval_runtime": 12.5123,
37
+ "eval_samples_per_second": 132.429,
38
+ "eval_steps_per_second": 4.156,
39
+ "step": 688
40
  },
41
  {
42
+ "epoch": 1.4534883720930232,
43
+ "grad_norm": 0.19568854570388794,
44
+ "learning_rate": 4.992739825581396e-05,
45
+ "loss": 0.0071,
46
  "step": 1000
47
  },
48
  {
49
  "epoch": 2.0,
50
+ "eval_f1": 0.9068541971529176,
51
+ "eval_loss": 0.012005209922790527,
52
+ "eval_precision": 0.9104600842733132,
53
+ "eval_recall": 0.9040660692132211,
54
+ "eval_runtime": 12.5166,
55
+ "eval_samples_per_second": 132.384,
56
+ "eval_steps_per_second": 4.154,
57
+ "step": 1376
58
  },
59
  {
60
+ "epoch": 2.1802325581395348,
61
+ "grad_norm": 0.035110026597976685,
62
+ "learning_rate": 4.989106104651163e-05,
63
+ "loss": 0.0038,
64
  "step": 1500
65
  },
66
+ {
67
+ "epoch": 2.9069767441860463,
68
+ "grad_norm": 0.008144177496433258,
69
+ "learning_rate": 4.985472383720931e-05,
70
+ "loss": 0.0025,
71
+ "step": 2000
72
+ },
73
  {
74
  "epoch": 3.0,
75
+ "eval_f1": 0.9162870645176323,
76
+ "eval_loss": 0.01785987988114357,
77
+ "eval_precision": 0.9166192713919116,
78
+ "eval_recall": 0.9175617135937298,
79
+ "eval_runtime": 12.5589,
80
+ "eval_samples_per_second": 131.939,
81
+ "eval_steps_per_second": 4.14,
82
+ "step": 2064
83
  },
84
  {
85
+ "epoch": 3.633720930232558,
86
+ "grad_norm": 0.44367948174476624,
87
+ "learning_rate": 4.981838662790698e-05,
88
+ "loss": 0.0021,
89
+ "step": 2500
90
  },
91
  {
92
  "epoch": 4.0,
93
+ "eval_f1": 0.9225461672714073,
94
+ "eval_loss": 0.016809402033686638,
95
+ "eval_precision": 0.920670658749496,
96
+ "eval_recall": 0.9248274530000097,
97
+ "eval_runtime": 12.5585,
98
+ "eval_samples_per_second": 131.942,
99
+ "eval_steps_per_second": 4.141,
100
+ "step": 2752
101
  }
102
  ],
103
  "logging_steps": 500,
104
+ "max_steps": 688000,
105
  "num_input_tokens_seen": 0,
106
  "num_train_epochs": 1000,
107
  "save_steps": 500,
 
112
  "early_stopping_threshold": 0.0
113
  },
114
  "attributes": {
115
+ "early_stopping_patience_counter": 2
116
  }
117
  },
118
  "TrainerControl": {
 
121
  "should_evaluate": false,
122
  "should_log": false,
123
  "should_save": true,
124
+ "should_training_stop": false
125
  },
126
  "attributes": {}
127
  }
128
  },
129
+ "total_flos": 2.2993895044079616e+16,
130
  "train_batch_size": 32,
131
  "trial_name": null,
132
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10d352a09e2aecd9422bdcc1bf58522262d82cd9de793273e937f615b15344c1
3
  size 5304
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18049da023690507d75e521ee5b9996b1c940ec5354febc2d3364798461dae1e
3
  size 5304