ozidan committed on
Commit
f0b526a
·
verified ·
1 Parent(s): aaad856

Upload folder using huggingface_hub

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f5cefe576b6dc625831cf98ded0f30d5ab46b679aa44275ab0dbfb7d901449c
3
  size 664589192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0a5fbd69e85d1f171059fe74f946b082297f5f3dcebd4aca847806acb3db821f
3
  size 664589192
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:48dfd27bde15e80eb98cdd2c5ba92d706502d0b25f6e76ff3f46d9990b888696
3
  size 1329387626
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8437ce730c286a86c3621616df89ccd7634ef635d1c823d312df8131eec99ec
3
  size 1329387626
trainer_state.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "best_global_step": 160,
3
- "best_metric": 0.7793427230046949,
4
  "best_model_checkpoint": "/content/drive/MyDrive/data/models/jigsaw/decoder_with_classification_head/checkpoint-160",
5
  "epoch": 1.5714285714285714,
6
  "eval_steps": 20,
@@ -11,130 +11,130 @@
11
  "log_history": [
12
  {
13
  "epoch": 0.19704433497536947,
14
- "grad_norm": 125.02799224853516,
15
  "learning_rate": 9.068627450980392e-06,
16
- "loss": 1.7878,
17
  "step": 20
18
  },
19
  {
20
  "epoch": 0.19704433497536947,
21
- "eval_f1": 0.7181467181467182,
22
- "eval_loss": 1.0732333660125732,
23
- "eval_runtime": 4.0898,
24
- "eval_samples_per_second": 49.636,
25
- "eval_steps_per_second": 6.357,
26
  "step": 20
27
  },
28
  {
29
  "epoch": 0.39408866995073893,
30
- "grad_norm": 26.702146530151367,
31
  "learning_rate": 8.088235294117648e-06,
32
- "loss": 1.9092,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.39408866995073893,
37
- "eval_f1": 0.719626168224299,
38
- "eval_loss": 0.733646810054779,
39
- "eval_runtime": 4.0893,
40
- "eval_samples_per_second": 49.642,
41
- "eval_steps_per_second": 6.358,
42
  "step": 40
43
  },
44
  {
45
  "epoch": 0.5911330049261084,
46
- "grad_norm": 57.312957763671875,
47
  "learning_rate": 7.107843137254903e-06,
48
- "loss": 1.3607,
49
  "step": 60
50
  },
51
  {
52
  "epoch": 0.5911330049261084,
53
- "eval_f1": 0.700507614213198,
54
- "eval_loss": 0.6013323664665222,
55
- "eval_runtime": 5.2387,
56
- "eval_samples_per_second": 38.75,
57
- "eval_steps_per_second": 4.963,
58
  "step": 60
59
  },
60
  {
61
  "epoch": 0.7881773399014779,
62
- "grad_norm": 49.63467788696289,
63
  "learning_rate": 6.1274509803921575e-06,
64
- "loss": 1.2099,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 0.7881773399014779,
69
- "eval_f1": 0.6480446927374302,
70
- "eval_loss": 0.6042909026145935,
71
- "eval_runtime": 4.112,
72
- "eval_samples_per_second": 49.368,
73
- "eval_steps_per_second": 6.323,
74
  "step": 80
75
  },
76
  {
77
  "epoch": 0.9852216748768473,
78
- "grad_norm": 104.98243713378906,
79
  "learning_rate": 5.147058823529411e-06,
80
- "loss": 1.2787,
81
  "step": 100
82
  },
83
  {
84
  "epoch": 0.9852216748768473,
85
- "eval_f1": 0.7425742574257426,
86
- "eval_loss": 0.5480031371116638,
87
- "eval_runtime": 4.2347,
88
- "eval_samples_per_second": 47.937,
89
- "eval_steps_per_second": 6.14,
90
  "step": 100
91
  },
92
  {
93
  "epoch": 1.1773399014778325,
94
- "grad_norm": 30.004165649414062,
95
  "learning_rate": 4.166666666666667e-06,
96
- "loss": 0.6028,
97
  "step": 120
98
  },
99
  {
100
  "epoch": 1.1773399014778325,
101
- "eval_f1": 0.7647058823529411,
102
- "eval_loss": 0.5726549029350281,
103
- "eval_runtime": 4.383,
104
- "eval_samples_per_second": 46.316,
105
- "eval_steps_per_second": 5.932,
106
  "step": 120
107
  },
108
  {
109
  "epoch": 1.374384236453202,
110
- "grad_norm": 49.051124572753906,
111
  "learning_rate": 3.1862745098039216e-06,
112
- "loss": 0.5868,
113
  "step": 140
114
  },
115
  {
116
  "epoch": 1.374384236453202,
117
- "eval_f1": 0.74,
118
- "eval_loss": 0.5683486461639404,
119
- "eval_runtime": 4.1072,
120
- "eval_samples_per_second": 49.425,
121
- "eval_steps_per_second": 6.33,
122
  "step": 140
123
  },
124
  {
125
  "epoch": 1.5714285714285714,
126
- "grad_norm": 27.854595184326172,
127
  "learning_rate": 2.2058823529411767e-06,
128
- "loss": 0.4944,
129
  "step": 160
130
  },
131
  {
132
  "epoch": 1.5714285714285714,
133
- "eval_f1": 0.7793427230046949,
134
- "eval_loss": 0.6091228127479553,
135
- "eval_runtime": 4.1403,
136
- "eval_samples_per_second": 49.031,
137
- "eval_steps_per_second": 6.28,
138
  "step": 160
139
  }
140
  ],
 
1
  {
2
  "best_global_step": 160,
3
+ "best_metric": 0.7714285714285715,
4
  "best_model_checkpoint": "/content/drive/MyDrive/data/models/jigsaw/decoder_with_classification_head/checkpoint-160",
5
  "epoch": 1.5714285714285714,
6
  "eval_steps": 20,
 
11
  "log_history": [
12
  {
13
  "epoch": 0.19704433497536947,
14
+ "grad_norm": 77.9209213256836,
15
  "learning_rate": 9.068627450980392e-06,
16
+ "loss": 1.9571,
17
  "step": 20
18
  },
19
  {
20
  "epoch": 0.19704433497536947,
21
+ "eval_f1": 0.6872246696035242,
22
+ "eval_loss": 0.8138803839683533,
23
+ "eval_runtime": 4.1302,
24
+ "eval_samples_per_second": 49.15,
25
+ "eval_steps_per_second": 6.295,
26
  "step": 20
27
  },
28
  {
29
  "epoch": 0.39408866995073893,
30
+ "grad_norm": 82.646240234375,
31
  "learning_rate": 8.088235294117648e-06,
32
+ "loss": 1.5111,
33
  "step": 40
34
  },
35
  {
36
  "epoch": 0.39408866995073893,
37
+ "eval_f1": 0.6818181818181818,
38
+ "eval_loss": 0.8300531506538391,
39
+ "eval_runtime": 4.1106,
40
+ "eval_samples_per_second": 49.385,
41
+ "eval_steps_per_second": 6.325,
42
  "step": 40
43
  },
44
  {
45
  "epoch": 0.5911330049261084,
46
+ "grad_norm": 73.47966766357422,
47
  "learning_rate": 7.107843137254903e-06,
48
+ "loss": 1.4946,
49
  "step": 60
50
  },
51
  {
52
  "epoch": 0.5911330049261084,
53
+ "eval_f1": 0.6666666666666666,
54
+ "eval_loss": 0.6204099059104919,
55
+ "eval_runtime": 4.1289,
56
+ "eval_samples_per_second": 49.166,
57
+ "eval_steps_per_second": 6.297,
58
  "step": 60
59
  },
60
  {
61
  "epoch": 0.7881773399014779,
62
+ "grad_norm": 71.3563232421875,
63
  "learning_rate": 6.1274509803921575e-06,
64
+ "loss": 1.2645,
65
  "step": 80
66
  },
67
  {
68
  "epoch": 0.7881773399014779,
69
+ "eval_f1": 0.5766871165644172,
70
+ "eval_loss": 0.6484544277191162,
71
+ "eval_runtime": 4.0732,
72
+ "eval_samples_per_second": 49.837,
73
+ "eval_steps_per_second": 6.383,
74
  "step": 80
75
  },
76
  {
77
  "epoch": 0.9852216748768473,
78
+ "grad_norm": 91.21923065185547,
79
  "learning_rate": 5.147058823529411e-06,
80
+ "loss": 1.2083,
81
  "step": 100
82
  },
83
  {
84
  "epoch": 0.9852216748768473,
85
+ "eval_f1": 0.6844919786096256,
86
+ "eval_loss": 0.5715163946151733,
87
+ "eval_runtime": 4.1066,
88
+ "eval_samples_per_second": 49.433,
89
+ "eval_steps_per_second": 6.331,
90
  "step": 100
91
  },
92
  {
93
  "epoch": 1.1773399014778325,
94
+ "grad_norm": 25.411664962768555,
95
  "learning_rate": 4.166666666666667e-06,
96
+ "loss": 0.5234,
97
  "step": 120
98
  },
99
  {
100
  "epoch": 1.1773399014778325,
101
+ "eval_f1": 0.7523809523809524,
102
+ "eval_loss": 0.5852020978927612,
103
+ "eval_runtime": 4.1413,
104
+ "eval_samples_per_second": 49.019,
105
+ "eval_steps_per_second": 6.278,
106
  "step": 120
107
  },
108
  {
109
  "epoch": 1.374384236453202,
110
+ "grad_norm": 36.53538513183594,
111
  "learning_rate": 3.1862745098039216e-06,
112
+ "loss": 0.6668,
113
  "step": 140
114
  },
115
  {
116
  "epoch": 1.374384236453202,
117
+ "eval_f1": 0.7227722772277227,
118
+ "eval_loss": 0.5720672011375427,
119
+ "eval_runtime": 4.1311,
120
+ "eval_samples_per_second": 49.139,
121
+ "eval_steps_per_second": 6.294,
122
  "step": 140
123
  },
124
  {
125
  "epoch": 1.5714285714285714,
126
+ "grad_norm": 27.936038970947266,
127
  "learning_rate": 2.2058823529411767e-06,
128
+ "loss": 0.5102,
129
  "step": 160
130
  },
131
  {
132
  "epoch": 1.5714285714285714,
133
+ "eval_f1": 0.7714285714285715,
134
+ "eval_loss": 0.608908474445343,
135
+ "eval_runtime": 4.1227,
136
+ "eval_samples_per_second": 49.239,
137
+ "eval_steps_per_second": 6.307,
138
  "step": 160
139
  }
140
  ],
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99742976896f8ed6ae6659cef01de9defb910fe869ff938dc78c2198ca7b1964
3
  size 5969
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:25a35e5b4d99ee56e3f3b0f9604cb520ca8d58557147938cf15c91dd6372b784
3
  size 5969