avsolatorio commited on
Commit
689ab5c
1 Parent(s): 3ef2b9d

Training in progress, step 1000, checkpoint

Browse files
last-checkpoint/config.json CHANGED
@@ -8,184 +8,184 @@
8
  "hidden_dropout_prob": 0.1,
9
  "hidden_size": 768,
10
  "id2label": {
11
- "0": "Global Financing Facility for Women, Children and Adolescents",
12
- "1": "Competitiveness",
13
- "2": "Sustainable Communities",
14
- "3": "Education",
15
- "4": "Natural Capital",
16
- "5": "Regional Integration",
17
- "6": "Jobs and Development",
18
- "7": "Innovation and Entrepreneurship",
19
- "8": "Inclusive Cities",
20
- "9": "Jobs & Development",
21
- "10": "Transport",
22
- "11": "Sustainable Infrastructure Finance",
23
- "12": "Pandemic Preparedness and COVID-19",
24
- "13": "Taxes and Government Revenue",
25
- "14": "Labor Markets",
26
- "15": "Measuring Poverty",
27
- "16": "Food Security Update",
28
- "17": "Disability Inclusion",
29
- "18": "Agriculture and Food",
30
- "19": "Trade Facilitation and Logistics",
31
- "20": "Social Protection",
32
- "21": "One Health",
33
- "22": "Biodiversity",
34
- "23": "Social Inclusion",
35
- "24": "Credit Infrastructure",
36
- "25": "Water Supply",
37
- "26": "Early Childhood Development",
38
- "27": "Food System Jobs",
39
- "28": "Migration",
40
- "29": "Indigenous Peoples",
41
- "30": "Universal Health Coverage",
42
- "31": "Financial Sector",
43
- "32": "Procurement for Development",
44
- "33": "Inequality and Shared Prosperity",
45
- "34": "COVID-19 Hub",
46
- "35": "Poverty",
47
- "36": "Financial Stability",
48
- "37": "Digital Development",
49
- "38": "Long-Term Finance",
50
- "39": "Gas Flaring Reduction",
51
- "40": "Mining Investment and Governance Review",
52
- "41": "Small and Medium Enterprises Finance",
53
- "42": "Infrastructure",
54
- "43": "Health",
55
- "44": "Sexual Orientation and Gender Identity",
56
- "45": "Nutrition",
57
- "46": "Financial Inclusion",
58
- "47": "Fragility, Conflict, and Violence",
59
- "48": "Debt Relief",
60
- "49": "Disaster Risk Management",
61
- "50": "Water in Agriculture",
62
- "51": "Livestock and Sustainability",
63
- "52": "Global Value Chains",
64
- "53": "Competition Policy",
65
- "54": "Pollution",
66
- "55": "Urban Development",
67
- "56": "Gender",
68
- "57": "Safety Nets and Cash Transfers",
69
- "58": "Forests and Landscapes",
70
- "59": "Water Resources Management",
71
- "60": "Extractive Industries",
72
- "61": "Social Sustainability and Inclusion",
73
- "62": "Energy",
74
- "63": "Girls' Education",
75
- "64": "Environment",
76
- "65": "Marine Plastic Pollution",
77
- "66": "Education and Technology",
78
- "67": "Financial Integrity",
79
- "68": "Oceans, Fisheries, and Coastal Economies",
80
- "69": "Sanitation",
81
- "70": "Land",
82
- "71": "Higher Education",
83
- "72": "Teachers",
84
- "73": "Investment Climate",
85
- "74": "Debt",
86
- "75": "Climate Change",
87
- "76": "Trade",
88
- "77": "Skills Development",
89
- "78": "Agribusiness and Value Chains",
90
- "79": "Climate-Smart Agriculture",
91
- "80": "Pensions",
92
- "81": "Infectious diseases and Vaccines",
93
- "82": "Payment Systems",
94
- "83": "Community-Driven Development",
95
- "84": "Water",
96
- "85": "Governance",
97
- "86": "Macroeconomics"
98
  },
99
  "initializer_range": 0.02,
100
  "intermediate_size": 3072,
101
  "label2id": {
102
- "Agribusiness and Value Chains": 78,
103
- "Agriculture and Food": 18,
104
- "Biodiversity": 22,
105
- "COVID-19 Hub": 34,
106
- "Climate Change": 75,
107
- "Climate-Smart Agriculture": 79,
108
- "Community-Driven Development": 83,
109
- "Competition Policy": 53,
110
- "Competitiveness": 1,
111
- "Credit Infrastructure": 24,
112
- "Debt": 74,
113
- "Debt Relief": 48,
114
- "Digital Development": 37,
115
- "Disability Inclusion": 17,
116
- "Disaster Risk Management": 49,
117
- "Early Childhood Development": 26,
118
- "Education": 3,
119
- "Education and Technology": 66,
120
- "Energy": 62,
121
- "Environment": 64,
122
- "Extractive Industries": 60,
123
- "Financial Inclusion": 46,
124
- "Financial Integrity": 67,
125
- "Financial Sector": 31,
126
- "Financial Stability": 36,
127
- "Food Security Update": 16,
128
- "Food System Jobs": 27,
129
- "Forests and Landscapes": 58,
130
- "Fragility, Conflict, and Violence": 47,
131
- "Gas Flaring Reduction": 39,
132
- "Gender": 56,
133
- "Girls' Education": 63,
134
- "Global Financing Facility for Women, Children and Adolescents": 0,
135
- "Global Value Chains": 52,
136
- "Governance": 85,
137
- "Health": 43,
138
- "Higher Education": 71,
139
- "Inclusive Cities": 8,
140
- "Indigenous Peoples": 29,
141
- "Inequality and Shared Prosperity": 33,
142
- "Infectious diseases and Vaccines": 81,
143
- "Infrastructure": 42,
144
- "Innovation and Entrepreneurship": 7,
145
- "Investment Climate": 73,
146
- "Jobs & Development": 9,
147
- "Jobs and Development": 6,
148
- "Labor Markets": 14,
149
- "Land": 70,
150
- "Livestock and Sustainability": 51,
151
- "Long-Term Finance": 38,
152
- "Macroeconomics": 86,
153
- "Marine Plastic Pollution": 65,
154
- "Measuring Poverty": 15,
155
- "Migration": 28,
156
- "Mining Investment and Governance Review": 40,
157
- "Natural Capital": 4,
158
- "Nutrition": 45,
159
- "Oceans, Fisheries, and Coastal Economies": 68,
160
- "One Health": 21,
161
- "Pandemic Preparedness and COVID-19": 12,
162
- "Payment Systems": 82,
163
- "Pensions": 80,
164
- "Pollution": 54,
165
- "Poverty": 35,
166
- "Procurement for Development": 32,
167
- "Regional Integration": 5,
168
- "Safety Nets and Cash Transfers": 57,
169
- "Sanitation": 69,
170
- "Sexual Orientation and Gender Identity": 44,
171
- "Skills Development": 77,
172
- "Small and Medium Enterprises Finance": 41,
173
- "Social Inclusion": 23,
174
- "Social Protection": 20,
175
- "Social Sustainability and Inclusion": 61,
176
- "Sustainable Communities": 2,
177
- "Sustainable Infrastructure Finance": 11,
178
- "Taxes and Government Revenue": 13,
179
- "Teachers": 72,
180
- "Trade": 76,
181
- "Trade Facilitation and Logistics": 19,
182
- "Transport": 10,
183
- "Universal Health Coverage": 30,
184
- "Urban Development": 55,
185
- "Water": 84,
186
- "Water Resources Management": 59,
187
- "Water Supply": 25,
188
- "Water in Agriculture": 50
189
  },
190
  "layer_norm_eps": 1e-07,
191
  "max_position_embeddings": 512,
 
8
  "hidden_dropout_prob": 0.1,
9
  "hidden_size": 768,
10
  "id2label": {
11
+ "0": "Indigenous Peoples",
12
+ "1": "Investment Climate",
13
+ "2": "Marine Plastic Pollution",
14
+ "3": "Poverty",
15
+ "4": "Water",
16
+ "5": "Measuring Poverty",
17
+ "6": "Energy",
18
+ "7": "Sustainable Communities",
19
+ "8": "Disability Inclusion",
20
+ "9": "Macroeconomics",
21
+ "10": "Early Childhood Development",
22
+ "11": "Mining Investment and Governance Review",
23
+ "12": "Infrastructure",
24
+ "13": "Community-Driven Development",
25
+ "14": "Land",
26
+ "15": "Innovation and Entrepreneurship",
27
+ "16": "Competitiveness",
28
+ "17": "Biodiversity",
29
+ "18": "Debt",
30
+ "19": "Higher Education",
31
+ "20": "Livestock and Sustainability",
32
+ "21": "Universal Health Coverage",
33
+ "22": "Nutrition",
34
+ "23": "Education and Technology",
35
+ "24": "Small and Medium Enterprises Finance",
36
+ "25": "Climate-Smart Agriculture",
37
+ "26": "Food Security Update",
38
+ "27": "Inclusive Cities",
39
+ "28": "Jobs & Development",
40
+ "29": "Migration",
41
+ "30": "Infectious diseases and Vaccines",
42
+ "31": "Social Protection",
43
+ "32": "Skills Development",
44
+ "33": "Governance",
45
+ "34": "Pollution",
46
+ "35": "Inequality and Shared Prosperity",
47
+ "36": "Oceans, Fisheries, and Coastal Economies",
48
+ "37": "Global Value Chains",
49
+ "38": "Pensions",
50
+ "39": "Food System Jobs",
51
+ "40": "Debt Relief",
52
+ "41": "Sexual Orientation and Gender Identity",
53
+ "42": "Education",
54
+ "43": "Regional Integration",
55
+ "44": "Jobs and Development",
56
+ "45": "Competition Policy",
57
+ "46": "Fragility, Conflict, and Violence",
58
+ "47": "Urban Development",
59
+ "48": "Pandemic Preparedness and COVID-19",
60
+ "49": "Safety Nets and Cash Transfers",
61
+ "50": "Financial Sector",
62
+ "51": "Climate Change",
63
+ "52": "Transport",
64
+ "53": "COVID-19 Hub",
65
+ "54": "Financial Stability",
66
+ "55": "Gender",
67
+ "56": "Natural Capital",
68
+ "57": "Water in Agriculture",
69
+ "58": "Taxes and Government Revenue",
70
+ "59": "Labor Markets",
71
+ "60": "Trade Facilitation and Logistics",
72
+ "61": "Long-Term Finance",
73
+ "62": "Teachers",
74
+ "63": "Sanitation",
75
+ "64": "Forests and Landscapes",
76
+ "65": "Financial Integrity",
77
+ "66": "Payment Systems",
78
+ "67": "Social Inclusion",
79
+ "68": "Health",
80
+ "69": "Girls' Education",
81
+ "70": "Credit Infrastructure",
82
+ "71": "Social Sustainability and Inclusion",
83
+ "72": "Trade",
84
+ "73": "Financial Inclusion",
85
+ "74": "Global Financing Facility for Women, Children and Adolescents",
86
+ "75": "Sustainable Infrastructure Finance",
87
+ "76": "Extractive Industries",
88
+ "77": "Procurement for Development",
89
+ "78": "Agriculture and Food",
90
+ "79": "Water Supply",
91
+ "80": "Disaster Risk Management",
92
+ "81": "One Health",
93
+ "82": "Environment",
94
+ "83": "Digital Development",
95
+ "84": "Agribusiness and Value Chains",
96
+ "85": "Gas Flaring Reduction",
97
+ "86": "Water Resources Management"
98
  },
99
  "initializer_range": 0.02,
100
  "intermediate_size": 3072,
101
  "label2id": {
102
+ "Agribusiness and Value Chains": 84,
103
+ "Agriculture and Food": 78,
104
+ "Biodiversity": 17,
105
+ "COVID-19 Hub": 53,
106
+ "Climate Change": 51,
107
+ "Climate-Smart Agriculture": 25,
108
+ "Community-Driven Development": 13,
109
+ "Competition Policy": 45,
110
+ "Competitiveness": 16,
111
+ "Credit Infrastructure": 70,
112
+ "Debt": 18,
113
+ "Debt Relief": 40,
114
+ "Digital Development": 83,
115
+ "Disability Inclusion": 8,
116
+ "Disaster Risk Management": 80,
117
+ "Early Childhood Development": 10,
118
+ "Education": 42,
119
+ "Education and Technology": 23,
120
+ "Energy": 6,
121
+ "Environment": 82,
122
+ "Extractive Industries": 76,
123
+ "Financial Inclusion": 73,
124
+ "Financial Integrity": 65,
125
+ "Financial Sector": 50,
126
+ "Financial Stability": 54,
127
+ "Food Security Update": 26,
128
+ "Food System Jobs": 39,
129
+ "Forests and Landscapes": 64,
130
+ "Fragility, Conflict, and Violence": 46,
131
+ "Gas Flaring Reduction": 85,
132
+ "Gender": 55,
133
+ "Girls' Education": 69,
134
+ "Global Financing Facility for Women, Children and Adolescents": 74,
135
+ "Global Value Chains": 37,
136
+ "Governance": 33,
137
+ "Health": 68,
138
+ "Higher Education": 19,
139
+ "Inclusive Cities": 27,
140
+ "Indigenous Peoples": 0,
141
+ "Inequality and Shared Prosperity": 35,
142
+ "Infectious diseases and Vaccines": 30,
143
+ "Infrastructure": 12,
144
+ "Innovation and Entrepreneurship": 15,
145
+ "Investment Climate": 1,
146
+ "Jobs & Development": 28,
147
+ "Jobs and Development": 44,
148
+ "Labor Markets": 59,
149
+ "Land": 14,
150
+ "Livestock and Sustainability": 20,
151
+ "Long-Term Finance": 61,
152
+ "Macroeconomics": 9,
153
+ "Marine Plastic Pollution": 2,
154
+ "Measuring Poverty": 5,
155
+ "Migration": 29,
156
+ "Mining Investment and Governance Review": 11,
157
+ "Natural Capital": 56,
158
+ "Nutrition": 22,
159
+ "Oceans, Fisheries, and Coastal Economies": 36,
160
+ "One Health": 81,
161
+ "Pandemic Preparedness and COVID-19": 48,
162
+ "Payment Systems": 66,
163
+ "Pensions": 38,
164
+ "Pollution": 34,
165
+ "Poverty": 3,
166
+ "Procurement for Development": 77,
167
+ "Regional Integration": 43,
168
+ "Safety Nets and Cash Transfers": 49,
169
+ "Sanitation": 63,
170
+ "Sexual Orientation and Gender Identity": 41,
171
+ "Skills Development": 32,
172
+ "Small and Medium Enterprises Finance": 24,
173
+ "Social Inclusion": 67,
174
+ "Social Protection": 31,
175
+ "Social Sustainability and Inclusion": 71,
176
+ "Sustainable Communities": 7,
177
+ "Sustainable Infrastructure Finance": 75,
178
+ "Taxes and Government Revenue": 58,
179
+ "Teachers": 62,
180
+ "Trade": 72,
181
+ "Trade Facilitation and Logistics": 60,
182
+ "Transport": 52,
183
+ "Universal Health Coverage": 21,
184
+ "Urban Development": 47,
185
+ "Water": 4,
186
+ "Water Resources Management": 86,
187
+ "Water Supply": 79,
188
+ "Water in Agriculture": 57
189
  },
190
  "layer_norm_eps": 1e-07,
191
  "max_position_embeddings": 512,
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd8e2da575487f0f514d3ac641b54188b83b65293d17518c847ecd0b5e591ca1
3
  size 567860028
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0261c797ffae126153cbbb1467b83780ff91dc1912396077fc38e284df689975
3
  size 567860028
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:86fe37d6452dbda6b773629c24131b2d3eda42a3d4aa1cf384885d4fe03ef892
3
  size 1135783354
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a1d631affb6dccd4f3fd4cfc3390be46f868b7efccefa2043b4f74f85561c51c
3
  size 1135783354
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2ef6b86d20d6ecc4df3a0c586eaefe72e249b74f60cbccfafd813b5495f54c4f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34649568145f465443beb487f2295ab3dad9e5f49f758646dd823029413e18fe
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c3c3f1ac50882defeade258e77931f3038c02d6276451a81f8ef210f495feca4
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8e136870ce715682b288c29fb4d05a0aec61f1a74b2cc393d7fb9e66e4965261
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,298 +1,38 @@
1
  {
2
- "best_metric": 0.03708931431174278,
3
- "best_model_checkpoint": "doc-topic-model_eval-00_train-03/checkpoint-11000",
4
- "epoch": 5.424063116370808,
5
  "eval_steps": 1000,
6
- "global_step": 11000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.2465483234714004,
13
- "grad_norm": 0.3815906345844269,
14
  "learning_rate": 1.9950690335305722e-05,
15
- "loss": 0.1667,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.4930966469428008,
20
- "grad_norm": 0.36234650015830994,
21
  "learning_rate": 1.9901380670611442e-05,
22
- "loss": 0.0923,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 0.4930966469428008,
27
  "eval_accuracy": 0.9814660487265615,
28
  "eval_f1": 0.0,
29
- "eval_loss": 0.08645907044410706,
30
  "eval_precision": 0.0,
31
  "eval_recall": 0.0,
32
- "eval_runtime": 11.8049,
33
- "eval_samples_per_second": 687.004,
34
- "eval_steps_per_second": 2.711,
35
  "step": 1000
36
- },
37
- {
38
- "epoch": 0.7396449704142012,
39
- "grad_norm": 0.39963558316230774,
40
- "learning_rate": 1.9852071005917162e-05,
41
- "loss": 0.084,
42
- "step": 1500
43
- },
44
- {
45
- "epoch": 0.9861932938856016,
46
- "grad_norm": 0.37207868695259094,
47
- "learning_rate": 1.980276134122288e-05,
48
- "loss": 0.0734,
49
- "step": 2000
50
- },
51
- {
52
- "epoch": 0.9861932938856016,
53
- "eval_accuracy": 0.9814660487265615,
54
- "eval_f1": 0.0,
55
- "eval_loss": 0.0668005496263504,
56
- "eval_precision": 0.0,
57
- "eval_recall": 0.0,
58
- "eval_runtime": 11.8932,
59
- "eval_samples_per_second": 681.903,
60
- "eval_steps_per_second": 2.691,
61
- "step": 2000
62
- },
63
- {
64
- "epoch": 1.232741617357002,
65
- "grad_norm": 0.39202699065208435,
66
- "learning_rate": 1.9753451676528602e-05,
67
- "loss": 0.0653,
68
- "step": 2500
69
- },
70
- {
71
- "epoch": 1.4792899408284024,
72
- "grad_norm": 0.3334597647190094,
73
- "learning_rate": 1.9704142011834322e-05,
74
- "loss": 0.0606,
75
- "step": 3000
76
- },
77
- {
78
- "epoch": 1.4792899408284024,
79
- "eval_accuracy": 0.9823617784202843,
80
- "eval_f1": 0.12648276830209867,
81
- "eval_loss": 0.05524122714996338,
82
- "eval_precision": 0.7700854700854701,
83
- "eval_recall": 0.06889959470826643,
84
- "eval_runtime": 14.1698,
85
- "eval_samples_per_second": 572.346,
86
- "eval_steps_per_second": 2.258,
87
- "step": 3000
88
- },
89
- {
90
- "epoch": 1.725838264299803,
91
- "grad_norm": 0.3517291843891144,
92
- "learning_rate": 1.965483234714004e-05,
93
- "loss": 0.055,
94
- "step": 3500
95
- },
96
- {
97
- "epoch": 1.972386587771203,
98
- "grad_norm": 0.43956857919692993,
99
- "learning_rate": 1.9605522682445763e-05,
100
- "loss": 0.0532,
101
- "step": 4000
102
- },
103
- {
104
- "epoch": 1.972386587771203,
105
- "eval_accuracy": 0.9840511926527489,
106
- "eval_f1": 0.2943500344892456,
107
- "eval_loss": 0.0491451695561409,
108
- "eval_precision": 0.8177700348432055,
109
- "eval_recall": 0.1794754148505009,
110
- "eval_runtime": 14.1385,
111
- "eval_samples_per_second": 573.612,
112
- "eval_steps_per_second": 2.263,
113
- "step": 4000
114
- },
115
- {
116
- "epoch": 2.2189349112426036,
117
- "grad_norm": 0.33584991097450256,
118
- "learning_rate": 1.9556213017751483e-05,
119
- "loss": 0.0483,
120
- "step": 4500
121
- },
122
- {
123
- "epoch": 2.465483234714004,
124
- "grad_norm": 0.35681256651878357,
125
- "learning_rate": 1.95069033530572e-05,
126
- "loss": 0.0466,
127
- "step": 5000
128
- },
129
- {
130
- "epoch": 2.465483234714004,
131
- "eval_accuracy": 0.9850886517283898,
132
- "eval_f1": 0.43230993363189985,
133
- "eval_loss": 0.04674109071493149,
134
- "eval_precision": 0.7342375366568915,
135
- "eval_recall": 0.3063393744742678,
136
- "eval_runtime": 14.36,
137
- "eval_samples_per_second": 564.763,
138
- "eval_steps_per_second": 2.228,
139
- "step": 5000
140
- },
141
- {
142
- "epoch": 2.712031558185404,
143
- "grad_norm": 0.36567422747612,
144
- "learning_rate": 1.9457593688362923e-05,
145
- "loss": 0.0449,
146
- "step": 5500
147
- },
148
- {
149
- "epoch": 2.9585798816568047,
150
- "grad_norm": 0.4769591689109802,
151
- "learning_rate": 1.940828402366864e-05,
152
- "loss": 0.0433,
153
- "step": 6000
154
- },
155
- {
156
- "epoch": 2.9585798816568047,
157
- "eval_accuracy": 0.9859475317828139,
158
- "eval_f1": 0.484694142716075,
159
- "eval_loss": 0.042755745351314545,
160
- "eval_precision": 0.7564892926670993,
161
- "eval_recall": 0.3565802554102623,
162
- "eval_runtime": 14.1095,
163
- "eval_samples_per_second": 574.791,
164
- "eval_steps_per_second": 2.268,
165
- "step": 6000
166
- },
167
- {
168
- "epoch": 3.2051282051282053,
169
- "grad_norm": 0.4941834807395935,
170
- "learning_rate": 1.935897435897436e-05,
171
- "loss": 0.0405,
172
- "step": 6500
173
- },
174
- {
175
- "epoch": 3.4516765285996054,
176
- "grad_norm": 0.39021360874176025,
177
- "learning_rate": 1.930966469428008e-05,
178
- "loss": 0.0391,
179
- "step": 7000
180
- },
181
- {
182
- "epoch": 3.4516765285996054,
183
- "eval_accuracy": 0.9866122425840101,
184
- "eval_f1": 0.5389046177877574,
185
- "eval_loss": 0.04075852409005165,
186
- "eval_precision": 0.7450398164394655,
187
- "eval_recall": 0.4221151640284469,
188
- "eval_runtime": 14.2258,
189
- "eval_samples_per_second": 570.09,
190
- "eval_steps_per_second": 2.249,
191
- "step": 7000
192
- },
193
- {
194
- "epoch": 3.698224852071006,
195
- "grad_norm": 0.45788881182670593,
196
- "learning_rate": 1.92603550295858e-05,
197
- "loss": 0.0381,
198
- "step": 7500
199
- },
200
- {
201
- "epoch": 3.9447731755424065,
202
- "grad_norm": 0.3994843363761902,
203
- "learning_rate": 1.921104536489152e-05,
204
- "loss": 0.0378,
205
- "step": 8000
206
- },
207
- {
208
- "epoch": 3.9447731755424065,
209
- "eval_accuracy": 0.9867312952648214,
210
- "eval_f1": 0.5526994744386049,
211
- "eval_loss": 0.03952678292989731,
212
- "eval_precision": 0.7365338087355151,
213
- "eval_recall": 0.4423032805689378,
214
- "eval_runtime": 14.2307,
215
- "eval_samples_per_second": 569.894,
216
- "eval_steps_per_second": 2.249,
217
- "step": 8000
218
- },
219
- {
220
- "epoch": 4.191321499013807,
221
- "grad_norm": 0.40335726737976074,
222
- "learning_rate": 1.916173570019724e-05,
223
- "loss": 0.0349,
224
- "step": 8500
225
- },
226
- {
227
- "epoch": 4.437869822485207,
228
- "grad_norm": 0.43202295899391174,
229
- "learning_rate": 1.911242603550296e-05,
230
- "loss": 0.0338,
231
- "step": 9000
232
- },
233
- {
234
- "epoch": 4.437869822485207,
235
- "eval_accuracy": 0.986986408152274,
236
- "eval_f1": 0.5843744341843201,
237
- "eval_loss": 0.03873027116060257,
238
- "eval_precision": 0.7160288408208542,
239
- "eval_recall": 0.49361474344268563,
240
- "eval_runtime": 14.0731,
241
- "eval_samples_per_second": 576.277,
242
- "eval_steps_per_second": 2.274,
243
- "step": 9000
244
- },
245
- {
246
- "epoch": 4.684418145956608,
247
- "grad_norm": 0.3353199064731598,
248
- "learning_rate": 1.906311637080868e-05,
249
- "loss": 0.034,
250
- "step": 9500
251
- },
252
- {
253
- "epoch": 4.930966469428008,
254
- "grad_norm": 0.3929939270019531,
255
- "learning_rate": 1.90138067061144e-05,
256
- "loss": 0.0333,
257
- "step": 10000
258
- },
259
- {
260
- "epoch": 4.930966469428008,
261
- "eval_accuracy": 0.9870771149567017,
262
- "eval_f1": 0.5952951620062139,
263
- "eval_loss": 0.037975214421749115,
264
- "eval_precision": 0.7094044218766529,
265
- "eval_recall": 0.5128087481838343,
266
- "eval_runtime": 14.136,
267
- "eval_samples_per_second": 573.711,
268
- "eval_steps_per_second": 2.264,
269
- "step": 10000
270
- },
271
- {
272
- "epoch": 5.177514792899408,
273
- "grad_norm": 0.2832840085029602,
274
- "learning_rate": 1.896459566074951e-05,
275
- "loss": 0.0303,
276
- "step": 10500
277
- },
278
- {
279
- "epoch": 5.424063116370808,
280
- "grad_norm": 0.43404069542884827,
281
- "learning_rate": 1.891528599605523e-05,
282
- "loss": 0.0301,
283
- "step": 11000
284
- },
285
- {
286
- "epoch": 5.424063116370808,
287
- "eval_accuracy": 0.9875660813243193,
288
- "eval_f1": 0.6041958041958042,
289
- "eval_loss": 0.03708931431174278,
290
- "eval_precision": 0.7367957746478874,
291
- "eval_recall": 0.5120440467997247,
292
- "eval_runtime": 14.265,
293
- "eval_samples_per_second": 568.525,
294
- "eval_steps_per_second": 2.243,
295
- "step": 11000
296
  }
297
  ],
298
  "logging_steps": 500,
@@ -321,7 +61,7 @@
321
  "attributes": {}
322
  }
323
  },
324
- "total_flos": 248019064848396.0,
325
  "train_batch_size": 4,
326
  "trial_name": null,
327
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.08951831609010696,
3
+ "best_model_checkpoint": "doc-topic-model_eval-00_train-03/checkpoint-1000",
4
+ "epoch": 0.4930966469428008,
5
  "eval_steps": 1000,
6
+ "global_step": 1000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.2465483234714004,
13
+ "grad_norm": 0.3881610631942749,
14
  "learning_rate": 1.9950690335305722e-05,
15
+ "loss": 0.1669,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.4930966469428008,
20
+ "grad_norm": 0.3564796447753906,
21
  "learning_rate": 1.9901380670611442e-05,
22
+ "loss": 0.0935,
23
  "step": 1000
24
  },
25
  {
26
  "epoch": 0.4930966469428008,
27
  "eval_accuracy": 0.9814660487265615,
28
  "eval_f1": 0.0,
29
+ "eval_loss": 0.08951831609010696,
30
  "eval_precision": 0.0,
31
  "eval_recall": 0.0,
32
+ "eval_runtime": 12.2339,
33
+ "eval_samples_per_second": 662.913,
34
+ "eval_steps_per_second": 2.616,
35
  "step": 1000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
36
  }
37
  ],
38
  "logging_steps": 500,
 
61
  "attributes": {}
62
  }
63
  },
64
+ "total_flos": 22523300934480.0,
65
  "train_batch_size": 4,
66
  "trial_name": null,
67
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:247cccd0c3dbe5c727fd031150f0944d463e134495ac1c2a8628ab0ed563cdcb
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b0815310043095bb92c789617e2f93d478e30d81c4c444abbe71b4d0c0c4fde6
3
  size 5240