timo-10000 commited on
Commit
69e9249
·
verified ·
1 Parent(s): d921785

Upload 11 files

Browse files
Files changed (7) hide show
  1. config.json +176 -268
  2. model.safetensors +2 -2
  3. optimizer.pt +2 -2
  4. rng_state.pth +1 -1
  5. scheduler.pt +1 -1
  6. trainer_state.json +169 -160
  7. training_args.bin +1 -1
config.json CHANGED
@@ -11,284 +11,192 @@
11
  "hidden_dropout_prob": 0.1,
12
  "hidden_size": 1024,
13
  "id2label": {
14
- "0": "B-acid_fast_test",
15
- "1": "B-Acid_Formula",
16
- "2": "B-Acid_Name",
17
- "3": "B-Acid_Species",
18
- "4": "B-acyl_type",
19
- "5": "B-Antibiotic",
20
- "6": "B-Antibiotic_Resistant",
21
- "7": "B-Antibiotic_Susceptibility",
22
- "8": "B-Biochemical_Test_Enzyme",
23
- "9": "B-Carbon_Source_Metabolite",
24
- "10": "B-Cell_arrangement",
25
- "11": "B-Cell_length",
26
- "12": "B-Cell_shape",
27
- "13": "B-Cell_Wall_sugar",
28
- "14": "B-Cell_width",
29
- "15": "B-Colony_Color",
30
- "16": "B-Colony_Margin",
31
- "17": "B-Colony_shape",
32
- "18": "B-Colony_size",
33
- "19": "B-Colony_Texture",
34
- "20": "B-Culture_Medium",
35
- "21": "B-culture_time",
36
- "22": "B-Disease_Name",
37
- "23": "B-Disease_Symptom",
38
- "24": "B-Effect_Of_Growth",
39
- "25": "B-Gas_Formula",
40
- "26": "B-Gas_Name",
41
- "27": "B-GC_Content",
42
- "28": "B-Gene_Length",
43
- "29": "B-Habitat",
44
- "30": "B-Hemolysis_Type",
45
  "31": "B-Kind_Of_Utilization_tested",
46
- "32": "B-Major_fatty_acids",
47
- "33": "B-Metal_ion_Name",
48
- "34": "B-Metal_ion_Type",
49
- "35": "B-Microbe_Name",
50
  "36": "B-MK_Reaction",
51
- "37": "B-Mobility",
52
- "38": "B-Nacl_Max",
53
- "39": "B-Nacl_Min",
54
- "40": "B-Nacl_Optimal",
55
- "41": "B-Nacl_Range",
56
- "42": "B-Nitrogen_Sourcecol_Metabolite",
57
- "43": "B-Organ_Name",
58
  "44": "B-Oxygen_Requirements",
59
- "45": "B-Pathogenic",
60
- "46": "B-Peptidoglycan",
61
- "47": "B-pH_Max",
62
- "48": "B-pH_Min",
63
- "49": "B-pH_NO",
64
- "50": "B-pH_Optimal",
65
- "51": "B-pH_Range",
66
- "52": "B-Pigment_Name",
67
- "53": "B-Pigment_Type",
68
- "54": "B-Polar_Lipids",
69
- "55": "B-Predominant_polyamines",
70
- "56": "B-rDNA_16S_accession_no",
71
- "57": "B-Salinity_Range",
72
- "58": "B-Sporulation",
73
- "59": "B-Temperature_Max",
74
- "60": "B-Temperature_Min",
75
- "61": "B-Temperature_Optimal",
76
- "62": "B-Temperature_Range",
77
- "63": "B-Type_strain",
78
- "64": "B-Vitamins_Cofactors_Name",
79
- "65": "B-Vitamins_Cofactors_Type",
80
- "66": "B-Voges_Proskauer_reaction",
81
- "67": "B-Whole_Genome_accession_no",
82
- "68": "I-acid_fast_test",
83
- "69": "I-Acid_Formula",
84
- "70": "I-Acid_Name",
85
- "71": "I-Acid_Species",
86
- "72": "I-acyl_type",
87
- "73": "I-Antibiotic",
88
- "74": "I-Antibiotic_Resistant",
89
- "75": "I-Antibiotic_Susceptibility",
90
- "76": "I-Biochemical_Test_Enzyme",
91
- "77": "I-Carbon_Source_Metabolite",
92
- "78": "I-Cell_arrangement",
93
- "79": "I-Cell_length",
94
- "80": "I-Cell_shape",
95
- "81": "I-Cell_Wall_sugar",
96
- "82": "I-Cell_width",
97
- "83": "I-Colony_Color",
98
- "84": "I-Colony_Margin",
99
- "85": "I-Colony_shape",
100
- "86": "I-Colony_size",
101
- "87": "I-Colony_Texture",
102
- "88": "I-Culture_Medium",
103
- "89": "I-culture_time",
104
- "90": "I-Disease_Name",
105
- "91": "I-Disease_Symptom",
106
- "92": "I-Effect_Of_Growth",
107
- "93": "I-Gas_Formula",
108
- "94": "I-Gas_Name",
109
- "95": "I-GC_Content",
110
- "96": "I-Gene_Length",
111
- "97": "I-Habitat",
112
- "98": "I-Hemolysis_Type",
113
- "99": "I-Kind_Of_Utilization_tested",
114
- "100": "I-Major_fatty_acids",
115
- "101": "I-Metal_ion_Name",
116
- "102": "I-Metal_ion_Type",
117
- "103": "I-Microbe_Name",
118
- "104": "I-MK_Reaction",
119
- "105": "I-Mobility",
120
- "106": "I-Nacl_Max",
121
- "107": "I-Nacl_Min",
122
- "108": "I-Nacl_Optimal",
123
- "109": "I-Nacl_Range",
124
- "110": "I-Nitrogen_Sourcecol_Metabolite",
125
- "111": "I-Organ_Name",
126
- "112": "I-Oxygen_Requirements",
127
- "113": "I-Pathogenic",
128
- "114": "I-Peptidoglycan",
129
- "115": "I-pH_Max",
130
- "116": "I-pH_Min",
131
- "117": "I-pH_NO",
132
- "118": "I-pH_Optimal",
133
- "119": "I-pH_Range",
134
- "120": "I-Pigment_Name",
135
- "121": "I-Pigment_Type",
136
- "122": "I-Polar_Lipids",
137
- "123": "I-Predominant_polyamines",
138
- "124": "I-rDNA_16S_accession_no",
139
- "125": "I-Salinity_Range",
140
- "126": "I-Sporulation",
141
- "127": "I-Temperature_Max",
142
- "128": "I-Temperature_Min",
143
- "129": "I-Temperature_Optimal",
144
- "130": "I-Temperature_Range",
145
- "131": "I-Type_strain",
146
- "132": "I-Vitamins_Cofactors_Name",
147
- "133": "I-Vitamins_Cofactors_Type",
148
- "134": "I-Voges_Proskauer_reaction",
149
- "135": "I-Whole_Genome_accession_no",
150
- "136": "O"
151
  },
152
  "initializer_range": 0.02,
153
  "intermediate_size": 4096,
154
  "label2id": {
155
- "B-Acid_Formula": 1,
156
- "B-Acid_Name": 2,
157
- "B-Acid_Species": 3,
158
- "B-Antibiotic": 5,
159
- "B-Antibiotic_Resistant": 6,
160
- "B-Antibiotic_Susceptibility": 7,
161
- "B-Biochemical_Test_Enzyme": 8,
162
- "B-Carbon_Source_Metabolite": 9,
163
- "B-Cell_Wall_sugar": 13,
164
- "B-Cell_arrangement": 10,
165
- "B-Cell_length": 11,
166
- "B-Cell_shape": 12,
167
- "B-Cell_width": 14,
168
- "B-Colony_Color": 15,
169
- "B-Colony_Margin": 16,
170
- "B-Colony_Texture": 19,
171
- "B-Colony_shape": 17,
172
- "B-Colony_size": 18,
173
- "B-Culture_Medium": 20,
174
- "B-Disease_Name": 22,
175
- "B-Disease_Symptom": 23,
176
- "B-Effect_Of_Growth": 24,
177
- "B-GC_Content": 27,
178
- "B-Gas_Formula": 25,
179
- "B-Gas_Name": 26,
180
- "B-Gene_Length": 28,
181
- "B-Habitat": 29,
182
- "B-Hemolysis_Type": 30,
183
  "B-Kind_Of_Utilization_tested": 31,
184
  "B-MK_Reaction": 36,
185
- "B-Major_fatty_acids": 32,
186
- "B-Metal_ion_Name": 33,
187
- "B-Metal_ion_Type": 34,
188
- "B-Microbe_Name": 35,
189
- "B-Mobility": 37,
190
- "B-Nacl_Max": 38,
191
- "B-Nacl_Min": 39,
192
- "B-Nacl_Optimal": 40,
193
- "B-Nacl_Range": 41,
194
- "B-Nitrogen_Sourcecol_Metabolite": 42,
195
- "B-Organ_Name": 43,
196
  "B-Oxygen_Requirements": 44,
197
- "B-Pathogenic": 45,
198
- "B-Peptidoglycan": 46,
199
- "B-Pigment_Name": 52,
200
- "B-Pigment_Type": 53,
201
- "B-Polar_Lipids": 54,
202
- "B-Predominant_polyamines": 55,
203
- "B-Salinity_Range": 57,
204
- "B-Sporulation": 58,
205
- "B-Temperature_Max": 59,
206
- "B-Temperature_Min": 60,
207
- "B-Temperature_Optimal": 61,
208
- "B-Temperature_Range": 62,
209
- "B-Type_strain": 63,
210
- "B-Vitamins_Cofactors_Name": 64,
211
- "B-Vitamins_Cofactors_Type": 65,
212
- "B-Voges_Proskauer_reaction": 66,
213
- "B-Whole_Genome_accession_no": 67,
214
- "B-acid_fast_test": 0,
215
- "B-acyl_type": 4,
216
- "B-culture_time": 21,
217
- "B-pH_Max": 47,
218
- "B-pH_Min": 48,
219
- "B-pH_NO": 49,
220
- "B-pH_Optimal": 50,
221
- "B-pH_Range": 51,
222
- "B-rDNA_16S_accession_no": 56,
223
- "I-Acid_Formula": 69,
224
- "I-Acid_Name": 70,
225
- "I-Acid_Species": 71,
226
- "I-Antibiotic": 73,
227
- "I-Antibiotic_Resistant": 74,
228
- "I-Antibiotic_Susceptibility": 75,
229
- "I-Biochemical_Test_Enzyme": 76,
230
- "I-Carbon_Source_Metabolite": 77,
231
- "I-Cell_Wall_sugar": 81,
232
- "I-Cell_arrangement": 78,
233
- "I-Cell_length": 79,
234
- "I-Cell_shape": 80,
235
- "I-Cell_width": 82,
236
- "I-Colony_Color": 83,
237
- "I-Colony_Margin": 84,
238
- "I-Colony_Texture": 87,
239
- "I-Colony_shape": 85,
240
- "I-Colony_size": 86,
241
- "I-Culture_Medium": 88,
242
- "I-Disease_Name": 90,
243
- "I-Disease_Symptom": 91,
244
- "I-Effect_Of_Growth": 92,
245
- "I-GC_Content": 95,
246
- "I-Gas_Formula": 93,
247
- "I-Gas_Name": 94,
248
- "I-Gene_Length": 96,
249
- "I-Habitat": 97,
250
- "I-Hemolysis_Type": 98,
251
- "I-Kind_Of_Utilization_tested": 99,
252
- "I-MK_Reaction": 104,
253
- "I-Major_fatty_acids": 100,
254
- "I-Metal_ion_Name": 101,
255
- "I-Metal_ion_Type": 102,
256
- "I-Microbe_Name": 103,
257
- "I-Mobility": 105,
258
- "I-Nacl_Max": 106,
259
- "I-Nacl_Min": 107,
260
- "I-Nacl_Optimal": 108,
261
- "I-Nacl_Range": 109,
262
- "I-Nitrogen_Sourcecol_Metabolite": 110,
263
- "I-Organ_Name": 111,
264
- "I-Oxygen_Requirements": 112,
265
- "I-Pathogenic": 113,
266
- "I-Peptidoglycan": 114,
267
- "I-Pigment_Name": 120,
268
- "I-Pigment_Type": 121,
269
- "I-Polar_Lipids": 122,
270
- "I-Predominant_polyamines": 123,
271
- "I-Salinity_Range": 125,
272
- "I-Sporulation": 126,
273
- "I-Temperature_Max": 127,
274
- "I-Temperature_Min": 128,
275
- "I-Temperature_Optimal": 129,
276
- "I-Temperature_Range": 130,
277
- "I-Type_strain": 131,
278
- "I-Vitamins_Cofactors_Name": 132,
279
- "I-Vitamins_Cofactors_Type": 133,
280
- "I-Voges_Proskauer_reaction": 134,
281
- "I-Whole_Genome_accession_no": 135,
282
- "I-acid_fast_test": 68,
283
- "I-acyl_type": 72,
284
- "I-culture_time": 89,
285
- "I-pH_Max": 115,
286
- "I-pH_Min": 116,
287
- "I-pH_NO": 117,
288
- "I-pH_Optimal": 118,
289
- "I-pH_Range": 119,
290
- "I-rDNA_16S_accession_no": 124,
291
- "O": 136
292
  },
293
  "layer_norm_eps": 1e-12,
294
  "max_position_embeddings": 512,
 
11
  "hidden_dropout_prob": 0.1,
12
  "hidden_size": 1024,
13
  "id2label": {
14
+ "0": "B-acyl_type",
15
+ "1": "B-cell_size",
16
+ "2": "I-Antibiotic_Resistant",
17
+ "3": "I-Acid_Species",
18
+ "4": "B-Mobility",
19
+ "5": "B-Cell_arrangement",
20
+ "6": "B-Vitamins_Cofactors_Name",
21
+ "7": "B-Cell_Wall_sugar",
22
+ "8": "B-Pathogenic",
23
+ "9": "I-Polar_Lipids",
24
+ "10": "I-cell_size",
25
+ "11": "I-acid_fast_test",
26
+ "12": "I-Hemolysis_Type",
27
+ "13": "B-Salinity_Range",
28
+ "14": "I-cell_shape",
29
+ "15": "I-acyl_type",
30
+ "16": "I-Colony_Texture",
31
+ "17": "B-Disease_Name",
32
+ "18": "B-Pigment_Name",
33
+ "19": "I-Acid_Name",
34
+ "20": "I-colony_size",
35
+ "21": "I-Pathogenic",
36
+ "22": "I-Antibiotic",
37
+ "23": "B-Carbon_Source_Metabolite",
38
+ "24": "I-Microbe_Name",
39
+ "25": "O",
40
+ "26": "B-Major_fatty_acids",
41
+ "27": "I-MK_Reaction",
42
+ "28": "I-Sporulation",
43
+ "29": "B-temperature",
44
+ "30": "B-Habitat",
45
  "31": "B-Kind_Of_Utilization_tested",
46
+ "32": "B-Colony_Color",
47
+ "33": "I-Metal_ion_Name",
48
+ "34": "B-nacl",
49
+ "35": "I-temperature",
50
  "36": "B-MK_Reaction",
51
+ "37": "B-Colony_shape",
52
+ "38": "I-Vitamins_Cofactors_Name",
53
+ "39": "I-Cell_Wall_sugar",
54
+ "40": "B-Voges_Proskauer_reaction",
55
+ "41": "B-Acid_Formula",
56
+ "42": "I-Major_fatty_acids",
57
+ "43": "B-ph",
58
  "44": "B-Oxygen_Requirements",
59
+ "45": "I-Pigment_Name",
60
+ "46": "B-culture_time",
61
+ "47": "I-culture_time",
62
+ "48": "I-nacl",
63
+ "49": "I-Nitrogen_Sourcecol_Metabolite",
64
+ "50": "I-Biochemical_Test_Enzyme",
65
+ "51": "I-Antibiotic_Susceptibility",
66
+ "52": "B-Antibiotic",
67
+ "53": "I-Colony_shape",
68
+ "54": "I-Colony_Color",
69
+ "55": "I-Salinity_Range",
70
+ "56": "I-Mobility",
71
+ "57": "I-Disease_Name",
72
+ "58": "B-Hemolysis_Type",
73
+ "59": "B-Antibiotic_Susceptibility",
74
+ "60": "B-acid_fast_test",
75
+ "61": "I-Carbon_Source_Metabolite",
76
+ "62": "I-Oxygen_Requirements",
77
+ "63": "B-Gas_Name",
78
+ "64": "B-Antibiotic_Resistant",
79
+ "65": "B-Colony_Texture",
80
+ "66": "B-Peptidoglycan",
81
+ "67": "B-Sporulation",
82
+ "68": "B-colony_size",
83
+ "69": "I-Cell_arrangement",
84
+ "70": "I-Acid_Formula",
85
+ "71": "I-Gas_Name",
86
+ "72": "B-Polar_Lipids",
87
+ "73": "B-Nitrogen_Sourcecol_Metabolite",
88
+ "74": "B-Acid_Species",
89
+ "75": "B-Microbe_Name",
90
+ "76": "B-Colony_Margin",
91
+ "77": "I-Type_strain",
92
+ "78": "I-Peptidoglycan",
93
+ "79": "B-Metal_ion_Name",
94
+ "80": "I-ph",
95
+ "81": "B-cell_shape",
96
+ "82": "I-Colony_Margin",
97
+ "83": "I-Habitat",
98
+ "84": "B-rDNA_16S_accession_no",
99
+ "85": "B-Whole_Genome_accession_no",
100
+ "86": "B-Biochemical_Test_Enzyme",
101
+ "87": "B-Acid_Name",
102
+ "88": "B-Predominant_polyamines",
103
+ "89": "B-Type_strain",
104
+ "90": "I-Kind_Of_Utilization_tested"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
105
  },
106
  "initializer_range": 0.02,
107
  "intermediate_size": 4096,
108
  "label2id": {
109
+ "B-Acid_Formula": 41,
110
+ "B-Acid_Name": 87,
111
+ "B-Acid_Species": 74,
112
+ "B-Antibiotic": 52,
113
+ "B-Antibiotic_Resistant": 64,
114
+ "B-Antibiotic_Susceptibility": 59,
115
+ "B-Biochemical_Test_Enzyme": 86,
116
+ "B-Carbon_Source_Metabolite": 23,
117
+ "B-Cell_Wall_sugar": 7,
118
+ "B-Cell_arrangement": 5,
119
+ "B-Colony_Color": 32,
120
+ "B-Colony_Margin": 76,
121
+ "B-Colony_Texture": 65,
122
+ "B-Colony_shape": 37,
123
+ "B-Disease_Name": 17,
124
+ "B-Gas_Name": 63,
125
+ "B-Habitat": 30,
126
+ "B-Hemolysis_Type": 58,
 
 
 
 
 
 
 
 
 
 
127
  "B-Kind_Of_Utilization_tested": 31,
128
  "B-MK_Reaction": 36,
129
+ "B-Major_fatty_acids": 26,
130
+ "B-Metal_ion_Name": 79,
131
+ "B-Microbe_Name": 75,
132
+ "B-Mobility": 4,
133
+ "B-Nitrogen_Sourcecol_Metabolite": 73,
 
 
 
 
 
 
134
  "B-Oxygen_Requirements": 44,
135
+ "B-Pathogenic": 8,
136
+ "B-Peptidoglycan": 66,
137
+ "B-Pigment_Name": 18,
138
+ "B-Polar_Lipids": 72,
139
+ "B-Predominant_polyamines": 88,
140
+ "B-Salinity_Range": 13,
141
+ "B-Sporulation": 67,
142
+ "B-Type_strain": 89,
143
+ "B-Vitamins_Cofactors_Name": 6,
144
+ "B-Voges_Proskauer_reaction": 40,
145
+ "B-Whole_Genome_accession_no": 85,
146
+ "B-acid_fast_test": 60,
147
+ "B-acyl_type": 0,
148
+ "B-cell_shape": 81,
149
+ "B-cell_size": 1,
150
+ "B-colony_size": 68,
151
+ "B-culture_time": 46,
152
+ "B-nacl": 34,
153
+ "B-ph": 43,
154
+ "B-rDNA_16S_accession_no": 84,
155
+ "B-temperature": 29,
156
+ "I-Acid_Formula": 70,
157
+ "I-Acid_Name": 19,
158
+ "I-Acid_Species": 3,
159
+ "I-Antibiotic": 22,
160
+ "I-Antibiotic_Resistant": 2,
161
+ "I-Antibiotic_Susceptibility": 51,
162
+ "I-Biochemical_Test_Enzyme": 50,
163
+ "I-Carbon_Source_Metabolite": 61,
164
+ "I-Cell_Wall_sugar": 39,
165
+ "I-Cell_arrangement": 69,
166
+ "I-Colony_Color": 54,
167
+ "I-Colony_Margin": 82,
168
+ "I-Colony_Texture": 16,
169
+ "I-Colony_shape": 53,
170
+ "I-Disease_Name": 57,
171
+ "I-Gas_Name": 71,
172
+ "I-Habitat": 83,
173
+ "I-Hemolysis_Type": 12,
174
+ "I-Kind_Of_Utilization_tested": 90,
175
+ "I-MK_Reaction": 27,
176
+ "I-Major_fatty_acids": 42,
177
+ "I-Metal_ion_Name": 33,
178
+ "I-Microbe_Name": 24,
179
+ "I-Mobility": 56,
180
+ "I-Nitrogen_Sourcecol_Metabolite": 49,
181
+ "I-Oxygen_Requirements": 62,
182
+ "I-Pathogenic": 21,
183
+ "I-Peptidoglycan": 78,
184
+ "I-Pigment_Name": 45,
185
+ "I-Polar_Lipids": 9,
186
+ "I-Salinity_Range": 55,
187
+ "I-Sporulation": 28,
188
+ "I-Type_strain": 77,
189
+ "I-Vitamins_Cofactors_Name": 38,
190
+ "I-acid_fast_test": 11,
191
+ "I-acyl_type": 15,
192
+ "I-cell_shape": 14,
193
+ "I-cell_size": 10,
194
+ "I-colony_size": 20,
195
+ "I-culture_time": 47,
196
+ "I-nacl": 48,
197
+ "I-ph": 80,
198
+ "I-temperature": 35,
199
+ "O": 25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
200
  },
201
  "layer_norm_eps": 1e-12,
202
  "max_position_embeddings": 512,
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f5551903c51f247df1fedd799f9b0b5e7bd96432a850415b9bb3db5c23e7fca
3
- size 1330727172
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6fb2ea34aca4ffb3a0ec30e8cad082123109494a6a8e3d430b43f42604b13b1b
3
+ size 1330538572
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:11d9b5a93c4d17bef22d10d33e29e546299eed7de304d8a22ece67ef09134e49
3
- size 2661688312
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ac22898d1cd6520808a2bfbd87d1d16c94cd985c29fac668382a1a4b652d978c
3
+ size 2661311096
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3b9cf2bdc18485239b323eb29aac0d1c581138879949bb50464f828e3d98fad0
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:55bd7150dc7868ae964590073e06764934bde1c8dc172b936d00083fb44c7967
3
  size 14645
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f0d93cac5fd9c451264d7ef7c24144d7a9a36046d60849fa58dd998ac53423b4
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f4e39f542d9787b5e8fb0039e832f3592a8177f418cf5fbbe329abf1a68d4af
3
  size 1465
trainer_state.json CHANGED
@@ -1,253 +1,262 @@
1
  {
2
- "best_metric": 0.4775617718696594,
3
- "best_model_checkpoint": "/mnt/b74bec9d-6899-4c43-8113-eae42a66c7c1/lujing/model_output107/checkpoint-6986",
4
- "epoch": 4.0,
5
  "eval_steps": 500,
6
- "global_step": 13972,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.143143429716576,
13
- "grad_norm": 4.973369121551514,
14
- "learning_rate": 9.95228552342781e-06,
15
- "loss": 1.1304,
16
  "step": 500
17
  },
18
  {
19
- "epoch": 0.286286859433152,
20
- "grad_norm": 1.6813290119171143,
21
- "learning_rate": 9.904571046855617e-06,
22
- "loss": 0.7514,
23
  "step": 1000
24
  },
25
  {
26
- "epoch": 0.42943028914972803,
27
- "grad_norm": 9.68794059753418,
28
- "learning_rate": 9.856856570283426e-06,
29
- "loss": 0.5719,
30
  "step": 1500
31
  },
32
  {
33
- "epoch": 0.572573718866304,
34
- "grad_norm": 29.16054344177246,
35
- "learning_rate": 9.809142093711233e-06,
36
- "loss": 0.6095,
37
  "step": 2000
38
  },
39
  {
40
- "epoch": 0.71571714858288,
41
- "grad_norm": 5.089508533477783,
42
- "learning_rate": 9.76142761713904e-06,
43
- "loss": 0.5383,
44
  "step": 2500
45
  },
46
  {
47
- "epoch": 0.8588605782994561,
48
- "grad_norm": 10.23388671875,
49
- "learning_rate": 9.713713140566848e-06,
50
- "loss": 0.5345,
51
  "step": 3000
52
  },
53
  {
54
- "epoch": 1.0,
55
- "eval_accuracy": 0.86970367309235,
56
- "eval_f1": 0.6656128404669261,
57
- "eval_loss": 0.5136976838111877,
58
- "eval_precision": 0.6540023894862604,
59
- "eval_recall": 0.6776429809358753,
60
- "eval_runtime": 16.4963,
61
- "eval_samples_per_second": 211.744,
62
- "eval_steps_per_second": 52.982,
63
- "step": 3493
64
- },
65
- {
66
- "epoch": 1.002004008016032,
67
- "grad_norm": 6.84984016418457,
68
- "learning_rate": 9.665998663994657e-06,
69
- "loss": 0.4692,
70
  "step": 3500
71
  },
72
  {
73
- "epoch": 1.145147437732608,
74
- "grad_norm": 4.167309284210205,
75
- "learning_rate": 9.618284187422466e-06,
76
- "loss": 0.3873,
77
  "step": 4000
78
  },
79
  {
80
- "epoch": 1.288290867449184,
81
- "grad_norm": 11.164390563964844,
82
- "learning_rate": 9.570569710850273e-06,
83
- "loss": 0.4222,
84
  "step": 4500
85
  },
86
  {
87
- "epoch": 1.43143429716576,
88
- "grad_norm": 0.684718906879425,
89
- "learning_rate": 9.522855234278082e-06,
90
- "loss": 0.4337,
91
  "step": 5000
92
  },
93
  {
94
- "epoch": 1.5745777268823362,
95
- "grad_norm": 9.962566375732422,
96
- "learning_rate": 9.475140757705889e-06,
97
- "loss": 0.4306,
 
 
 
 
 
 
 
 
 
 
 
 
98
  "step": 5500
99
  },
100
  {
101
- "epoch": 1.7177211565989121,
102
- "grad_norm": 0.070244699716568,
103
- "learning_rate": 9.427426281133696e-06,
104
- "loss": 0.4054,
105
  "step": 6000
106
  },
107
  {
108
- "epoch": 1.860864586315488,
109
- "grad_norm": 5.674328804016113,
110
- "learning_rate": 9.379711804561505e-06,
111
- "loss": 0.3938,
112
  "step": 6500
113
  },
114
  {
115
- "epoch": 2.0,
116
- "eval_accuracy": 0.8670053722925833,
117
- "eval_f1": 0.6968295159652488,
118
- "eval_loss": 0.4775617718696594,
119
- "eval_precision": 0.6401326699834162,
120
- "eval_recall": 0.7645456796236693,
121
- "eval_runtime": 14.6745,
122
- "eval_samples_per_second": 238.032,
123
- "eval_steps_per_second": 59.559,
124
- "step": 6986
125
- },
126
- {
127
- "epoch": 2.004008016032064,
128
- "grad_norm": 2.6043221950531006,
129
- "learning_rate": 9.331997327989312e-06,
130
- "loss": 0.4227,
131
  "step": 7000
132
  },
133
  {
134
- "epoch": 2.1471514457486403,
135
- "grad_norm": 0.37574130296707153,
136
- "learning_rate": 9.284282851417121e-06,
137
- "loss": 0.3532,
138
  "step": 7500
139
  },
140
  {
141
- "epoch": 2.290294875465216,
142
- "grad_norm": 5.184791088104248,
143
- "learning_rate": 9.236568374844929e-06,
144
- "loss": 0.3008,
145
  "step": 8000
146
  },
147
  {
148
- "epoch": 2.433438305181792,
149
- "grad_norm": 10.052635192871094,
150
- "learning_rate": 9.188853898272737e-06,
151
- "loss": 0.4031,
152
  "step": 8500
153
  },
154
  {
155
- "epoch": 2.576581734898368,
156
- "grad_norm": 0.7776626348495483,
157
- "learning_rate": 9.141139421700545e-06,
158
- "loss": 0.3207,
159
  "step": 9000
160
  },
161
  {
162
- "epoch": 2.7197251646149443,
163
- "grad_norm": 1.3186050653457642,
164
- "learning_rate": 9.093424945128354e-06,
165
- "loss": 0.3637,
166
  "step": 9500
167
  },
168
  {
169
- "epoch": 2.86286859433152,
170
- "grad_norm": 0.26613909006118774,
171
- "learning_rate": 9.045710468556161e-06,
172
- "loss": 0.3492,
173
  "step": 10000
174
  },
175
  {
176
- "epoch": 3.0,
177
- "eval_accuracy": 0.8786493910591439,
178
- "eval_f1": 0.7150064531268332,
179
- "eval_loss": 0.5120731592178345,
180
- "eval_precision": 0.6795272078501338,
181
- "eval_recall": 0.7543946521416192,
182
- "eval_runtime": 14.5883,
183
- "eval_samples_per_second": 239.438,
184
- "eval_steps_per_second": 59.911,
185
- "step": 10479
186
- },
187
- {
188
- "epoch": 3.006012024048096,
189
- "grad_norm": 6.599904537200928,
190
- "learning_rate": 8.997995991983968e-06,
191
- "loss": 0.3317,
192
  "step": 10500
193
  },
194
  {
195
- "epoch": 3.1491554537646724,
196
- "grad_norm": 1.1644268035888672,
197
- "learning_rate": 8.950281515411775e-06,
198
- "loss": 0.2765,
199
  "step": 11000
200
  },
201
  {
202
- "epoch": 3.2922988834812483,
203
- "grad_norm": 0.775697648525238,
204
- "learning_rate": 8.902567038839584e-06,
205
- "loss": 0.2701,
206
  "step": 11500
207
  },
208
  {
209
- "epoch": 3.4354423131978242,
210
- "grad_norm": 5.776806354522705,
211
- "learning_rate": 8.854852562267393e-06,
212
- "loss": 0.3162,
213
  "step": 12000
214
  },
215
  {
216
- "epoch": 3.5785857429144,
217
- "grad_norm": 6.295269966125488,
218
- "learning_rate": 8.8071380856952e-06,
219
- "loss": 0.2598,
220
  "step": 12500
221
  },
222
  {
223
- "epoch": 3.721729172630976,
224
- "grad_norm": 0.7424159646034241,
225
- "learning_rate": 8.75942360912301e-06,
226
- "loss": 0.318,
227
  "step": 13000
228
  },
229
  {
230
- "epoch": 3.8648726023475524,
231
- "grad_norm": 0.3745361566543579,
232
- "learning_rate": 8.711709132550817e-06,
233
- "loss": 0.3355,
234
  "step": 13500
235
  },
236
  {
237
- "epoch": 4.0,
238
- "eval_accuracy": 0.8822714344750467,
239
- "eval_f1": 0.7246478053324019,
240
- "eval_loss": 0.5281853675842285,
241
- "eval_precision": 0.6839560439560439,
242
- "eval_recall": 0.7704877444912107,
243
- "eval_runtime": 15.0304,
244
- "eval_samples_per_second": 232.396,
245
- "eval_steps_per_second": 58.149,
246
- "step": 13972
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
  }
248
  ],
249
  "logging_steps": 500,
250
- "max_steps": 104790,
251
  "num_input_tokens_seen": 0,
252
  "num_train_epochs": 30,
253
  "save_steps": 500,
@@ -263,7 +272,7 @@
263
  "attributes": {}
264
  }
265
  },
266
- "total_flos": 4521341802434436.0,
267
  "train_batch_size": 4,
268
  "trial_name": null,
269
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.1846681833267212,
3
+ "best_model_checkpoint": "/mnt/b74bec9d-6899-4c43-8113-eae42a66c7c1/lujing/model_output1045/checkpoint-5161",
4
+ "epoch": 3.0,
5
  "eval_steps": 500,
6
+ "global_step": 15483,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.0968804495252858,
13
+ "grad_norm": 0.06996975094079971,
14
+ "learning_rate": 9.967706516824906e-06,
15
+ "loss": 0.0509,
16
  "step": 500
17
  },
18
  {
19
+ "epoch": 0.1937608990505716,
20
+ "grad_norm": 0.3793277442455292,
21
+ "learning_rate": 9.93541303364981e-06,
22
+ "loss": 0.0372,
23
  "step": 1000
24
  },
25
  {
26
+ "epoch": 0.2906413485758574,
27
+ "grad_norm": 0.030170898884534836,
28
+ "learning_rate": 9.903119550474715e-06,
29
+ "loss": 0.0596,
30
  "step": 1500
31
  },
32
  {
33
+ "epoch": 0.3875217981011432,
34
+ "grad_norm": 8.753244400024414,
35
+ "learning_rate": 9.870826067299619e-06,
36
+ "loss": 0.0423,
37
  "step": 2000
38
  },
39
  {
40
+ "epoch": 0.484402247626429,
41
+ "grad_norm": 0.19719867408275604,
42
+ "learning_rate": 9.838532584124525e-06,
43
+ "loss": 0.0547,
44
  "step": 2500
45
  },
46
  {
47
+ "epoch": 0.5812826971517148,
48
+ "grad_norm": 11.818449974060059,
49
+ "learning_rate": 9.80623910094943e-06,
50
+ "loss": 0.0494,
51
  "step": 3000
52
  },
53
  {
54
+ "epoch": 0.6781631466770006,
55
+ "grad_norm": 0.01820915751159191,
56
+ "learning_rate": 9.773945617774335e-06,
57
+ "loss": 0.0504,
 
 
 
 
 
 
 
 
 
 
 
 
58
  "step": 3500
59
  },
60
  {
61
+ "epoch": 0.7750435962022864,
62
+ "grad_norm": 0.275044709444046,
63
+ "learning_rate": 9.741652134599238e-06,
64
+ "loss": 0.0444,
65
  "step": 4000
66
  },
67
  {
68
+ "epoch": 0.8719240457275722,
69
+ "grad_norm": 5.492046356201172,
70
+ "learning_rate": 9.709358651424143e-06,
71
+ "loss": 0.0422,
72
  "step": 4500
73
  },
74
  {
75
+ "epoch": 0.968804495252858,
76
+ "grad_norm": 9.179338455200195,
77
+ "learning_rate": 9.677065168249048e-06,
78
+ "loss": 0.0464,
79
  "step": 5000
80
  },
81
  {
82
+ "epoch": 1.0,
83
+ "eval_accuracy": 0.9670047659782476,
84
+ "eval_f1": 0.8561757183691892,
85
+ "eval_loss": 0.1846681833267212,
86
+ "eval_precision": 0.8472532380527021,
87
+ "eval_recall": 0.8652881252850844,
88
+ "eval_runtime": 22.9571,
89
+ "eval_samples_per_second": 224.811,
90
+ "eval_steps_per_second": 56.235,
91
+ "step": 5161
92
+ },
93
+ {
94
+ "epoch": 1.0656849447781438,
95
+ "grad_norm": 0.6992437839508057,
96
+ "learning_rate": 9.644771685073953e-06,
97
+ "loss": 0.0398,
98
  "step": 5500
99
  },
100
  {
101
+ "epoch": 1.1625653943034295,
102
+ "grad_norm": 0.08979789912700653,
103
+ "learning_rate": 9.612478201898858e-06,
104
+ "loss": 0.0295,
105
  "step": 6000
106
  },
107
  {
108
+ "epoch": 1.2594458438287153,
109
+ "grad_norm": 0.005249473266303539,
110
+ "learning_rate": 9.580184718723763e-06,
111
+ "loss": 0.0291,
112
  "step": 6500
113
  },
114
  {
115
+ "epoch": 1.3563262933540012,
116
+ "grad_norm": 0.00042850736645050347,
117
+ "learning_rate": 9.547891235548667e-06,
118
+ "loss": 0.0306,
 
 
 
 
 
 
 
 
 
 
 
 
119
  "step": 7000
120
  },
121
  {
122
+ "epoch": 1.453206742879287,
123
+ "grad_norm": 0.43077152967453003,
124
+ "learning_rate": 9.515597752373572e-06,
125
+ "loss": 0.0499,
126
  "step": 7500
127
  },
128
  {
129
+ "epoch": 1.5500871924045727,
130
+ "grad_norm": 1.037180781364441,
131
+ "learning_rate": 9.483304269198477e-06,
132
+ "loss": 0.0319,
133
  "step": 8000
134
  },
135
  {
136
+ "epoch": 1.6469676419298587,
137
+ "grad_norm": 0.1504105031490326,
138
+ "learning_rate": 9.451010786023382e-06,
139
+ "loss": 0.0446,
140
  "step": 8500
141
  },
142
  {
143
+ "epoch": 1.7438480914551442,
144
+ "grad_norm": 0.004160536918789148,
145
+ "learning_rate": 9.418717302848285e-06,
146
+ "loss": 0.0504,
147
  "step": 9000
148
  },
149
  {
150
+ "epoch": 1.8407285409804302,
151
+ "grad_norm": 0.005177629180252552,
152
+ "learning_rate": 9.38642381967319e-06,
153
+ "loss": 0.0328,
154
  "step": 9500
155
  },
156
  {
157
+ "epoch": 1.9376089905057161,
158
+ "grad_norm": 0.1239800825715065,
159
+ "learning_rate": 9.354130336498095e-06,
160
+ "loss": 0.0427,
161
  "step": 10000
162
  },
163
  {
164
+ "epoch": 2.0,
165
+ "eval_accuracy": 0.9648661859953562,
166
+ "eval_f1": 0.8497607655502392,
167
+ "eval_loss": 0.21138954162597656,
168
+ "eval_precision": 0.8236301369863014,
169
+ "eval_recall": 0.8776037707161319,
170
+ "eval_runtime": 21.1406,
171
+ "eval_samples_per_second": 244.127,
172
+ "eval_steps_per_second": 61.067,
173
+ "step": 10322
174
+ },
175
+ {
176
+ "epoch": 2.0344894400310016,
177
+ "grad_norm": 0.001920273876748979,
178
+ "learning_rate": 9.321836853323e-06,
179
+ "loss": 0.0366,
180
  "step": 10500
181
  },
182
  {
183
+ "epoch": 2.1313698895562876,
184
+ "grad_norm": 0.04532007873058319,
185
+ "learning_rate": 9.289543370147905e-06,
186
+ "loss": 0.0247,
187
  "step": 11000
188
  },
189
  {
190
+ "epoch": 2.2282503390815736,
191
+ "grad_norm": 0.9340131282806396,
192
+ "learning_rate": 9.25724988697281e-06,
193
+ "loss": 0.022,
194
  "step": 11500
195
  },
196
  {
197
+ "epoch": 2.325130788606859,
198
+ "grad_norm": 2.6077468395233154,
199
+ "learning_rate": 9.224956403797715e-06,
200
+ "loss": 0.0193,
201
  "step": 12000
202
  },
203
  {
204
+ "epoch": 2.422011238132145,
205
+ "grad_norm": 0.002708667889237404,
206
+ "learning_rate": 9.19266292062262e-06,
207
+ "loss": 0.0317,
208
  "step": 12500
209
  },
210
  {
211
+ "epoch": 2.5188916876574305,
212
+ "grad_norm": 0.019491495564579964,
213
+ "learning_rate": 9.160369437447524e-06,
214
+ "loss": 0.0251,
215
  "step": 13000
216
  },
217
  {
218
+ "epoch": 2.6157721371827165,
219
+ "grad_norm": 0.0052880775183439255,
220
+ "learning_rate": 9.12807595427243e-06,
221
+ "loss": 0.0278,
222
  "step": 13500
223
  },
224
  {
225
+ "epoch": 2.7126525867080025,
226
+ "grad_norm": 0.04109632968902588,
227
+ "learning_rate": 9.095782471097332e-06,
228
+ "loss": 0.0208,
229
+ "step": 14000
230
+ },
231
+ {
232
+ "epoch": 2.809533036233288,
233
+ "grad_norm": 0.002209634520113468,
234
+ "learning_rate": 9.063488987922237e-06,
235
+ "loss": 0.0259,
236
+ "step": 14500
237
+ },
238
+ {
239
+ "epoch": 2.906413485758574,
240
+ "grad_norm": 0.28515392541885376,
241
+ "learning_rate": 9.031195504747142e-06,
242
+ "loss": 0.0231,
243
+ "step": 15000
244
+ },
245
+ {
246
+ "epoch": 3.0,
247
+ "eval_accuracy": 0.9655688622754491,
248
+ "eval_f1": 0.8564688536675268,
249
+ "eval_loss": 0.22559861838817596,
250
+ "eval_precision": 0.8312821980538065,
251
+ "eval_recall": 0.8832294359130303,
252
+ "eval_runtime": 22.5739,
253
+ "eval_samples_per_second": 228.626,
254
+ "eval_steps_per_second": 57.19,
255
+ "step": 15483
256
  }
257
  ],
258
  "logging_steps": 500,
259
+ "max_steps": 154830,
260
  "num_input_tokens_seen": 0,
261
  "num_train_epochs": 30,
262
  "save_steps": 500,
 
272
  "attributes": {}
273
  }
274
  },
275
+ "total_flos": 4682006759071614.0,
276
  "train_batch_size": 4,
277
  "trial_name": null,
278
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ea54c5c3ecb17a9b5085c0e74fa160bbf98c5cff0611b508ab5de736a894d28
3
  size 5841
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:382eee7be9b7efad2239b0f2906de0ba9fd12ae5a42e6ce1012cf9901952ef87
3
  size 5841