oilyshelf committed on
Commit
4b94c70
verified
1 Parent(s): 4723468

Model save

Browse files
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ eval_nbest_predictions.json filter=lfs diff=lfs merge=lfs -text
README.md CHANGED
@@ -3,14 +3,14 @@ base_model: huawei-noah/TinyBERT_General_4L_312D
3
  tags:
4
  - generated_from_trainer
5
  model-index:
6
- - name: result
7
  results: []
8
  ---
9
 
10
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
11
  should probably proofread and complete it, then remove this comment. -->
12
 
13
- # result
14
 
15
  This model is a fine-tuned version of [huawei-noah/TinyBERT_General_4L_312D](https://huggingface.co/huawei-noah/TinyBERT_General_4L_312D) on an unknown dataset.
16
 
@@ -46,6 +46,6 @@ The following hyperparameters were used during training:
46
  ### Framework versions
47
 
48
  - Transformers 4.38.0.dev0
49
- - Pytorch 2.1.0+cu121
50
  - Datasets 2.16.1
51
  - Tokenizers 0.15.0
 
3
  tags:
4
  - generated_from_trainer
5
  model-index:
6
+ - name: TUDNLP4W
7
  results: []
8
  ---
9
 
10
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
11
  should probably proofread and complete it, then remove this comment. -->
12
 
13
+ # TUDNLP4W
14
 
15
  This model is a fine-tuned version of [huawei-noah/TinyBERT_General_4L_312D](https://huggingface.co/huawei-noah/TinyBERT_General_4L_312D) on an unknown dataset.
16
 
 
46
  ### Framework versions
47
 
48
  - Transformers 4.38.0.dev0
49
+ - Pytorch 2.2.0
50
  - Datasets 2.16.1
51
  - Tokenizers 0.15.0
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "epoch": 2.0,
3
- "eval_exact_match": 55.54993252361673,
4
- "eval_f1": 67.43200197066156,
5
- "eval_runtime": 20.5032,
6
  "eval_samples": 6056,
7
- "eval_samples_per_second": 295.368,
8
- "eval_steps_per_second": 36.921,
9
- "train_loss": 2.134629626242807,
10
- "train_runtime": 1518.4117,
11
  "train_samples": 87739,
12
- "train_samples_per_second": 115.567,
13
- "train_steps_per_second": 9.631
14
  }
 
1
  {
2
  "epoch": 2.0,
3
+ "eval_exact_match": 58.04655870445344,
4
+ "eval_f1": 70.00816554397369,
5
+ "eval_runtime": 37.0671,
6
  "eval_samples": 6056,
7
+ "eval_samples_per_second": 163.379,
8
+ "eval_steps_per_second": 20.422,
9
+ "train_loss": 0.013409816984013865,
10
+ "train_runtime": 36.1557,
11
  "train_samples": 87739,
12
+ "train_samples_per_second": 4853.397,
13
+ "train_steps_per_second": 404.473
14
  }
eval_nbest_predictions.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cda9b172f98c9e9376f0df6c872fa698d46bbfd4d45b552f9d3a980ab1aa0a50
3
+ size 28157111
eval_predictions.json CHANGED
The diff for this file is too large to render. See raw diff
 
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 2.0,
3
- "eval_exact_match": 55.54993252361673,
4
- "eval_f1": 67.43200197066156,
5
- "eval_runtime": 20.5032,
6
  "eval_samples": 6056,
7
- "eval_samples_per_second": 295.368,
8
- "eval_steps_per_second": 36.921
9
  }
 
1
  {
2
  "epoch": 2.0,
3
+ "eval_exact_match": 58.04655870445344,
4
+ "eval_f1": 70.00816554397369,
5
+ "eval_runtime": 37.0671,
6
  "eval_samples": 6056,
7
+ "eval_samples_per_second": 163.379,
8
+ "eval_steps_per_second": 20.422
9
  }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cb5e71a14c70818def6025d7b984487a38053815f7bf0da800eaaf80ae15690
3
+ size 57020992
tokenizer.json CHANGED
@@ -1,21 +1,7 @@
1
  {
2
  "version": "1.0",
3
- "truncation": {
4
- "direction": "Right",
5
- "max_length": 384,
6
- "strategy": "OnlySecond",
7
- "stride": 128
8
- },
9
- "padding": {
10
- "strategy": {
11
- "Fixed": 384
12
- },
13
- "direction": "Right",
14
- "pad_to_multiple_of": null,
15
- "pad_id": 0,
16
- "pad_type_id": 0,
17
- "pad_token": "[PAD]"
18
- },
19
  "added_tokens": [
20
  {
21
  "id": 0,
 
1
  {
2
  "version": "1.0",
3
+ "truncation": null,
4
+ "padding": null,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  "added_tokens": [
6
  {
7
  "id": 0,
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 2.0,
3
- "train_loss": 2.134629626242807,
4
- "train_runtime": 1518.4117,
5
  "train_samples": 87739,
6
- "train_samples_per_second": 115.567,
7
- "train_steps_per_second": 9.631
8
  }
 
1
  {
2
  "epoch": 2.0,
3
+ "train_loss": 0.013409816984013865,
4
+ "train_runtime": 36.1557,
5
  "train_samples": 87739,
6
+ "train_samples_per_second": 4853.397,
7
+ "train_steps_per_second": 404.473
8
  }
trainer_state.json CHANGED
@@ -11,185 +11,185 @@
11
  {
12
  "epoch": 0.07,
13
  "learning_rate": 2.897428884026258e-05,
14
- "loss": 3.8747,
15
  "step": 500
16
  },
17
  {
18
  "epoch": 0.14,
19
  "learning_rate": 2.7948577680525165e-05,
20
- "loss": 3.0141,
21
  "step": 1000
22
  },
23
  {
24
  "epoch": 0.21,
25
  "learning_rate": 2.6922866520787748e-05,
26
- "loss": 2.7867,
27
  "step": 1500
28
  },
29
  {
30
  "epoch": 0.27,
31
  "learning_rate": 2.589715536105033e-05,
32
- "loss": 2.6368,
33
  "step": 2000
34
  },
35
  {
36
  "epoch": 0.34,
37
  "learning_rate": 2.4871444201312912e-05,
38
- "loss": 2.556,
39
  "step": 2500
40
  },
41
  {
42
  "epoch": 0.41,
43
  "learning_rate": 2.3845733041575492e-05,
44
- "loss": 2.4478,
45
  "step": 3000
46
  },
47
  {
48
  "epoch": 0.48,
49
  "learning_rate": 2.2820021881838072e-05,
50
- "loss": 2.4204,
51
  "step": 3500
52
  },
53
  {
54
  "epoch": 0.55,
55
  "learning_rate": 2.179431072210066e-05,
56
- "loss": 2.381,
57
  "step": 4000
58
  },
59
  {
60
  "epoch": 0.62,
61
  "learning_rate": 2.076859956236324e-05,
62
- "loss": 2.2586,
63
  "step": 4500
64
  },
65
  {
66
  "epoch": 0.68,
67
  "learning_rate": 1.9742888402625823e-05,
68
- "loss": 2.1788,
69
  "step": 5000
70
  },
71
  {
72
  "epoch": 0.75,
73
  "learning_rate": 1.8717177242888403e-05,
74
- "loss": 2.1679,
75
  "step": 5500
76
  },
77
  {
78
  "epoch": 0.82,
79
  "learning_rate": 1.7691466083150983e-05,
80
- "loss": 2.0685,
81
  "step": 6000
82
  },
83
  {
84
  "epoch": 0.89,
85
  "learning_rate": 1.6665754923413567e-05,
86
- "loss": 2.0419,
87
  "step": 6500
88
  },
89
  {
90
  "epoch": 0.96,
91
  "learning_rate": 1.564004376367615e-05,
92
- "loss": 2.0122,
93
  "step": 7000
94
  },
95
  {
96
  "epoch": 1.03,
97
  "learning_rate": 1.4614332603938733e-05,
98
- "loss": 1.9763,
99
  "step": 7500
100
  },
101
  {
102
  "epoch": 1.09,
103
  "learning_rate": 1.3588621444201313e-05,
104
- "loss": 1.8748,
105
  "step": 8000
106
  },
107
  {
108
  "epoch": 1.16,
109
  "learning_rate": 1.2562910284463895e-05,
110
- "loss": 1.8163,
111
  "step": 8500
112
  },
113
  {
114
  "epoch": 1.23,
115
  "learning_rate": 1.1537199124726478e-05,
116
- "loss": 1.8351,
117
  "step": 9000
118
  },
119
  {
120
  "epoch": 1.3,
121
  "learning_rate": 1.0511487964989058e-05,
122
- "loss": 1.8195,
123
  "step": 9500
124
  },
125
  {
126
  "epoch": 1.37,
127
  "learning_rate": 9.485776805251642e-06,
128
- "loss": 1.8151,
129
  "step": 10000
130
  },
131
  {
132
  "epoch": 1.44,
133
  "learning_rate": 8.460065645514224e-06,
134
- "loss": 1.8129,
135
  "step": 10500
136
  },
137
  {
138
  "epoch": 1.5,
139
  "learning_rate": 7.434354485776806e-06,
140
- "loss": 1.7618,
141
  "step": 11000
142
  },
143
  {
144
  "epoch": 1.57,
145
  "learning_rate": 6.408643326039388e-06,
146
- "loss": 1.7896,
147
  "step": 11500
148
  },
149
  {
150
  "epoch": 1.64,
151
  "learning_rate": 5.3829321663019695e-06,
152
- "loss": 1.7948,
153
  "step": 12000
154
  },
155
  {
156
  "epoch": 1.71,
157
  "learning_rate": 4.357221006564551e-06,
158
- "loss": 1.789,
159
  "step": 12500
160
  },
161
  {
162
  "epoch": 1.78,
163
  "learning_rate": 3.3315098468271337e-06,
164
- "loss": 1.7811,
165
  "step": 13000
166
  },
167
  {
168
  "epoch": 1.85,
169
  "learning_rate": 2.3057986870897156e-06,
170
- "loss": 1.757,
171
  "step": 13500
172
  },
173
  {
174
  "epoch": 1.91,
175
  "learning_rate": 1.2800875273522977e-06,
176
- "loss": 1.778,
177
  "step": 14000
178
  },
179
  {
180
  "epoch": 1.98,
181
  "learning_rate": 2.543763676148797e-07,
182
- "loss": 1.7596,
183
  "step": 14500
184
  },
185
  {
186
  "epoch": 2.0,
187
  "step": 14624,
188
  "total_flos": 1847651335621632.0,
189
- "train_loss": 2.134629626242807,
190
- "train_runtime": 1518.4117,
191
- "train_samples_per_second": 115.567,
192
- "train_steps_per_second": 9.631
193
  }
194
  ],
195
  "logging_steps": 500,
 
11
  {
12
  "epoch": 0.07,
13
  "learning_rate": 2.897428884026258e-05,
14
+ "loss": 3.9021,
15
  "step": 500
16
  },
17
  {
18
  "epoch": 0.14,
19
  "learning_rate": 2.7948577680525165e-05,
20
+ "loss": 3.0233,
21
  "step": 1000
22
  },
23
  {
24
  "epoch": 0.21,
25
  "learning_rate": 2.6922866520787748e-05,
26
+ "loss": 2.7852,
27
  "step": 1500
28
  },
29
  {
30
  "epoch": 0.27,
31
  "learning_rate": 2.589715536105033e-05,
32
+ "loss": 2.6301,
33
  "step": 2000
34
  },
35
  {
36
  "epoch": 0.34,
37
  "learning_rate": 2.4871444201312912e-05,
38
+ "loss": 2.4956,
39
  "step": 2500
40
  },
41
  {
42
  "epoch": 0.41,
43
  "learning_rate": 2.3845733041575492e-05,
44
+ "loss": 2.2733,
45
  "step": 3000
46
  },
47
  {
48
  "epoch": 0.48,
49
  "learning_rate": 2.2820021881838072e-05,
50
+ "loss": 2.2072,
51
  "step": 3500
52
  },
53
  {
54
  "epoch": 0.55,
55
  "learning_rate": 2.179431072210066e-05,
56
+ "loss": 2.1646,
57
  "step": 4000
58
  },
59
  {
60
  "epoch": 0.62,
61
  "learning_rate": 2.076859956236324e-05,
62
+ "loss": 2.0685,
63
  "step": 4500
64
  },
65
  {
66
  "epoch": 0.68,
67
  "learning_rate": 1.9742888402625823e-05,
68
+ "loss": 2.006,
69
  "step": 5000
70
  },
71
  {
72
  "epoch": 0.75,
73
  "learning_rate": 1.8717177242888403e-05,
74
+ "loss": 2.0149,
75
  "step": 5500
76
  },
77
  {
78
  "epoch": 0.82,
79
  "learning_rate": 1.7691466083150983e-05,
80
+ "loss": 1.9361,
81
  "step": 6000
82
  },
83
  {
84
  "epoch": 0.89,
85
  "learning_rate": 1.6665754923413567e-05,
86
+ "loss": 1.9372,
87
  "step": 6500
88
  },
89
  {
90
  "epoch": 0.96,
91
  "learning_rate": 1.564004376367615e-05,
92
+ "loss": 1.9043,
93
  "step": 7000
94
  },
95
  {
96
  "epoch": 1.03,
97
  "learning_rate": 1.4614332603938733e-05,
98
+ "loss": 1.8555,
99
  "step": 7500
100
  },
101
  {
102
  "epoch": 1.09,
103
  "learning_rate": 1.3588621444201313e-05,
104
+ "loss": 1.7623,
105
  "step": 8000
106
  },
107
  {
108
  "epoch": 1.16,
109
  "learning_rate": 1.2562910284463895e-05,
110
+ "loss": 1.7307,
111
  "step": 8500
112
  },
113
  {
114
  "epoch": 1.23,
115
  "learning_rate": 1.1537199124726478e-05,
116
+ "loss": 1.7475,
117
  "step": 9000
118
  },
119
  {
120
  "epoch": 1.3,
121
  "learning_rate": 1.0511487964989058e-05,
122
+ "loss": 1.7261,
123
  "step": 9500
124
  },
125
  {
126
  "epoch": 1.37,
127
  "learning_rate": 9.485776805251642e-06,
128
+ "loss": 1.7196,
129
  "step": 10000
130
  },
131
  {
132
  "epoch": 1.44,
133
  "learning_rate": 8.460065645514224e-06,
134
+ "loss": 1.7055,
135
  "step": 10500
136
  },
137
  {
138
  "epoch": 1.5,
139
  "learning_rate": 7.434354485776806e-06,
140
+ "loss": 1.6923,
141
  "step": 11000
142
  },
143
  {
144
  "epoch": 1.57,
145
  "learning_rate": 6.408643326039388e-06,
146
+ "loss": 1.677,
147
  "step": 11500
148
  },
149
  {
150
  "epoch": 1.64,
151
  "learning_rate": 5.3829321663019695e-06,
152
+ "loss": 1.7111,
153
  "step": 12000
154
  },
155
  {
156
  "epoch": 1.71,
157
  "learning_rate": 4.357221006564551e-06,
158
+ "loss": 1.701,
159
  "step": 12500
160
  },
161
  {
162
  "epoch": 1.78,
163
  "learning_rate": 3.3315098468271337e-06,
164
+ "loss": 1.6748,
165
  "step": 13000
166
  },
167
  {
168
  "epoch": 1.85,
169
  "learning_rate": 2.3057986870897156e-06,
170
+ "loss": 1.6773,
171
  "step": 13500
172
  },
173
  {
174
  "epoch": 1.91,
175
  "learning_rate": 1.2800875273522977e-06,
176
+ "loss": 1.6973,
177
  "step": 14000
178
  },
179
  {
180
  "epoch": 1.98,
181
  "learning_rate": 2.543763676148797e-07,
182
+ "loss": 1.6668,
183
  "step": 14500
184
  },
185
  {
186
  "epoch": 2.0,
187
  "step": 14624,
188
  "total_flos": 1847651335621632.0,
189
+ "train_loss": 0.013409816984013865,
190
+ "train_runtime": 36.1557,
191
+ "train_samples_per_second": 4853.397,
192
+ "train_steps_per_second": 404.473
193
  }
194
  ],
195
  "logging_steps": 500,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:007f46ea94b8d67dc8cb3f6da65ee29923b99f42839fe274dd8111973c46e82a
3
- size 4728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5fd73e0cd7777d4653116a4e00ee666d42b88fdcd22ca0f1763d2bae0feb85b9
3
+ size 4792