Tural committed on
Commit
d5cac16
1 Parent(s): 77bff6d

End of training

Browse files
Files changed (5) hide show
  1. README.md +24 -2
  2. all_results.json +11 -11
  3. eval_results.json +7 -7
  4. train_results.json +4 -4
  5. trainer_state.json +7 -7
README.md CHANGED
@@ -1,13 +1,30 @@
1
  ---
 
 
2
  license: apache-2.0
3
  base_model: bert-base-uncased
4
  tags:
5
  - generated_from_trainer
6
  datasets:
7
  - glue
 
 
8
  model-index:
9
  - name: out-glue-stsb
10
- results: []
 
 
 
 
 
 
 
 
 
 
 
 
 
11
  ---
12
 
13
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -15,7 +32,12 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  # out-glue-stsb
17
 
18
- This model is a fine-tuned version of [bert-base-uncased](https://huggingface.co/bert-base-uncased) on the glue dataset.
 
 
 
 
 
19
 
20
  ## Model description
21
 
 
1
  ---
2
+ language:
3
+ - en
4
  license: apache-2.0
5
  base_model: bert-base-uncased
6
  tags:
7
  - generated_from_trainer
8
  datasets:
9
  - glue
10
+ metrics:
11
+ - spearmanr
12
  model-index:
13
  - name: out-glue-stsb
14
+ results:
15
+ - task:
16
+ name: Text Classification
17
+ type: text-classification
18
+ dataset:
19
+ name: GLUE STSB
20
+ type: glue
21
+ config: stsb
22
+ split: validation
23
+ args: stsb
24
+ metrics:
25
+ - name: Spearmanr
26
+ type: spearmanr
27
+ value: 0.8807931789519186
28
  ---
29
 
30
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
32
 
33
  # out-glue-stsb
34
 
35
+ This model is a fine-tuned version of [bert-base-uncased](https://huggingface.co/bert-base-uncased) on the GLUE STSB dataset.
36
+ It achieves the following results on the evaluation set:
37
+ - Loss: 0.5330
38
+ - Pearson: 0.8855
39
+ - Spearmanr: 0.8808
40
+ - Combined Score: 0.8831
41
 
42
  ## Model description
43
 
all_results.json CHANGED
@@ -1,16 +1,16 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_combined_score": 0.8830287553865886,
4
- "eval_loss": 0.533576250076294,
5
- "eval_pearson": 0.8853386403755572,
6
- "eval_runtime": 1.1414,
7
  "eval_samples": 1500,
8
- "eval_samples_per_second": 1314.172,
9
- "eval_spearmanr": 0.8807188703976201,
10
- "eval_steps_per_second": 5.257,
11
- "train_loss": 0.4903079410394033,
12
- "train_runtime": 237.6265,
13
  "train_samples": 5749,
14
- "train_samples_per_second": 483.869,
15
- "train_steps_per_second": 2.525
16
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_combined_score": 0.8831457963278682,
4
+ "eval_loss": 0.5330392122268677,
5
+ "eval_pearson": 0.8854984137038179,
6
+ "eval_runtime": 1.1378,
7
  "eval_samples": 1500,
8
+ "eval_samples_per_second": 1318.312,
9
+ "eval_spearmanr": 0.8807931789519186,
10
+ "eval_steps_per_second": 5.273,
11
+ "train_loss": 0.017963338692982993,
12
+ "train_runtime": 43.8472,
13
  "train_samples": 5749,
14
+ "train_samples_per_second": 2622.287,
15
+ "train_steps_per_second": 13.684
16
  }
eval_results.json CHANGED
@@ -1,11 +1,11 @@
1
  {
2
  "epoch": 20.0,
3
- "eval_combined_score": 0.8830287553865886,
4
- "eval_loss": 0.533576250076294,
5
- "eval_pearson": 0.8853386403755572,
6
- "eval_runtime": 1.1414,
7
  "eval_samples": 1500,
8
- "eval_samples_per_second": 1314.172,
9
- "eval_spearmanr": 0.8807188703976201,
10
- "eval_steps_per_second": 5.257
11
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "eval_combined_score": 0.8831457963278682,
4
+ "eval_loss": 0.5330392122268677,
5
+ "eval_pearson": 0.8854984137038179,
6
+ "eval_runtime": 1.1378,
7
  "eval_samples": 1500,
8
+ "eval_samples_per_second": 1318.312,
9
+ "eval_spearmanr": 0.8807931789519186,
10
+ "eval_steps_per_second": 5.273
11
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 20.0,
3
- "train_loss": 0.4903079410394033,
4
- "train_runtime": 237.6265,
5
  "train_samples": 5749,
6
- "train_samples_per_second": 483.869,
7
- "train_steps_per_second": 2.525
8
  }
 
1
  {
2
  "epoch": 20.0,
3
+ "train_loss": 0.017963338692982993,
4
+ "train_runtime": 43.8472,
5
  "train_samples": 5749,
6
+ "train_samples_per_second": 2622.287,
7
+ "train_steps_per_second": 13.684
8
  }
trainer_state.json CHANGED
@@ -131,13 +131,13 @@
131
  {
132
  "epoch": 17.5,
133
  "learning_rate": 2.6e-06,
134
- "loss": 0.1095,
135
  "step": 525
136
  },
137
  {
138
  "epoch": 18.33,
139
  "learning_rate": 1.7666666666666668e-06,
140
- "loss": 0.1088,
141
  "step": 550
142
  },
143
  {
@@ -149,17 +149,17 @@
149
  {
150
  "epoch": 20.0,
151
  "learning_rate": 1.0000000000000001e-07,
152
- "loss": 0.1063,
153
  "step": 600
154
  },
155
  {
156
  "epoch": 20.0,
157
  "step": 600,
158
  "total_flos": 7563059380055040.0,
159
- "train_loss": 0.4903079410394033,
160
- "train_runtime": 237.6265,
161
- "train_samples_per_second": 483.869,
162
- "train_steps_per_second": 2.525
163
  }
164
  ],
165
  "logging_steps": 25,
 
131
  {
132
  "epoch": 17.5,
133
  "learning_rate": 2.6e-06,
134
+ "loss": 0.1094,
135
  "step": 525
136
  },
137
  {
138
  "epoch": 18.33,
139
  "learning_rate": 1.7666666666666668e-06,
140
+ "loss": 0.1093,
141
  "step": 550
142
  },
143
  {
 
149
  {
150
  "epoch": 20.0,
151
  "learning_rate": 1.0000000000000001e-07,
152
+ "loss": 0.1064,
153
  "step": 600
154
  },
155
  {
156
  "epoch": 20.0,
157
  "step": 600,
158
  "total_flos": 7563059380055040.0,
159
+ "train_loss": 0.017963338692982993,
160
+ "train_runtime": 43.8472,
161
+ "train_samples_per_second": 2622.287,
162
+ "train_steps_per_second": 13.684
163
  }
164
  ],
165
  "logging_steps": 25,