Cheng98 committed on
Commit
5cb7188
1 Parent(s): dd437f3

Upload folder using huggingface_hub

Browse files
README.md CHANGED
@@ -22,7 +22,7 @@ model-index:
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
- value: 0.6425992779783394
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
@@ -32,8 +32,8 @@ should probably proofread and complete it, then remove this comment. -->
32
 
33
  This model is a fine-tuned version of [facebook/opt-125m](https://huggingface.co/facebook/opt-125m) on the GLUE RTE dataset.
34
  It achieves the following results on the evaluation set:
35
- - Loss: 3.2055
36
- - Accuracy: 0.6426
37
 
38
  ## Model description
39
 
@@ -53,7 +53,7 @@ More information needed
53
 
54
  The following hyperparameters were used during training:
55
  - learning_rate: 2e-05
56
- - train_batch_size: 8
57
  - eval_batch_size: 8
58
  - seed: 42
59
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 
22
  metrics:
23
  - name: Accuracy
24
  type: accuracy
25
+ value: 0.6787003610108303
26
  ---
27
 
28
  <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 
32
 
33
  This model is a fine-tuned version of [facebook/opt-125m](https://huggingface.co/facebook/opt-125m) on the GLUE RTE dataset.
34
  It achieves the following results on the evaluation set:
35
+ - Loss: 1.7714
36
+ - Accuracy: 0.6787
37
 
38
  ## Model description
39
 
 
53
 
54
  The following hyperparameters were used during training:
55
  - learning_rate: 2e-05
56
+ - train_batch_size: 16
57
  - eval_batch_size: 8
58
  - seed: 42
59
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_accuracy": 0.6425992779783394,
4
- "eval_loss": 3.205474376678467,
5
- "eval_runtime": 0.5373,
6
  "eval_samples": 277,
7
- "eval_samples_per_second": 515.583,
8
- "eval_steps_per_second": 65.146,
9
- "train_loss": 0.3344010169689472,
10
- "train_runtime": 98.9629,
11
  "train_samples": 2490,
12
- "train_samples_per_second": 125.805,
13
- "train_steps_per_second": 15.763
14
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_accuracy": 0.6787003610108303,
4
+ "eval_loss": 1.771389365196228,
5
+ "eval_runtime": 0.7069,
6
  "eval_samples": 277,
7
+ "eval_samples_per_second": 391.849,
8
+ "eval_steps_per_second": 49.512,
9
+ "train_loss": 0.3118787203079615,
10
+ "train_runtime": 82.0111,
11
  "train_samples": 2490,
12
+ "train_samples_per_second": 151.809,
13
+ "train_steps_per_second": 9.511
14
  }
eval_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "epoch": 5.0,
3
- "eval_accuracy": 0.6425992779783394,
4
- "eval_loss": 3.205474376678467,
5
- "eval_runtime": 0.5373,
6
  "eval_samples": 277,
7
- "eval_samples_per_second": 515.583,
8
- "eval_steps_per_second": 65.146
9
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "eval_accuracy": 0.6787003610108303,
4
+ "eval_loss": 1.771389365196228,
5
+ "eval_runtime": 0.7069,
6
  "eval_samples": 277,
7
+ "eval_samples_per_second": 391.849,
8
+ "eval_steps_per_second": 49.512
9
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8cb641be82ca0f1ccfb4f61fe42542d9659f201169ff773b587f2a936c8dfb1a
3
  size 501029729
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:781808a175754627018579d28acc8bfdd46b49fe2ebba66f729431a19f617612
3
  size 501029729
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 5.0,
3
- "train_loss": 0.3344010169689472,
4
- "train_runtime": 98.9629,
5
  "train_samples": 2490,
6
- "train_samples_per_second": 125.805,
7
- "train_steps_per_second": 15.763
8
  }
 
1
  {
2
  "epoch": 5.0,
3
+ "train_loss": 0.3118787203079615,
4
+ "train_runtime": 82.0111,
5
  "train_samples": 2490,
6
+ "train_samples_per_second": 151.809,
7
+ "train_steps_per_second": 9.511
8
  }
trainer_state.json CHANGED
@@ -2,40 +2,28 @@
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 5.0,
5
- "global_step": 1560,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
- {
11
- "epoch": 1.6,
12
- "learning_rate": 1.3589743589743592e-05,
13
- "loss": 0.6095,
14
- "step": 500
15
- },
16
  {
17
  "epoch": 3.21,
18
- "learning_rate": 7.17948717948718e-06,
19
- "loss": 0.3272,
20
- "step": 1000
21
- },
22
- {
23
- "epoch": 4.81,
24
- "learning_rate": 7.692307692307694e-07,
25
- "loss": 0.0972,
26
- "step": 1500
27
  },
28
  {
29
  "epoch": 5.0,
30
- "step": 1560,
31
  "total_flos": 813286136217600.0,
32
- "train_loss": 0.3344010169689472,
33
- "train_runtime": 98.9629,
34
- "train_samples_per_second": 125.805,
35
- "train_steps_per_second": 15.763
36
  }
37
  ],
38
- "max_steps": 1560,
39
  "num_train_epochs": 5,
40
  "total_flos": 813286136217600.0,
41
  "trial_name": null,
 
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 5.0,
5
+ "global_step": 780,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
 
 
 
 
 
 
10
  {
11
  "epoch": 3.21,
12
+ "learning_rate": 7.282051282051282e-06,
13
+ "loss": 0.4533,
14
+ "step": 500
 
 
 
 
 
 
15
  },
16
  {
17
  "epoch": 5.0,
18
+ "step": 780,
19
  "total_flos": 813286136217600.0,
20
+ "train_loss": 0.3118787203079615,
21
+ "train_runtime": 82.0111,
22
+ "train_samples_per_second": 151.809,
23
+ "train_steps_per_second": 9.511
24
  }
25
  ],
26
+ "max_steps": 780,
27
  "num_train_epochs": 5,
28
  "total_flos": 813286136217600.0,
29
  "trial_name": null,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b7acb9d14b816ffeacb7e2beaba2d6037a7bff441ba00c7df1b155086660724
3
  size 3963
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1da5bc24ce2ee57a0ae282557258fdf6c887a02ea72d1c5da88e78847d79679b
3
  size 3963