biggy-smiley committed on
Commit
dc8fbb5
·
verified ·
1 Parent(s): e26c47a

Training in progress, step 500, checkpoint

Browse files
last-checkpoint/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "biggy-smiley/bert-base-uncased-fibe-v1",
3
  "architectures": [
4
  "BertForSequenceClassification"
5
  ],
 
1
  {
2
+ "_name_or_path": "biggy-smiley/bert-base-uncased-fibe-v2",
3
  "architectures": [
4
  "BertForSequenceClassification"
5
  ],
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2004ebbab03de74d00c60e27573907a37373fc0055ecb5c433827b13812ce0ed
3
  size 438032472
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30a7148af1b2c111e9a3bb314228e73eb5195ee5f22f77fbde2824a584b8cb1d
3
  size 438032472
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6458c45bb22e19e9697b93fdf578d5b1611dd3db0c74d9ad1f2cf8625ef5be48
3
  size 876185914
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eca7587deedabd245a1bbc4fbaa8399f189705649a6032380499cb578acd7e1d
3
  size 876185914
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c6b9361df27a0d1575bc9d13b96c3afa7adc7c7aa5590196bb3a80cbc6d4fe17
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0bcc01bf3ed855f7704cc44c730c23da8b2c5cbdb141fc38bacc6e369f90c912
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c049b2e3ea1cce3b5a07099de3f2a7712b4074618db3d2296051173e338de365
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59ca3596845b36ceb40554e9ac681b4f58ff5a427f785c30ed05587a822df839
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,102 +1,27 @@
1
  {
2
- "best_metric": 0.5547525882720947,
3
- "best_model_checkpoint": "/kaggle/working/results/checkpoint-3000",
4
- "epoch": 0.21528525296017223,
5
  "eval_steps": 500,
6
- "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.03588087549336204,
13
- "grad_norm": 5.634653091430664,
14
  "learning_rate": 3.856476498026552e-05,
15
- "loss": 0.5127,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.03588087549336204,
20
- "eval_loss": 0.5660845637321472,
21
- "eval_runtime": 163.3238,
22
- "eval_samples_per_second": 63.677,
23
- "eval_steps_per_second": 0.502,
24
  "step": 500
25
- },
26
- {
27
- "epoch": 0.07176175098672408,
28
- "grad_norm": 5.935914516448975,
29
- "learning_rate": 3.7129529960531036e-05,
30
- "loss": 0.5172,
31
- "step": 1000
32
- },
33
- {
34
- "epoch": 0.07176175098672408,
35
- "eval_loss": 0.5715007781982422,
36
- "eval_runtime": 162.9037,
37
- "eval_samples_per_second": 63.841,
38
- "eval_steps_per_second": 0.503,
39
- "step": 1000
40
- },
41
- {
42
- "epoch": 0.10764262648008611,
43
- "grad_norm": 10.494462966918945,
44
- "learning_rate": 3.569429494079656e-05,
45
- "loss": 0.5146,
46
- "step": 1500
47
- },
48
- {
49
- "epoch": 0.10764262648008611,
50
- "eval_loss": 0.6872914433479309,
51
- "eval_runtime": 162.5808,
52
- "eval_samples_per_second": 63.968,
53
- "eval_steps_per_second": 0.504,
54
- "step": 1500
55
- },
56
- {
57
- "epoch": 0.14352350197344815,
58
- "grad_norm": 7.07112979888916,
59
- "learning_rate": 3.425905992106208e-05,
60
- "loss": 0.5173,
61
- "step": 2000
62
- },
63
- {
64
- "epoch": 0.14352350197344815,
65
- "eval_loss": 0.5986515283584595,
66
- "eval_runtime": 162.9374,
67
- "eval_samples_per_second": 63.828,
68
- "eval_steps_per_second": 0.503,
69
- "step": 2000
70
- },
71
- {
72
- "epoch": 0.17940437746681018,
73
- "grad_norm": 5.930263042449951,
74
- "learning_rate": 3.28238249013276e-05,
75
- "loss": 0.5222,
76
- "step": 2500
77
- },
78
- {
79
- "epoch": 0.17940437746681018,
80
- "eval_loss": 0.564910352230072,
81
- "eval_runtime": 162.9111,
82
- "eval_samples_per_second": 63.839,
83
- "eval_steps_per_second": 0.503,
84
- "step": 2500
85
- },
86
- {
87
- "epoch": 0.21528525296017223,
88
- "grad_norm": 3.9764771461486816,
89
- "learning_rate": 3.1388589881593114e-05,
90
- "loss": 0.5103,
91
- "step": 3000
92
- },
93
- {
94
- "epoch": 0.21528525296017223,
95
- "eval_loss": 0.5547525882720947,
96
- "eval_runtime": 162.9245,
97
- "eval_samples_per_second": 63.833,
98
- "eval_steps_per_second": 0.503,
99
- "step": 3000
100
  }
101
  ],
102
  "logging_steps": 500,
@@ -116,7 +41,7 @@
116
  "attributes": {}
117
  }
118
  },
119
- "total_flos": 2.5264104210432e+16,
120
  "train_batch_size": 32,
121
  "trial_name": null,
122
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.7613404989242554,
3
+ "best_model_checkpoint": "/kaggle/working/results/checkpoint-500",
4
+ "epoch": 0.03588087549336204,
5
  "eval_steps": 500,
6
+ "global_step": 500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.03588087549336204,
13
+ "grad_norm": 5.043849468231201,
14
  "learning_rate": 3.856476498026552e-05,
15
+ "loss": 0.3432,
16
  "step": 500
17
  },
18
  {
19
  "epoch": 0.03588087549336204,
20
+ "eval_loss": 0.7613404989242554,
21
+ "eval_runtime": 151.9935,
22
+ "eval_samples_per_second": 68.424,
23
+ "eval_steps_per_second": 0.539,
24
  "step": 500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  }
26
  ],
27
  "logging_steps": 500,
 
41
  "attributes": {}
42
  }
43
  },
44
+ "total_flos": 4210684035072000.0,
45
  "train_batch_size": 32,
46
  "trial_name": null,
47
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:024404cab287149bf244ccc23f601abbe7bc7e89069fc8b56dff5b18ec2e7ebe
3
  size 5240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:97df412f733e7b3de5ed3af702cb7e80553e26b0f47356c4f1b0fb309bee1536
3
  size 5240