fpuentes committed on
Commit
fec60a7
•
1 Parent(s): 880a81f

Model save

Browse files
last-checkpoint/config.json DELETED
@@ -1,28 +0,0 @@
1
- {
2
- "_name_or_path": "/home/pcjf/CESGA/works/lmodels/models/tiny",
3
- "architectures": [
4
- "RobertaForMaskedLM"
5
- ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "bos_token_id": 0,
8
- "classifier_dropout": null,
9
- "eos_token_id": 2,
10
- "gradient_checkpointing": false,
11
- "hidden_act": "gelu",
12
- "hidden_dropout_prob": 0.1,
13
- "hidden_size": 768,
14
- "initializer_range": 0.02,
15
- "intermediate_size": 3072,
16
- "layer_norm_eps": 1e-12,
17
- "max_position_embeddings": 514,
18
- "model_type": "roberta",
19
- "num_attention_heads": 12,
20
- "num_hidden_layers": 6,
21
- "pad_token_id": 1,
22
- "position_embedding_type": "absolute",
23
- "torch_dtype": "float32",
24
- "transformers_version": "4.25.1",
25
- "type_vocab_size": 1,
26
- "use_cache": true,
27
- "vocab_size": 31002
28
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:b02d30dbb1390014003809a5ade58a1972fa2143041cc4f6d2e32c656290b47e
3
- size 538943941
 
 
 
 
last-checkpoint/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:0c7bd2b3fc4c75870baf39c97dd0a294d6537e6ed402cade193d9ca453a2b3ce
3
- size 269468281
 
 
 
 
last-checkpoint/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:7ccc59651e44275f9521ab69d703635efaa8d9ba49d678eac525ffc2eaf55996
3
- size 14575
 
 
 
 
last-checkpoint/scaler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:367928c873a79d90abcb66db0f8b320eea7346dc9bc779b4e7963dfc82cf2ada
3
- size 557
 
 
 
 
last-checkpoint/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:e576c1af05d044d7dfe73b333bc091a6eb71e1a62324659c9b82c58e9887a777
3
- size 627
 
 
 
 
last-checkpoint/trainer_state.json DELETED
@@ -1,170 +0,0 @@
1
- {
2
- "best_metric": 2.2180914878845215,
3
- "best_model_checkpoint": "/home/pcjf/CESGA/works/lmodels/models/tiny/checkpoint-5500",
4
- "epoch": 4.968273681833578,
5
- "global_step": 5500,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.45,
12
- "learning_rate": 9.096657633242999e-05,
13
- "loss": 2.6679,
14
- "step": 500
15
- },
16
- {
17
- "epoch": 0.45,
18
- "eval_loss": 2.550431489944458,
19
- "eval_runtime": 33.087,
20
- "eval_samples_per_second": 347.327,
21
- "eval_steps_per_second": 43.431,
22
- "step": 500
23
- },
24
- {
25
- "epoch": 0.9,
26
- "learning_rate": 8.193315266485999e-05,
27
- "loss": 2.6019,
28
- "step": 1000
29
- },
30
- {
31
- "epoch": 0.9,
32
- "eval_loss": 2.475651741027832,
33
- "eval_runtime": 32.0278,
34
- "eval_samples_per_second": 358.813,
35
- "eval_steps_per_second": 44.867,
36
- "step": 1000
37
- },
38
- {
39
- "epoch": 1.35,
40
- "learning_rate": 7.289972899728998e-05,
41
- "loss": 2.5247,
42
- "step": 1500
43
- },
44
- {
45
- "epoch": 1.35,
46
- "eval_loss": 2.4407145977020264,
47
- "eval_runtime": 32.3311,
48
- "eval_samples_per_second": 355.447,
49
- "eval_steps_per_second": 44.446,
50
- "step": 1500
51
- },
52
- {
53
- "epoch": 1.81,
54
- "learning_rate": 6.386630532971996e-05,
55
- "loss": 2.4669,
56
- "step": 2000
57
- },
58
- {
59
- "epoch": 1.81,
60
- "eval_loss": 2.389404773712158,
61
- "eval_runtime": 32.7481,
62
- "eval_samples_per_second": 350.921,
63
- "eval_steps_per_second": 43.88,
64
- "step": 2000
65
- },
66
- {
67
- "epoch": 2.26,
68
- "learning_rate": 5.483288166214996e-05,
69
- "loss": 2.4136,
70
- "step": 2500
71
- },
72
- {
73
- "epoch": 2.26,
74
- "eval_loss": 2.3637166023254395,
75
- "eval_runtime": 32.183,
76
- "eval_samples_per_second": 357.083,
77
- "eval_steps_per_second": 44.651,
78
- "step": 2500
79
- },
80
- {
81
- "epoch": 2.71,
82
- "learning_rate": 4.579945799457995e-05,
83
- "loss": 2.3683,
84
- "step": 3000
85
- },
86
- {
87
- "epoch": 2.71,
88
- "eval_loss": 2.3127212524414062,
89
- "eval_runtime": 33.1444,
90
- "eval_samples_per_second": 346.725,
91
- "eval_steps_per_second": 43.356,
92
- "step": 3000
93
- },
94
- {
95
- "epoch": 3.16,
96
- "learning_rate": 3.6766034327009944e-05,
97
- "loss": 2.3371,
98
- "step": 3500
99
- },
100
- {
101
- "epoch": 3.16,
102
- "eval_loss": 2.294386625289917,
103
- "eval_runtime": 32.528,
104
- "eval_samples_per_second": 353.296,
105
- "eval_steps_per_second": 44.177,
106
- "step": 3500
107
- },
108
- {
109
- "epoch": 3.61,
110
- "learning_rate": 2.7732610659439927e-05,
111
- "loss": 2.3041,
112
- "step": 4000
113
- },
114
- {
115
- "epoch": 3.61,
116
- "eval_loss": 2.26143741607666,
117
- "eval_runtime": 32.4261,
118
- "eval_samples_per_second": 354.406,
119
- "eval_steps_per_second": 44.316,
120
- "step": 4000
121
- },
122
- {
123
- "epoch": 4.07,
124
- "learning_rate": 1.869918699186992e-05,
125
- "loss": 2.2757,
126
- "step": 4500
127
- },
128
- {
129
- "epoch": 4.07,
130
- "eval_loss": 2.2403604984283447,
131
- "eval_runtime": 32.4416,
132
- "eval_samples_per_second": 354.236,
133
- "eval_steps_per_second": 44.295,
134
- "step": 4500
135
- },
136
- {
137
- "epoch": 4.52,
138
- "learning_rate": 9.66576332429991e-06,
139
- "loss": 2.2508,
140
- "step": 5000
141
- },
142
- {
143
- "epoch": 4.52,
144
- "eval_loss": 2.2336952686309814,
145
- "eval_runtime": 32.5285,
146
- "eval_samples_per_second": 353.29,
147
- "eval_steps_per_second": 44.177,
148
- "step": 5000
149
- },
150
- {
151
- "epoch": 4.97,
152
- "learning_rate": 6.323396567299007e-07,
153
- "loss": 2.2363,
154
- "step": 5500
155
- },
156
- {
157
- "epoch": 4.97,
158
- "eval_loss": 2.2180914878845215,
159
- "eval_runtime": 32.458,
160
- "eval_samples_per_second": 354.058,
161
- "eval_steps_per_second": 44.273,
162
- "step": 5500
163
- }
164
- ],
165
- "max_steps": 5535,
166
- "num_train_epochs": 5,
167
- "total_flos": 8.726670833749062e+16,
168
- "trial_name": null,
169
- "trial_params": null
170
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
last-checkpoint/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:687f2fe53f71dbd495a1980f0ba26bc3f950baedfff342880439c0c7042fd562
3
- size 3515
 
 
 
 
log.txt CHANGED
@@ -2916,3 +2916,17 @@ Deleting older checkpoint [/home/pcjf/CESGA/works/lmodels/models/tiny/checkpoint
2916
 
2917
  Saving model checkpoint to /home/pcjf/CESGA/works/lmodels/models/tiny/checkpoint-5500
2918
  Configuration saved in /home/pcjf/CESGA/works/lmodels/models/tiny/checkpoint-5500/config.json
2919
  Model weights saved in /home/pcjf/CESGA/works/lmodels/models/tiny/checkpoint-5500/pytorch_model.bin
 
 
2920
  99%|█████████▉| 5501/5535 [3:23:57<24:39, 43.53s/it]
2921
  99%|█████████▉| 5502/5535 [3:23:58<17:02, 30.98s/it]
2922
  99%|█████████▉| 5503/5535 [3:24:01<11:54, 22.34s/it]
2923
  99%|█████████▉| 5504/5535 [3:24:02<08:19, 16.10s/it]
2924
  99%|█████████▉| 5505/5535 [3:24:04<05:54, 11.82s/it]
2925
  99%|█████████▉| 5506/5535 [3:24:06<04:14, 8.79s/it]
2926
  99%|█████████▉| 5507/5535 [3:24:08<03:07, 6.70s/it]
 
 
 
 
 
 
2927
 
 
 
 
 
 
 
 
2916
 
2917
  Saving model checkpoint to /home/pcjf/CESGA/works/lmodels/models/tiny/checkpoint-5500
2918
  Configuration saved in /home/pcjf/CESGA/works/lmodels/models/tiny/checkpoint-5500/config.json
2919
  Model weights saved in /home/pcjf/CESGA/works/lmodels/models/tiny/checkpoint-5500/pytorch_model.bin
2920
+ Deleting older checkpoint [/home/pcjf/CESGA/works/lmodels/models/tiny/checkpoint-3000] due to args.save_total_limit
2921
+
2922
  99%|█████████▉| 5501/5535 [3:23:57<24:39, 43.53s/it]
2923
  99%|█████████▉| 5502/5535 [3:23:58<17:02, 30.98s/it]
2924
  99%|█████████▉| 5503/5535 [3:24:01<11:54, 22.34s/it]
2925
  99%|█████████▉| 5504/5535 [3:24:02<08:19, 16.10s/it]
2926
  99%|█████████▉| 5505/5535 [3:24:04<05:54, 11.82s/it]
2927
  99%|█████████▉| 5506/5535 [3:24:06<04:14, 8.79s/it]
2928
  99%|█████████▉| 5507/5535 [3:24:08<03:07, 6.70s/it]
2929
+
2930
+ Training completed. Do not forget to share your model on huggingface.co/models =)
2931
+
2932
+
2933
+ Loading best model from /home/pcjf/CESGA/works/lmodels/models/tiny/checkpoint-5500 (score: 2.2180914878845215).
2934
+
2935
 
2936
+ Saving model checkpoint to /home/pcjf/CESGA/works/lmodels/models/tiny
2937
+ Configuration saved in /home/pcjf/CESGA/works/lmodels/models/tiny/config.json
2938
+ Model weights saved in /home/pcjf/CESGA/works/lmodels/models/tiny/pytorch_model.bin
2939
+ Saving model checkpoint to /home/pcjf/CESGA/works/lmodels/models/tiny
2940
+ Configuration saved in /home/pcjf/CESGA/works/lmodels/models/tiny/config.json
2941
+ Model weights saved in /home/pcjf/CESGA/works/lmodels/models/tiny/pytorch_model.bin
runs/Jan16_09-20-43_turing/events.out.tfevents.1673857249.turing.343391.1 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fd5a5c60e2b5269ffe970bc6f4215dfdd6880246819c0c78403571c5d64d5e9b
3
- size 8620
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afe6a154248e750b5630727b98592e9310c3101e1f3f3a647c5769bc3ff0b095
3
+ size 8974