MikhailPugachev commited on
Commit
bb1ee88
·
1 Parent(s): 106e870

Force add model files

Browse files
checkpoint-23985/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:da681d00471785472c51b663fa5dfc09d86055e11281b2f9ca1d89f44cc450e2
3
- size 207734077
 
 
 
 
checkpoint-23985/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:44b8d226f925403ae3da29607377591d51678f6b810b38292a8988d53c35c49d
3
- size 14244
 
 
 
 
checkpoint-23985/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ddb29c7c2b214d417422c344dc6f897586016be65d4e0e163c3818c7e263f168
3
- size 1064
 
 
 
 
checkpoint-23985/trainer_state.json DELETED
@@ -1,180 +0,0 @@
1
- {
2
- "best_metric": 0.718079007713154,
3
- "best_model_checkpoint": "./checkpoints/checkpoint-23985",
4
- "epoch": 5.0,
5
- "eval_steps": 500,
6
- "global_step": 23985,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "epoch": 0.26,
13
- "learning_rate": 2.5e-05,
14
- "loss": 2.119,
15
- "step": 1250
16
- },
17
- {
18
- "epoch": 0.52,
19
- "learning_rate": 1.75e-05,
20
- "loss": 1.2117,
21
- "step": 2500
22
- },
23
- {
24
- "epoch": 0.78,
25
- "learning_rate": 1.75e-05,
26
- "loss": 1.0565,
27
- "step": 3750
28
- },
29
- {
30
- "epoch": 1.0,
31
- "eval_accuracy": 0.6968938920158433,
32
- "eval_loss": 0.930023193359375,
33
- "eval_runtime": 327.1586,
34
- "eval_samples_per_second": 117.301,
35
- "eval_steps_per_second": 3.668,
36
- "step": 4797
37
- },
38
- {
39
- "epoch": 1.04,
40
- "learning_rate": 1.2249999999999998e-05,
41
- "loss": 0.9975,
42
- "step": 5000
43
- },
44
- {
45
- "epoch": 1.3,
46
- "learning_rate": 1.2249999999999998e-05,
47
- "loss": 0.9029,
48
- "step": 6250
49
- },
50
- {
51
- "epoch": 1.56,
52
- "learning_rate": 8.574999999999998e-06,
53
- "loss": 0.8903,
54
- "step": 7500
55
- },
56
- {
57
- "epoch": 1.82,
58
- "learning_rate": 8.574999999999998e-06,
59
- "loss": 0.874,
60
- "step": 8750
61
- },
62
- {
63
- "epoch": 2.0,
64
- "eval_accuracy": 0.7115905774442359,
65
- "eval_loss": 0.8782150745391846,
66
- "eval_runtime": 300.3321,
67
- "eval_samples_per_second": 127.779,
68
- "eval_steps_per_second": 3.996,
69
- "step": 9594
70
- },
71
- {
72
- "epoch": 2.08,
73
- "learning_rate": 6.002499999999999e-06,
74
- "loss": 0.8538,
75
- "step": 10000
76
- },
77
- {
78
- "epoch": 2.35,
79
- "learning_rate": 6.002499999999999e-06,
80
- "loss": 0.8203,
81
- "step": 11250
82
- },
83
- {
84
- "epoch": 2.61,
85
- "learning_rate": 4.201749999999999e-06,
86
- "loss": 0.8195,
87
- "step": 12500
88
- },
89
- {
90
- "epoch": 2.87,
91
- "learning_rate": 4.201749999999999e-06,
92
- "loss": 0.8116,
93
- "step": 13750
94
- },
95
- {
96
- "epoch": 3.0,
97
- "eval_accuracy": 0.7145611840733792,
98
- "eval_loss": 0.8623952865600586,
99
- "eval_runtime": 300.1975,
100
- "eval_samples_per_second": 127.836,
101
- "eval_steps_per_second": 3.997,
102
- "step": 14391
103
- },
104
- {
105
- "epoch": 3.13,
106
- "learning_rate": 2.941224999999999e-06,
107
- "loss": 0.7909,
108
- "step": 15000
109
- },
110
- {
111
- "epoch": 3.39,
112
- "learning_rate": 2.941224999999999e-06,
113
- "loss": 0.7914,
114
- "step": 16250
115
- },
116
- {
117
- "epoch": 3.65,
118
- "learning_rate": 2.058857499999999e-06,
119
- "loss": 0.7926,
120
- "step": 17500
121
- },
122
- {
123
- "epoch": 3.91,
124
- "learning_rate": 2.058857499999999e-06,
125
- "loss": 0.7839,
126
- "step": 18750
127
- },
128
- {
129
- "epoch": 4.0,
130
- "eval_accuracy": 0.7179226599958307,
131
- "eval_loss": 0.8576174378395081,
132
- "eval_runtime": 300.1549,
133
- "eval_samples_per_second": 127.854,
134
- "eval_steps_per_second": 3.998,
135
- "step": 19188
136
- },
137
- {
138
- "epoch": 4.17,
139
- "learning_rate": 1.4412002499999993e-06,
140
- "loss": 0.7696,
141
- "step": 20000
142
- },
143
- {
144
- "epoch": 4.43,
145
- "learning_rate": 1.4412002499999993e-06,
146
- "loss": 0.7667,
147
- "step": 21250
148
- },
149
- {
150
- "epoch": 4.69,
151
- "learning_rate": 1.0088401749999995e-06,
152
- "loss": 0.7701,
153
- "step": 22500
154
- },
155
- {
156
- "epoch": 4.95,
157
- "learning_rate": 1.0088401749999995e-06,
158
- "loss": 0.7726,
159
- "step": 23750
160
- },
161
- {
162
- "epoch": 5.0,
163
- "eval_accuracy": 0.718079007713154,
164
- "eval_loss": 0.8557529449462891,
165
- "eval_runtime": 300.2736,
166
- "eval_samples_per_second": 127.803,
167
- "eval_steps_per_second": 3.996,
168
- "step": 23985
169
- }
170
- ],
171
- "logging_steps": 1250,
172
- "max_steps": 23985,
173
- "num_input_tokens_seen": 0,
174
- "num_train_epochs": 5,
175
- "save_steps": 500,
176
- "total_flos": 0.0,
177
- "train_batch_size": 32,
178
- "trial_name": null,
179
- "trial_params": null
180
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-23985/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:351ee58f715ddbda098b9c7aa4f73852ef8355fd01d77a909cbdf33db04aedc4
3
- size 4664