exontidev commited on
Commit
adac9aa
·
1 Parent(s): 4689f8e

Training in progress, step 20, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:831e0bb05b9a1f36b45a9a19c3eecd1dc0fb1c2969369ba1ed34980e9fac859a
3
  size 9443384
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e08e739dfe9ce7a6b1ceea6e42d1bdad45e2fe71584162c058377ef279a58eab
3
  size 9443384
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:620eff553fb45535fa832e5821e526b46bdef98bb70af4a85cd0f8aaa5d67da4
3
  size 18915130
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f169b79530e61ae7689d39ba804d5d85e54f495402fa5f685977d7ac56e2649
3
  size 18915130
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7335e8cb289a1ea0e43c1def6041371603222b00ab32f88b9848447ef726d6c
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4ad978cca8e8c4322b29aa3e0a4609c75d2569be4addeebd03d3b57f8afb1187
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb8b0b45d9468b388201df4849fd7d961d84a89385733d7db18055480fb25ddf
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6c0d912a292bbdd68cfb4f0c80a88e4888079a5e3307ae09473cfae396375942
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0012910305651486298,
5
  "eval_steps": 500,
6
- "global_step": 10,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -67,6 +67,66 @@
67
  "learning_rate": 1.8e-06,
68
  "loss": 9.1707,
69
  "step": 10
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
70
  }
71
  ],
72
  "logging_steps": 1,
@@ -74,7 +134,7 @@
74
  "num_input_tokens_seen": 0,
75
  "num_train_epochs": 1,
76
  "save_steps": 10,
77
- "total_flos": 13280654057472.0,
78
  "train_batch_size": 4,
79
  "trial_name": null,
80
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0025820611302972596,
5
  "eval_steps": 500,
6
+ "global_step": 20,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
67
  "learning_rate": 1.8e-06,
68
  "loss": 9.1707,
69
  "step": 10
70
+ },
71
+ {
72
+ "epoch": 0.0,
73
+ "learning_rate": 2.0000000000000003e-06,
74
+ "loss": 10.1454,
75
+ "step": 11
76
+ },
77
+ {
78
+ "epoch": 0.0,
79
+ "learning_rate": 2.2e-06,
80
+ "loss": 9.3448,
81
+ "step": 12
82
+ },
83
+ {
84
+ "epoch": 0.0,
85
+ "learning_rate": 2.4000000000000003e-06,
86
+ "loss": 9.7101,
87
+ "step": 13
88
+ },
89
+ {
90
+ "epoch": 0.0,
91
+ "learning_rate": 2.6e-06,
92
+ "loss": 9.9546,
93
+ "step": 14
94
+ },
95
+ {
96
+ "epoch": 0.0,
97
+ "learning_rate": 2.8000000000000003e-06,
98
+ "loss": 9.6989,
99
+ "step": 15
100
+ },
101
+ {
102
+ "epoch": 0.0,
103
+ "learning_rate": 3e-06,
104
+ "loss": 9.6937,
105
+ "step": 16
106
+ },
107
+ {
108
+ "epoch": 0.0,
109
+ "learning_rate": 3.2000000000000003e-06,
110
+ "loss": 9.5372,
111
+ "step": 17
112
+ },
113
+ {
114
+ "epoch": 0.0,
115
+ "learning_rate": 3.4000000000000005e-06,
116
+ "loss": 9.1956,
117
+ "step": 18
118
+ },
119
+ {
120
+ "epoch": 0.0,
121
+ "learning_rate": 3.6e-06,
122
+ "loss": 9.7139,
123
+ "step": 19
124
+ },
125
+ {
126
+ "epoch": 0.0,
127
+ "learning_rate": 3.8e-06,
128
+ "loss": 9.5589,
129
+ "step": 20
130
  }
131
  ],
132
  "logging_steps": 1,
 
134
  "num_input_tokens_seen": 0,
135
  "num_train_epochs": 1,
136
  "save_steps": 10,
137
+ "total_flos": 26086414196736.0,
138
  "train_batch_size": 4,
139
  "trial_name": null,
140
  "trial_params": null