MohamedAhmedAE committed on
Commit
aa18b6c
1 Parent(s): 91c3149

Training in progress, step 3200, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a0b17e3cc5361a6fb95a762276cf91289aa13131a8fbd9766fc3a4c840df560a
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ffa24c6d65d6ebe046b12c298cf4ae161aa8f7a73a62399ba6434af84128daa
3
  size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1d0d3e162d54962a24152dbd5b6c2d74f4ab4f1cdf06ddc47eddf9d8b6d07ca2
3
  size 84581014
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ecd655e3d91e89f6e1fa258974b79bf0d215f8ace68f3d365b99110b5abe49ce
3
  size 84581014
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c3c8b2c540004b798ef14a7898f64a9814643e8791bc9d21b3f9eb260faee38
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4c5ce141a264dd8520f51f3b5490a7f5208fde47774153324df63d2668f9a7d7
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b791cefec27286739c8a8bdafe733eef68db8d09837c7ad1df5edca79390d24b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0770650b062be7cee981b3da10cdf7010bb9457e5bd03d190388904fc6d3b7aa
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0022309891834207756,
5
  "eval_steps": 2000,
6
- "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -112,6 +112,13 @@
112
  "learning_rate": 1.9999990234026036e-05,
113
  "loss": 1.586,
114
  "step": 3000
 
 
 
 
 
 
 
115
  }
116
  ],
117
  "logging_steps": 200,
@@ -119,7 +126,7 @@
119
  "num_input_tokens_seen": 0,
120
  "num_train_epochs": 5,
121
  "save_steps": 200,
122
- "total_flos": 3.905599131493171e+16,
123
  "train_batch_size": 1,
124
  "trial_name": null,
125
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.0023797217956488276,
5
  "eval_steps": 2000,
6
+ "global_step": 3200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
112
  "learning_rate": 1.9999990234026036e-05,
113
  "loss": 1.586,
114
  "step": 3000
115
+ },
116
+ {
117
+ "epoch": 0.0,
118
+ "grad_norm": 2.8140385150909424,
119
+ "learning_rate": 1.9999988884312347e-05,
120
+ "loss": 1.6221,
121
+ "step": 3200
122
  }
123
  ],
124
  "logging_steps": 200,
 
126
  "num_input_tokens_seen": 0,
127
  "num_train_epochs": 5,
128
  "save_steps": 200,
129
+ "total_flos": 4.180089275793408e+16,
130
  "train_batch_size": 1,
131
  "trial_name": null,
132
  "trial_params": null