moos124 committed on
Commit
92d4fba
·
verified ·
1 Parent(s): 0381ec2

Training in progress, step 4720, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:78750084a2c8a275ab30bb3b663f4c03362b9954a25da4d54c0c54f5e2b4e314
3
  size 70430032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3d1ab2ab8575c3321712c1a6bad2eba029eedcf542bf41c1ccaab7fa22efcbcb
3
  size 70430032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:51b8e10db94ef1001dd5e11bd1e56d16121fb23ac1147dad6b4cc1002033d5de
3
  size 141058579
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b970592946f6915d514b0c5fde882560cf4b0d7152bf01c4bbd220428b47cf61
3
  size 141058579
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed9e4bcf51f3b4e6ca7c363bc097e5e709bb09103be45200e9f0306962a4a816
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9767e25e15d5c9765677c20f262aa1568a5646ba47fed1966038f40ce9201802
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:587fb5736d58a8ebe02aff6e5c955fe956c7ebf07cdf325915871bad975246c5
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d7addd295ee72ea36828978c8aecf66577e94dd9559017f38aea68d35ed7e152
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 1.0046933333333334,
6
  "eval_steps": 500,
7
- "global_step": 4710,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -4728,6 +4728,16 @@
4728
  "mean_token_accuracy": 0.7781821310520172,
4729
  "num_tokens": 21879326.0,
4730
  "step": 4710
 
 
 
 
 
 
 
 
 
 
4731
  }
4732
  ],
4733
  "logging_steps": 10,
@@ -4747,7 +4757,7 @@
4747
  "attributes": {}
4748
  }
4749
  },
4750
- "total_flos": 1.0359050216216678e+17,
4751
  "train_batch_size": 4,
4752
  "trial_name": null,
4753
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 1.0068266666666668,
6
  "eval_steps": 500,
7
+ "global_step": 4720,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
4728
  "mean_token_accuracy": 0.7781821310520172,
4729
  "num_tokens": 21879326.0,
4730
  "step": 4710
4731
+ },
4732
+ {
4733
+ "entropy": 0.9335578382015228,
4734
+ "epoch": 1.0068266666666668,
4735
+ "grad_norm": 0.2416774481534958,
4736
+ "learning_rate": 5.2058967697330784e-05,
4737
+ "loss": 0.9976616859436035,
4738
+ "mean_token_accuracy": 0.7626704692840576,
4739
+ "num_tokens": 21933750.0,
4740
+ "step": 4720
4741
  }
4742
  ],
4743
  "logging_steps": 10,
 
4757
  "attributes": {}
4758
  }
4759
  },
4760
+ "total_flos": 1.0386211988394086e+17,
4761
  "train_batch_size": 4,
4762
  "trial_name": null,
4763
  "trial_params": null