bhuvanmdev commited on
Commit
e3ae59c
1 Parent(s): 82f5f98

Training in progress, step 620, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7e4aa0619718301c4cee6a46cec390e99f468dbc3294652a09dad80c2c1c52ac
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c42e3144125416adcb9a1ec0d9b0eb55619b6a3619ed6542a06de661bbf161b2
3
  size 100697728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2f3f0d6d31265332cf0153ce373f616a507a1b2bdb68c61151d6f231efb64720
3
  size 201541754
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9177ecdd03afda4d13d5d2eb2a0c1d5fbe522c00f0127e7917ebf328b282a0c8
3
  size 201541754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a3896f4879562a69571237c21fd91dc0cba3215ed23fb9ae7f0374432fee52f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:57a916e4fd36223dffeadbeb32e21c87fb935df188a4e2e19aafc7b1c3d84241
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a18b7d573288acd75186bfa1799a658370b6b400daf569612aa7a4ba8b0298a1
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:398bdc23ac4d9e39643bb660fbaeca4b591289face79e5178809ed45f99c413b
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.2146690518783542,
5
  "eval_steps": 500,
6
- "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -487,14 +487,30 @@
487
  "loss": 0.4148,
488
  "num_input_tokens_seen": 399368,
489
  "step": 600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
490
  }
491
  ],
492
  "logging_steps": 10,
493
  "max_steps": 2795,
494
- "num_input_tokens_seen": 399368,
495
  "num_train_epochs": 1,
496
  "save_steps": 20,
497
- "total_flos": 8980375995039744.0,
498
  "train_batch_size": 1,
499
  "trial_name": null,
500
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.22182468694096602,
5
  "eval_steps": 500,
6
+ "global_step": 620,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
487
  "loss": 0.4148,
488
  "num_input_tokens_seen": 399368,
489
  "step": 600
490
+ },
491
+ {
492
+ "epoch": 0.2182468694096601,
493
+ "grad_norm": 0.35863760113716125,
494
+ "learning_rate": 0.000156350626118068,
495
+ "loss": 0.4105,
496
+ "num_input_tokens_seen": 407343,
497
+ "step": 610
498
+ },
499
+ {
500
+ "epoch": 0.22182468694096602,
501
+ "grad_norm": 0.287056028842926,
502
+ "learning_rate": 0.0001556350626118068,
503
+ "loss": 0.4495,
504
+ "num_input_tokens_seen": 413867,
505
+ "step": 620
506
  }
507
  ],
508
  "logging_steps": 10,
509
  "max_steps": 2795,
510
+ "num_input_tokens_seen": 413867,
511
  "num_train_epochs": 1,
512
  "save_steps": 20,
513
+ "total_flos": 9306407303387136.0,
514
  "train_batch_size": 1,
515
  "trial_name": null,
516
  "trial_params": null