bhuvanmdev commited on
Commit
697913b
·
verified ·
1 Parent(s): 4a88b7b

Training in progress, step 2040, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a7b6655782a983e665dba0e849d536995a49fc28327e1a340c6d0acb3e6a9346
3
  size 100697728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e47362b6d0eac1f82180e6b29ee3f7c2d915006bc065406131a0fd35b2efbbc1
3
  size 100697728
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7bf5ba4c5bdffdca097a4cab622da35d4625e043ff5ff81b133e63498d06dbfc
3
  size 201541754
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd11da65ffb0d4570e59961b0c132d6bd8b1f1fdc0c2f1a395db947863533b9d
3
  size 201541754
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6c676bb33648ee9263046f9d978c9dc81390c0ac42995b13679bcd936e804701
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68d8d961de9fa331face7d8cad742caf1cb05dec24ac6128db0c9c0b06d4b11b
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e1288e563fd14386ce1f1f209b36f861a91ceda68715fc12dfb92acd5d04d997
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d1f92df614fc27cf99bd5eb65435800e9a0c33f53a2b39dfcb3917ef6db37df5
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.7227191413237924,
5
  "eval_steps": 500,
6
- "global_step": 2020,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1623,14 +1623,30 @@
1623
  "loss": 0.3982,
1624
  "num_input_tokens_seen": 1366487,
1625
  "step": 2020
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1626
  }
1627
  ],
1628
  "logging_steps": 10,
1629
  "max_steps": 2795,
1630
- "num_input_tokens_seen": 1366487,
1631
  "num_train_epochs": 1,
1632
  "save_steps": 20,
1633
- "total_flos": 3.0727467028740096e+16,
1634
  "train_batch_size": 1,
1635
  "trial_name": null,
1636
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.7298747763864043,
5
  "eval_steps": 500,
6
+ "global_step": 2040,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1623
  "loss": 0.3982,
1624
  "num_input_tokens_seen": 1366487,
1625
  "step": 2020
1626
+ },
1627
+ {
1628
+ "epoch": 0.7262969588550984,
1629
+ "grad_norm": 0.2706156373023987,
1630
+ "learning_rate": 5.4740608228980325e-05,
1631
+ "loss": 0.3715,
1632
+ "num_input_tokens_seen": 1374437,
1633
+ "step": 2030
1634
+ },
1635
+ {
1636
+ "epoch": 0.7298747763864043,
1637
+ "grad_norm": 0.27796509861946106,
1638
+ "learning_rate": 5.4025044722719145e-05,
1639
+ "loss": 0.3981,
1640
+ "num_input_tokens_seen": 1382038,
1641
+ "step": 2040
1642
  }
1643
  ],
1644
  "logging_steps": 10,
1645
  "max_steps": 2795,
1646
+ "num_input_tokens_seen": 1382038,
1647
  "num_train_epochs": 1,
1648
  "save_steps": 20,
1649
+ "total_flos": 3.1077154102063104e+16,
1650
  "train_batch_size": 1,
1651
  "trial_name": null,
1652
  "trial_params": null