masatochi commited on
Commit
46dcb6a
1 Parent(s): 4aaed5c

Training in progress, step 140, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99cfac1c2baddf1f9ea0d4faa7680daa5c2e80d219f5986eddd54fd836006122
3
  size 59827904
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ab2e0910ee8ff17a681acdd56f9074e96fa82a916a8f3239bf3f1f3cd80ce465
3
  size 59827904
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a18d2c0a01d50a4c51715dadf4a0be0be51dc97452581044464ba783c996e19
3
  size 30875540
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:13e25365edc54422e82cbac4c671e2435e8bbd2900af3e9044725a0b51ab79a1
3
  size 30875540
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:392da1c3f18aef86ac42978c5ab2b3ef58330aa9d54adc511669e79f01b39081
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3620fd05065514f79fda6c0faf379ce93ec4350e71e2f2104721a44e9230ff6
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6fc750a6dfb3e5c9f642238b7443b0984a56e79b7c2731a6e152ecfc3e32f4e7
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:671828e69cd8fd42106344a5797cbc78f701a434a6386d9dfacd16451ba179aa
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.06601870529983496,
5
  "eval_steps": 34,
6
- "global_step": 135,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -984,6 +984,49 @@
984
  "learning_rate": 6.387583338128471e-05,
985
  "loss": 9.7205,
986
  "step": 135
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
987
  }
988
  ],
989
  "logging_steps": 1,
@@ -1003,7 +1046,7 @@
1003
  "attributes": {}
1004
  }
1005
  },
1006
- "total_flos": 2.976047562542285e+17,
1007
  "train_batch_size": 3,
1008
  "trial_name": null,
1009
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.06846384253316218,
5
  "eval_steps": 34,
6
+ "global_step": 140,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
984
  "learning_rate": 6.387583338128471e-05,
985
  "loss": 9.7205,
986
  "step": 135
987
+ },
988
+ {
989
+ "epoch": 0.0665077327465004,
990
+ "grad_norm": 9.914893519909028e+17,
991
+ "learning_rate": 6.215889499576898e-05,
992
+ "loss": 8.1576,
993
+ "step": 136
994
+ },
995
+ {
996
+ "epoch": 0.0665077327465004,
997
+ "eval_loss": 9.333438873291016,
998
+ "eval_runtime": 880.1222,
999
+ "eval_samples_per_second": 2.935,
1000
+ "eval_steps_per_second": 0.978,
1001
+ "step": 136
1002
+ },
1003
+ {
1004
+ "epoch": 0.06699676019316585,
1005
+ "grad_norm": Infinity,
1006
+ "learning_rate": 6.0454879312945754e-05,
1007
+ "loss": 9.5434,
1008
+ "step": 137
1009
+ },
1010
+ {
1011
+ "epoch": 0.06748578763983129,
1012
+ "grad_norm": Infinity,
1013
+ "learning_rate": 5.876436825260967e-05,
1014
+ "loss": 9.9258,
1015
+ "step": 138
1016
+ },
1017
+ {
1018
+ "epoch": 0.06797481508649673,
1019
+ "grad_norm": Infinity,
1020
+ "learning_rate": 5.708793912273911e-05,
1021
+ "loss": 8.8349,
1022
+ "step": 139
1023
+ },
1024
+ {
1025
+ "epoch": 0.06846384253316218,
1026
+ "grad_norm": Infinity,
1027
+ "learning_rate": 5.542616442234618e-05,
1028
+ "loss": 10.4222,
1029
+ "step": 140
1030
  }
1031
  ],
1032
  "logging_steps": 1,
 
1046
  "attributes": {}
1047
  }
1048
  },
1049
+ "total_flos": 3.086271546340147e+17,
1050
  "train_batch_size": 3,
1051
  "trial_name": null,
1052
  "trial_params": null