Rakhman16 commited on
Commit
ff9e7c2
·
verified ·
1 Parent(s): 8cc2bcf

Training in progress, step 5000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8556a691c4ab5fb89b6e13fbc99580121d0e3f94363e6ac2a01dd331ba85836b
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98792de487e557bcbe0988d5db38585c344e70547f2893c062bf3578dc66a39a
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8eaf3ec360621c1ce85b720dd55783852c2df291d2c2dee679c14211f9f7d6ad
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:574c1f1337303e1e9ccc23d6ff418de9615ff0ff5522f313a45b3d7696c8a9e5
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c8a5efc71021ffa065719e3167f9e1a963daf4640f03e0c3b7abc98600034804
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c37dbee48e833b4e9057a552c8f0d7af1270c21cc7d8732a724befd1c946e235
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:13e5f31b24159dd112a4c253836ceb74f5ca9e31a7bde4abe64305021c33f511
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a400764dfcb252d330daa830113fe3425a92b352a05d6f9c2bcc0960871c50c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.11007058620452881,
3
- "best_model_checkpoint": "./fine-tuned/checkpoint-4500",
4
- "epoch": 0.7903749890225696,
5
  "eval_steps": 100,
6
- "global_step": 4500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -997,6 +997,116 @@
997
  "eval_samples_per_second": 25.244,
998
  "eval_steps_per_second": 3.158,
999
  "step": 4500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1000
  }
1001
  ],
1002
  "logging_steps": 50,
@@ -1016,7 +1126,7 @@
1016
  "attributes": {}
1017
  }
1018
  },
1019
- "total_flos": 2.192248406016e+16,
1020
  "train_batch_size": 8,
1021
  "trial_name": null,
1022
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.10880845785140991,
3
+ "best_model_checkpoint": "./fine-tuned/checkpoint-5000",
4
+ "epoch": 0.8781944322472995,
5
  "eval_steps": 100,
6
+ "global_step": 5000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
997
  "eval_samples_per_second": 25.244,
998
  "eval_steps_per_second": 3.158,
999
  "step": 4500
1000
+ },
1001
+ {
1002
+ "epoch": 0.7991569333450426,
1003
+ "grad_norm": 18708.171875,
1004
+ "learning_rate": 2.4005796592306344e-05,
1005
+ "loss": 0.1165,
1006
+ "step": 4550
1007
+ },
1008
+ {
1009
+ "epoch": 0.8079388776675156,
1010
+ "grad_norm": 7859.576171875,
1011
+ "learning_rate": 2.3939926225188827e-05,
1012
+ "loss": 0.1147,
1013
+ "step": 4600
1014
+ },
1015
+ {
1016
+ "epoch": 0.8079388776675156,
1017
+ "eval_loss": 0.10995937138795853,
1018
+ "eval_runtime": 176.8489,
1019
+ "eval_samples_per_second": 25.219,
1020
+ "eval_steps_per_second": 3.155,
1021
+ "step": 4600
1022
+ },
1023
+ {
1024
+ "epoch": 0.8167208219899886,
1025
+ "grad_norm": 27483.9140625,
1026
+ "learning_rate": 2.3874055858071317e-05,
1027
+ "loss": 0.1224,
1028
+ "step": 4650
1029
+ },
1030
+ {
1031
+ "epoch": 0.8255027663124616,
1032
+ "grad_norm": 8125.94580078125,
1033
+ "learning_rate": 2.3808185490953804e-05,
1034
+ "loss": 0.1112,
1035
+ "step": 4700
1036
+ },
1037
+ {
1038
+ "epoch": 0.8255027663124616,
1039
+ "eval_loss": 0.10972581803798676,
1040
+ "eval_runtime": 176.1825,
1041
+ "eval_samples_per_second": 25.315,
1042
+ "eval_steps_per_second": 3.167,
1043
+ "step": 4700
1044
+ },
1045
+ {
1046
+ "epoch": 0.8342847106349346,
1047
+ "grad_norm": 20998.330078125,
1048
+ "learning_rate": 2.3742315123836294e-05,
1049
+ "loss": 0.1213,
1050
+ "step": 4750
1051
+ },
1052
+ {
1053
+ "epoch": 0.8430666549574076,
1054
+ "grad_norm": 7832.6513671875,
1055
+ "learning_rate": 2.3676444756718777e-05,
1056
+ "loss": 0.1163,
1057
+ "step": 4800
1058
+ },
1059
+ {
1060
+ "epoch": 0.8430666549574076,
1061
+ "eval_loss": 0.10929498076438904,
1062
+ "eval_runtime": 175.008,
1063
+ "eval_samples_per_second": 25.485,
1064
+ "eval_steps_per_second": 3.188,
1065
+ "step": 4800
1066
+ },
1067
+ {
1068
+ "epoch": 0.8518485992798805,
1069
+ "grad_norm": 10396.4267578125,
1070
+ "learning_rate": 2.3610574389601264e-05,
1071
+ "loss": 0.1056,
1072
+ "step": 4850
1073
+ },
1074
+ {
1075
+ "epoch": 0.8606305436023536,
1076
+ "grad_norm": 10345.142578125,
1077
+ "learning_rate": 2.3544704022483754e-05,
1078
+ "loss": 0.1131,
1079
+ "step": 4900
1080
+ },
1081
+ {
1082
+ "epoch": 0.8606305436023536,
1083
+ "eval_loss": 0.10942210257053375,
1084
+ "eval_runtime": 175.0447,
1085
+ "eval_samples_per_second": 25.479,
1086
+ "eval_steps_per_second": 3.188,
1087
+ "step": 4900
1088
+ },
1089
+ {
1090
+ "epoch": 0.8694124879248265,
1091
+ "grad_norm": 7617.70703125,
1092
+ "learning_rate": 2.3478833655366237e-05,
1093
+ "loss": 0.1033,
1094
+ "step": 4950
1095
+ },
1096
+ {
1097
+ "epoch": 0.8781944322472995,
1098
+ "grad_norm": 8898.314453125,
1099
+ "learning_rate": 2.3412963288248727e-05,
1100
+ "loss": 0.1168,
1101
+ "step": 5000
1102
+ },
1103
+ {
1104
+ "epoch": 0.8781944322472995,
1105
+ "eval_loss": 0.10880845785140991,
1106
+ "eval_runtime": 175.2118,
1107
+ "eval_samples_per_second": 25.455,
1108
+ "eval_steps_per_second": 3.185,
1109
+ "step": 5000
1110
  }
1111
  ],
1112
  "logging_steps": 50,
 
1126
  "attributes": {}
1127
  }
1128
  },
1129
+ "total_flos": 2.43583156224e+16,
1130
  "train_batch_size": 8,
1131
  "trial_name": null,
1132
  "trial_params": null