TeamNL commited on
Commit
15af97e
1 Parent(s): a6422fa

Training in progress, epoch 69, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3aa7b71f0186326e5dfcd1ebc696485cf4085109a236a1a5e231389a112749b6
3
  size 672610316
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:247791b25e1ae4add138e7227ed13cc2d6026bec2a436315a86d315f780e5f05
3
  size 672610316
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c757045631650e1b5afc471f91c69405d6f765f948c7a10620d8501d09b3b218
3
  size 1345341690
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f201711d26dce4748090ae33db3e3808c1d3c95ec23a22353e06c8a632ddcfb2
3
  size 1345341690
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:69ba5062855bc5b787b9131a380b45c1ef093b8f5bda042958f933465fdfe6e2
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:827a0d427586f5124064647455e6a166beb4ca620d95c3ccbb8e558f5f888f08
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66a72a0f03788668d35910cde40b02ba0eb590aa49feadc63f673cede562ba7d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea9ea2935d057334cc98150650e98c17b6bacf2fe7222d479a5d4e08bcbac6e4
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 0.8533899487562311,
3
  "best_model_checkpoint": "./results/checkpoint-1200",
4
- "epoch": 65.0,
5
  "eval_steps": 500,
6
- "global_step": 1560,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1047,6 +1047,70 @@
1047
  "eval_samples_per_second": 897.779,
1048
  "eval_steps_per_second": 16.473,
1049
  "step": 1560
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1050
  }
1051
  ],
1052
  "logging_steps": 500,
@@ -1066,7 +1130,7 @@
1066
  "attributes": {}
1067
  }
1068
  },
1069
- "total_flos": 2403310835213736.0,
1070
  "train_batch_size": 64,
1071
  "trial_name": null,
1072
  "trial_params": null
 
1
  {
2
  "best_metric": 0.8533899487562311,
3
  "best_model_checkpoint": "./results/checkpoint-1200",
4
+ "epoch": 69.0,
5
  "eval_steps": 500,
6
+ "global_step": 1656,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1047
  "eval_samples_per_second": 897.779,
1048
  "eval_steps_per_second": 16.473,
1049
  "step": 1560
1050
+ },
1051
+ {
1052
+ "epoch": 66.0,
1053
+ "grad_norm": 0.001049485057592392,
1054
+ "learning_rate": 1.7000000000000003e-05,
1055
+ "loss": 0.0001,
1056
+ "step": 1584
1057
+ },
1058
+ {
1059
+ "epoch": 66.0,
1060
+ "eval_f1": 0.8471931986968837,
1061
+ "eval_loss": 1.180769443511963,
1062
+ "eval_runtime": 0.2525,
1063
+ "eval_samples_per_second": 863.258,
1064
+ "eval_steps_per_second": 15.84,
1065
+ "step": 1584
1066
+ },
1067
+ {
1068
+ "epoch": 67.0,
1069
+ "grad_norm": 0.0010408489033579826,
1070
+ "learning_rate": 1.65e-05,
1071
+ "loss": 0.0001,
1072
+ "step": 1608
1073
+ },
1074
+ {
1075
+ "epoch": 67.0,
1076
+ "eval_f1": 0.8471931986968837,
1077
+ "eval_loss": 1.1815507411956787,
1078
+ "eval_runtime": 0.2505,
1079
+ "eval_samples_per_second": 870.186,
1080
+ "eval_steps_per_second": 15.967,
1081
+ "step": 1608
1082
+ },
1083
+ {
1084
+ "epoch": 68.0,
1085
+ "grad_norm": 0.0012064232723787427,
1086
+ "learning_rate": 1.6000000000000003e-05,
1087
+ "loss": 0.0001,
1088
+ "step": 1632
1089
+ },
1090
+ {
1091
+ "epoch": 68.0,
1092
+ "eval_f1": 0.8471931986968837,
1093
+ "eval_loss": 1.182210922241211,
1094
+ "eval_runtime": 0.2583,
1095
+ "eval_samples_per_second": 844.028,
1096
+ "eval_steps_per_second": 15.487,
1097
+ "step": 1632
1098
+ },
1099
+ {
1100
+ "epoch": 69.0,
1101
+ "grad_norm": 0.0014143523294478655,
1102
+ "learning_rate": 1.55e-05,
1103
+ "loss": 0.0001,
1104
+ "step": 1656
1105
+ },
1106
+ {
1107
+ "epoch": 69.0,
1108
+ "eval_f1": 0.8524668435013261,
1109
+ "eval_loss": 1.1818993091583252,
1110
+ "eval_runtime": 0.2452,
1111
+ "eval_samples_per_second": 888.962,
1112
+ "eval_steps_per_second": 16.311,
1113
+ "step": 1656
1114
  }
1115
  ],
1116
  "logging_steps": 500,
 
1130
  "attributes": {}
1131
  }
1132
  },
1133
+ "total_flos": 2551863638350032.0,
1134
  "train_batch_size": 64,
1135
  "trial_name": null,
1136
  "trial_params": null