dq158 commited on
Commit
845b642
1 Parent(s): f796642

Training in progress, epoch 12, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b685eb47baafc7984a2354c64f3a633c89ac5489423119e83feaa1b146cbedb
3
  size 2372346
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41341b31785ed0a2fbc8630488a4926f852161ef06ea08cc89540c9c37dc6630
3
  size 2372346
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6600748b28844d6445286c2db32da87f8f4ae7df8cb2643db5bb76ae67233234
3
  size 990409330
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7137ffdadd3d98f6eb4df58e0d93207f78d2d3a226609b6aa51f825be2c62570
3
  size 990409330
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4073b96953e05453b4b6e4a3030f1c740fa0e02670dbb4843214e79d4e7e84c5
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:81575766e0f892673e3292549b8eae61ee2367f845a1b2b10898b9e83a3ef05f
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b44cd738bffad7be53bf491009475d442fa0e6452600c820ee5979a9a8ce3a05
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fc3d9b89093dc44b3f8ae65848b5c79017341da78ff50d3adcc276738c797e7
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": 1.5654487609863281,
3
  "best_model_checkpoint": "dq158/pingusPongus/checkpoint-6323",
4
- "epoch": 11.0,
5
  "eval_steps": 500,
6
- "global_step": 69553,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1050,13 +1050,104 @@
1050
  "eval_steps_per_second": 1.037,
1051
  "eval_translation_length": 52469,
1052
  "step": 69553
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1053
  }
1054
  ],
1055
  "logging_steps": 500,
1056
  "max_steps": 126460,
1057
  "num_train_epochs": 20,
1058
  "save_steps": 500,
1059
- "total_flos": 1.9050018020838605e+17,
1060
  "trial_name": null,
1061
  "trial_params": null
1062
  }
 
1
  {
2
  "best_metric": 1.5654487609863281,
3
  "best_model_checkpoint": "dq158/pingusPongus/checkpoint-6323",
4
+ "epoch": 12.0,
5
  "eval_steps": 500,
6
+ "global_step": 75876,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1050
  "eval_steps_per_second": 1.037,
1051
  "eval_translation_length": 52469,
1052
  "step": 69553
1053
+ },
1054
+ {
1055
+ "epoch": 11.07,
1056
+ "learning_rate": 2.0952479625603017e-06,
1057
+ "loss": 1.7783,
1058
+ "step": 70000
1059
+ },
1060
+ {
1061
+ "epoch": 11.15,
1062
+ "learning_rate": 2.0645150514646657e-06,
1063
+ "loss": 1.7443,
1064
+ "step": 70500
1065
+ },
1066
+ {
1067
+ "epoch": 11.23,
1068
+ "learning_rate": 2.0338498642707977e-06,
1069
+ "loss": 1.7678,
1070
+ "step": 71000
1071
+ },
1072
+ {
1073
+ "epoch": 11.31,
1074
+ "learning_rate": 2.0032571698372577e-06,
1075
+ "loss": 1.7786,
1076
+ "step": 71500
1077
+ },
1078
+ {
1079
+ "epoch": 11.39,
1080
+ "learning_rate": 1.9727417257489874e-06,
1081
+ "loss": 1.7768,
1082
+ "step": 72000
1083
+ },
1084
+ {
1085
+ "epoch": 11.47,
1086
+ "learning_rate": 1.9423082775774337e-06,
1087
+ "loss": 1.7953,
1088
+ "step": 72500
1089
+ },
1090
+ {
1091
+ "epoch": 11.55,
1092
+ "learning_rate": 1.9119615581425524e-06,
1093
+ "loss": 1.7715,
1094
+ "step": 73000
1095
+ },
1096
+ {
1097
+ "epoch": 11.62,
1098
+ "learning_rate": 1.881706286776785e-06,
1099
+ "loss": 1.8047,
1100
+ "step": 73500
1101
+ },
1102
+ {
1103
+ "epoch": 11.7,
1104
+ "learning_rate": 1.8515471685911402e-06,
1105
+ "loss": 1.7781,
1106
+ "step": 74000
1107
+ },
1108
+ {
1109
+ "epoch": 11.78,
1110
+ "learning_rate": 1.821488893743488e-06,
1111
+ "loss": 1.8197,
1112
+ "step": 74500
1113
+ },
1114
+ {
1115
+ "epoch": 11.86,
1116
+ "learning_rate": 1.7915361367091677e-06,
1117
+ "loss": 1.8159,
1118
+ "step": 75000
1119
+ },
1120
+ {
1121
+ "epoch": 11.94,
1122
+ "learning_rate": 1.7616935555540475e-06,
1123
+ "loss": 1.8004,
1124
+ "step": 75500
1125
+ },
1126
+ {
1127
+ "epoch": 12.0,
1128
+ "eval_bleu": 1.0,
1129
+ "eval_brevity_penalty": 1.0,
1130
+ "eval_length_ratio": 1.0,
1131
+ "eval_loss": 1.5739296674728394,
1132
+ "eval_precisions": [
1133
+ 1.0,
1134
+ 1.0,
1135
+ 1.0,
1136
+ 1.0
1137
+ ],
1138
+ "eval_reference_length": 52457,
1139
+ "eval_runtime": 676.9983,
1140
+ "eval_samples_per_second": 4.152,
1141
+ "eval_steps_per_second": 1.038,
1142
+ "eval_translation_length": 52457,
1143
+ "step": 75876
1144
  }
1145
  ],
1146
  "logging_steps": 500,
1147
  "max_steps": 126460,
1148
  "num_train_epochs": 20,
1149
  "save_steps": 500,
1150
+ "total_flos": 2.078183784091484e+17,
1151
  "trial_name": null,
1152
  "trial_params": null
1153
  }