AlekseyKorshuk committed on
Commit
44865d9
1 Parent(s): 0ee9a7e

huggingartists

Browse files
README.md CHANGED
@@ -14,7 +14,7 @@ widget:
14
  <div class="inline-flex flex-col" style="line-height: 1.5;">
15
  <div class="flex">
16
  <div
17
- style="display:DISPLAY_1; margin-left: auto; margin-right: auto; width: 92px; height:92px; border-radius: 50%; background-size: cover; background-image: url(&#39;https://images.genius.com/3c1f124fcbbc2857a95e513fb34cc5a8.400x400x1.jpg&#39;)">
18
  </div>
19
  </div>
20
  <div style="text-align: center; margin-top: 3px; font-size: 16px; font-weight: 800">🤖 HuggingArtists Model 🤖</div>
@@ -45,15 +45,15 @@ from datasets import load_dataset
45
  dataset = load_dataset("huggingartists/taylor-swift")
46
  ```
47
 
48
- [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/orx9pgcj/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Taylor Swift's lyrics.
53
 
54
- Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/13d0g1o4) for full transparency and reproducibility.
55
 
56
- At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/13d0g1o4/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
14
  <div class="inline-flex flex-col" style="line-height: 1.5;">
15
  <div class="flex">
16
  <div
17
+ style="display:DISPLAY_1; margin-left: auto; margin-right: auto; width: 92px; height:92px; border-radius: 50%; background-size: cover; background-image: url(&#39;https://images.genius.com/721a6c465a666419bf286b473287c33f.446x446x1.jpg&#39;)">
18
  </div>
19
  </div>
20
  <div style="text-align: center; margin-top: 3px; font-size: 16px; font-weight: 800">🤖 HuggingArtists Model 🤖</div>
45
  dataset = load_dataset("huggingartists/taylor-swift")
46
  ```
47
 
48
+ [Explore the data](https://wandb.ai/huggingartists/huggingartists/runs/11hietbj/artifacts), which is tracked with [W&B artifacts](https://docs.wandb.com/artifacts) at every step of the pipeline.
49
 
50
  ## Training procedure
51
 
52
  The model is based on a pre-trained [GPT-2](https://huggingface.co/gpt2) which is fine-tuned on Taylor Swift's lyrics.
53
 
54
+ Hyperparameters and metrics are recorded in the [W&B training run](https://wandb.ai/huggingartists/huggingartists/runs/jwz5zda0) for full transparency and reproducibility.
55
 
56
+ At the end of training, [the final model](https://wandb.ai/huggingartists/huggingartists/runs/jwz5zda0/artifacts) is logged and versioned.
57
 
58
  ## How to use
59
 
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "gpt2",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
@@ -36,7 +36,7 @@
36
  }
37
  },
38
  "torch_dtype": "float32",
39
- "transformers_version": "4.12.2",
40
  "use_cache": true,
41
  "vocab_size": 50257
42
  }
1
  {
2
+ "_name_or_path": "taylor-swift",
3
  "activation_function": "gelu_new",
4
  "architectures": [
5
  "GPT2LMHeadModel"
36
  }
37
  },
38
  "torch_dtype": "float32",
39
+ "transformers_version": "4.16.2",
40
  "use_cache": true,
41
  "vocab_size": 50257
42
  }
evaluation.txt CHANGED
@@ -1 +1 @@
1
- {"eval_loss": 2.1195662021636963, "eval_runtime": 2.7951, "eval_samples_per_second": 76.561, "eval_steps_per_second": 9.66, "epoch": 5.0}
1
+ {"eval_loss": 1.4377235174179077, "eval_runtime": 9.5718, "eval_samples_per_second": 20.895, "eval_steps_per_second": 2.612, "epoch": 7.0}
flax_model.msgpack CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:283ca8f9344b855954167e60f2db67ac2afc241037fe870abcdcea59b7262caf
3
  size 497764120
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76ae138578ec229ab199e35e58682cc0818297990880e98b41bb7caca21799bb
3
  size 497764120
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:642b9dfcc4d3d148df04dbd5dd9fc2e06a625d5877f48239cd186fadbd82f0c0
3
  size 995604017
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd96b2ecf6961b37a51c11488af2110f05d27248a9d735d9cec37797c7c45cbe
3
  size 995604017
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:875d52e81b7c5b242a7496cd7be0fe29bfb0475bf80207c43583aeaede1cdaed
3
  size 510403817
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a05cf58e9ce70f6bc6054d004220f5b59634b254e9d1c7c20c77dd1d160dacdc
3
  size 510403817
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8108166cfcb949d51c2438ab25802532e6d155d969ce33575bd2e1f5af1b15a0
3
  size 14503
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0917e5420fc90d80d96ba9647582ee18ea3c8fd35cfb2e8cff174e79d4c678c0
3
  size 14503
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7733e18eff27c9a4da0b58764924ec541924a4be41ac9a645f4ee09be1782f09
3
  size 623
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:696255b08bfcc0a618196cb4d29aaed44996ae47e5ca36d14f60bf60ece9f170
3
  size 623
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
tokenizer_config.json CHANGED
@@ -1 +1 @@
1
- {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "gpt2", "tokenizer_class": "GPT2Tokenizer"}
1
+ {"unk_token": "<|endoftext|>", "bos_token": "<|endoftext|>", "eos_token": "<|endoftext|>", "add_prefix_space": false, "model_max_length": 1024, "special_tokens_map_file": null, "name_or_path": "huggingartists/taylor-swift", "tokenizer_class": "GPT2Tokenizer"}
trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 2.1195662021636963,
3
- "best_model_checkpoint": "output/taylor-swift/checkpoint-790",
4
- "epoch": 5.0,
5
- "global_step": 790,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -994,11 +994,431 @@
994
  "eval_samples_per_second": 76.91,
995
  "eval_steps_per_second": 9.704,
996
  "step": 790
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
997
  }
998
  ],
999
- "max_steps": 790,
1000
- "num_train_epochs": 5,
1001
- "total_flos": 825029591040000.0,
1002
  "trial_name": null,
1003
  "trial_params": null
1004
  }
1
  {
2
+ "best_metric": 1.4377235174179077,
3
+ "best_model_checkpoint": "output/taylor-swift/checkpoint-1120",
4
+ "epoch": 7.0,
5
+ "global_step": 1120,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
994
  "eval_samples_per_second": 76.91,
995
  "eval_steps_per_second": 9.704,
996
  "step": 790
997
+ },
998
+ {
999
+ "epoch": 4.97,
1000
+ "learning_rate": 3.303277502872983e-07,
1001
+ "loss": 1.9264,
1002
+ "step": 795
1003
+ },
1004
+ {
1005
+ "epoch": 5.0,
1006
+ "learning_rate": 0.0,
1007
+ "loss": 1.9314,
1008
+ "step": 800
1009
+ },
1010
+ {
1011
+ "epoch": 5.0,
1012
+ "eval_loss": 1.466734766960144,
1013
+ "eval_runtime": 9.3674,
1014
+ "eval_samples_per_second": 21.351,
1015
+ "eval_steps_per_second": 2.669,
1016
+ "step": 800
1017
+ },
1018
+ {
1019
+ "epoch": 5.03,
1020
+ "learning_rate": 3.303277502872907e-07,
1021
+ "loss": 1.908,
1022
+ "step": 805
1023
+ },
1024
+ {
1025
+ "epoch": 5.06,
1026
+ "learning_rate": 1.3181297643383773e-06,
1027
+ "loss": 1.8545,
1028
+ "step": 810
1029
+ },
1030
+ {
1031
+ "epoch": 5.09,
1032
+ "learning_rate": 2.9538929687704367e-06,
1033
+ "loss": 2.0338,
1034
+ "step": 815
1035
+ },
1036
+ {
1037
+ "epoch": 5.12,
1038
+ "learning_rate": 5.221864069725677e-06,
1039
+ "loss": 1.7718,
1040
+ "step": 820
1041
+ },
1042
+ {
1043
+ "epoch": 5.16,
1044
+ "learning_rate": 8.100201265702767e-06,
1045
+ "loss": 1.8307,
1046
+ "step": 825
1047
+ },
1048
+ {
1049
+ "epoch": 5.19,
1050
+ "learning_rate": 1.1561184596045435e-05,
1051
+ "loss": 1.8088,
1052
+ "step": 830
1053
+ },
1054
+ {
1055
+ "epoch": 5.22,
1056
+ "learning_rate": 1.5571482899316272e-05,
1057
+ "loss": 1.6236,
1058
+ "step": 835
1059
+ },
1060
+ {
1061
+ "epoch": 5.25,
1062
+ "learning_rate": 2.0092474810602853e-05,
1063
+ "loss": 2.1289,
1064
+ "step": 840
1065
+ },
1066
+ {
1067
+ "epoch": 5.28,
1068
+ "learning_rate": 2.5080620706373914e-05,
1069
+ "loss": 1.4544,
1070
+ "step": 845
1071
+ },
1072
+ {
1073
+ "epoch": 5.31,
1074
+ "learning_rate": 3.048788201485526e-05,
1075
+ "loss": 2.0407,
1076
+ "step": 850
1077
+ },
1078
+ {
1079
+ "epoch": 5.34,
1080
+ "learning_rate": 3.6262183853736515e-05,
1081
+ "loss": 1.8147,
1082
+ "step": 855
1083
+ },
1084
+ {
1085
+ "epoch": 5.38,
1086
+ "learning_rate": 4.2347916539754777e-05,
1087
+ "loss": 1.8843,
1088
+ "step": 860
1089
+ },
1090
+ {
1091
+ "epoch": 5.41,
1092
+ "learning_rate": 4.868647114034379e-05,
1093
+ "loss": 1.8188,
1094
+ "step": 865
1095
+ },
1096
+ {
1097
+ "epoch": 5.44,
1098
+ "learning_rate": 5.521680390969348e-05,
1099
+ "loss": 1.8368,
1100
+ "step": 870
1101
+ },
1102
+ {
1103
+ "epoch": 5.47,
1104
+ "learning_rate": 6.1876024173392e-05,
1105
+ "loss": 1.8814,
1106
+ "step": 875
1107
+ },
1108
+ {
1109
+ "epoch": 5.5,
1110
+ "learning_rate": 6.859999999999984e-05,
1111
+ "loss": 1.9456,
1112
+ "step": 880
1113
+ },
1114
+ {
1115
+ "epoch": 5.53,
1116
+ "learning_rate": 7.532397582660791e-05,
1117
+ "loss": 1.6534,
1118
+ "step": 885
1119
+ },
1120
+ {
1121
+ "epoch": 5.56,
1122
+ "learning_rate": 8.198319609030643e-05,
1123
+ "loss": 1.8587,
1124
+ "step": 890
1125
+ },
1126
+ {
1127
+ "epoch": 5.59,
1128
+ "learning_rate": 8.851352885965613e-05,
1129
+ "loss": 2.0249,
1130
+ "step": 895
1131
+ },
1132
+ {
1133
+ "epoch": 5.62,
1134
+ "learning_rate": 9.485208346024515e-05,
1135
+ "loss": 1.9597,
1136
+ "step": 900
1137
+ },
1138
+ {
1139
+ "epoch": 5.66,
1140
+ "learning_rate": 0.00010093781614626339,
1141
+ "loss": 1.499,
1142
+ "step": 905
1143
+ },
1144
+ {
1145
+ "epoch": 5.69,
1146
+ "learning_rate": 0.00010671211798514466,
1147
+ "loss": 1.6634,
1148
+ "step": 910
1149
+ },
1150
+ {
1151
+ "epoch": 5.72,
1152
+ "learning_rate": 0.00011211937929362601,
1153
+ "loss": 1.789,
1154
+ "step": 915
1155
+ },
1156
+ {
1157
+ "epoch": 5.75,
1158
+ "learning_rate": 0.00011710752518939709,
1159
+ "loss": 1.6422,
1160
+ "step": 920
1161
+ },
1162
+ {
1163
+ "epoch": 5.78,
1164
+ "learning_rate": 0.00012162851710068368,
1165
+ "loss": 1.83,
1166
+ "step": 925
1167
+ },
1168
+ {
1169
+ "epoch": 5.81,
1170
+ "learning_rate": 0.00012563881540395453,
1171
+ "loss": 1.8398,
1172
+ "step": 930
1173
+ },
1174
+ {
1175
+ "epoch": 5.84,
1176
+ "learning_rate": 0.0001290997987342972,
1177
+ "loss": 1.8865,
1178
+ "step": 935
1179
+ },
1180
+ {
1181
+ "epoch": 5.88,
1182
+ "learning_rate": 0.0001319781359302743,
1183
+ "loss": 2.0384,
1184
+ "step": 940
1185
+ },
1186
+ {
1187
+ "epoch": 5.91,
1188
+ "learning_rate": 0.00013424610703122953,
1189
+ "loss": 1.8054,
1190
+ "step": 945
1191
+ },
1192
+ {
1193
+ "epoch": 5.94,
1194
+ "learning_rate": 0.0001358818702356616,
1195
+ "loss": 1.6484,
1196
+ "step": 950
1197
+ },
1198
+ {
1199
+ "epoch": 5.97,
1200
+ "learning_rate": 0.0001368696722497127,
1201
+ "loss": 2.0007,
1202
+ "step": 955
1203
+ },
1204
+ {
1205
+ "epoch": 6.0,
1206
+ "learning_rate": 0.0001372,
1207
+ "loss": 1.8572,
1208
+ "step": 960
1209
+ },
1210
+ {
1211
+ "epoch": 6.0,
1212
+ "eval_loss": 1.5031535625457764,
1213
+ "eval_runtime": 9.4961,
1214
+ "eval_samples_per_second": 21.061,
1215
+ "eval_steps_per_second": 2.633,
1216
+ "step": 960
1217
+ },
1218
+ {
1219
+ "epoch": 6.03,
1220
+ "learning_rate": 0.00013686967224971273,
1221
+ "loss": 1.4942,
1222
+ "step": 965
1223
+ },
1224
+ {
1225
+ "epoch": 6.06,
1226
+ "learning_rate": 0.00013588187023566163,
1227
+ "loss": 1.782,
1228
+ "step": 970
1229
+ },
1230
+ {
1231
+ "epoch": 6.09,
1232
+ "learning_rate": 0.00013424610703122958,
1233
+ "loss": 1.8084,
1234
+ "step": 975
1235
+ },
1236
+ {
1237
+ "epoch": 6.12,
1238
+ "learning_rate": 0.00013197813593027432,
1239
+ "loss": 1.7091,
1240
+ "step": 980
1241
+ },
1242
+ {
1243
+ "epoch": 6.16,
1244
+ "learning_rate": 0.00012909979873429724,
1245
+ "loss": 1.8472,
1246
+ "step": 985
1247
+ },
1248
+ {
1249
+ "epoch": 6.19,
1250
+ "learning_rate": 0.00012563881540395458,
1251
+ "loss": 1.7417,
1252
+ "step": 990
1253
+ },
1254
+ {
1255
+ "epoch": 6.22,
1256
+ "learning_rate": 0.00012162851710068373,
1257
+ "loss": 1.6717,
1258
+ "step": 995
1259
+ },
1260
+ {
1261
+ "epoch": 6.25,
1262
+ "learning_rate": 0.00011710752518939715,
1263
+ "loss": 1.6891,
1264
+ "step": 1000
1265
+ },
1266
+ {
1267
+ "epoch": 6.28,
1268
+ "learning_rate": 0.00011211937929362609,
1269
+ "loss": 1.8756,
1270
+ "step": 1005
1271
+ },
1272
+ {
1273
+ "epoch": 6.31,
1274
+ "learning_rate": 0.00010671211798514474,
1275
+ "loss": 1.6079,
1276
+ "step": 1010
1277
+ },
1278
+ {
1279
+ "epoch": 6.34,
1280
+ "learning_rate": 0.00010093781614626349,
1281
+ "loss": 1.7571,
1282
+ "step": 1015
1283
+ },
1284
+ {
1285
+ "epoch": 6.38,
1286
+ "learning_rate": 9.485208346024524e-05,
1287
+ "loss": 1.3666,
1288
+ "step": 1020
1289
+ },
1290
+ {
1291
+ "epoch": 6.41,
1292
+ "learning_rate": 8.851352885965622e-05,
1293
+ "loss": 1.7333,
1294
+ "step": 1025
1295
+ },
1296
+ {
1297
+ "epoch": 6.44,
1298
+ "learning_rate": 8.198319609030653e-05,
1299
+ "loss": 1.4846,
1300
+ "step": 1030
1301
+ },
1302
+ {
1303
+ "epoch": 6.47,
1304
+ "learning_rate": 7.532397582660802e-05,
1305
+ "loss": 1.4929,
1306
+ "step": 1035
1307
+ },
1308
+ {
1309
+ "epoch": 6.5,
1310
+ "learning_rate": 6.859999999999993e-05,
1311
+ "loss": 1.7269,
1312
+ "step": 1040
1313
+ },
1314
+ {
1315
+ "epoch": 6.53,
1316
+ "learning_rate": 6.18760241733921e-05,
1317
+ "loss": 1.5586,
1318
+ "step": 1045
1319
+ },
1320
+ {
1321
+ "epoch": 6.56,
1322
+ "learning_rate": 5.5216803909693576e-05,
1323
+ "loss": 1.7124,
1324
+ "step": 1050
1325
+ },
1326
+ {
1327
+ "epoch": 6.59,
1328
+ "learning_rate": 4.868647114034389e-05,
1329
+ "loss": 1.7847,
1330
+ "step": 1055
1331
+ },
1332
+ {
1333
+ "epoch": 6.62,
1334
+ "learning_rate": 4.2347916539754865e-05,
1335
+ "loss": 1.5791,
1336
+ "step": 1060
1337
+ },
1338
+ {
1339
+ "epoch": 6.66,
1340
+ "learning_rate": 3.62621838537366e-05,
1341
+ "loss": 1.71,
1342
+ "step": 1065
1343
+ },
1344
+ {
1345
+ "epoch": 6.69,
1346
+ "learning_rate": 3.0487882014855342e-05,
1347
+ "loss": 1.6788,
1348
+ "step": 1070
1349
+ },
1350
+ {
1351
+ "epoch": 6.72,
1352
+ "learning_rate": 2.5080620706373995e-05,
1353
+ "loss": 1.6755,
1354
+ "step": 1075
1355
+ },
1356
+ {
1357
+ "epoch": 6.75,
1358
+ "learning_rate": 2.009247481060292e-05,
1359
+ "loss": 1.4464,
1360
+ "step": 1080
1361
+ },
1362
+ {
1363
+ "epoch": 6.78,
1364
+ "learning_rate": 1.5571482899316333e-05,
1365
+ "loss": 1.7238,
1366
+ "step": 1085
1367
+ },
1368
+ {
1369
+ "epoch": 6.81,
1370
+ "learning_rate": 1.1561184596045489e-05,
1371
+ "loss": 1.4748,
1372
+ "step": 1090
1373
+ },
1374
+ {
1375
+ "epoch": 6.84,
1376
+ "learning_rate": 8.100201265702821e-06,
1377
+ "loss": 1.488,
1378
+ "step": 1095
1379
+ },
1380
+ {
1381
+ "epoch": 6.88,
1382
+ "learning_rate": 5.221864069725715e-06,
1383
+ "loss": 1.545,
1384
+ "step": 1100
1385
+ },
1386
+ {
1387
+ "epoch": 6.91,
1388
+ "learning_rate": 2.9538929687704672e-06,
1389
+ "loss": 1.5979,
1390
+ "step": 1105
1391
+ },
1392
+ {
1393
+ "epoch": 6.94,
1394
+ "learning_rate": 1.3181297643383925e-06,
1395
+ "loss": 1.7046,
1396
+ "step": 1110
1397
+ },
1398
+ {
1399
+ "epoch": 6.97,
1400
+ "learning_rate": 3.303277502872983e-07,
1401
+ "loss": 1.5848,
1402
+ "step": 1115
1403
+ },
1404
+ {
1405
+ "epoch": 7.0,
1406
+ "learning_rate": 0.0,
1407
+ "loss": 1.6901,
1408
+ "step": 1120
1409
+ },
1410
+ {
1411
+ "epoch": 7.0,
1412
+ "eval_loss": 1.4377235174179077,
1413
+ "eval_runtime": 9.5035,
1414
+ "eval_samples_per_second": 21.045,
1415
+ "eval_steps_per_second": 2.631,
1416
+ "step": 1120
1417
  }
1418
  ],
1419
+ "max_steps": 1120,
1420
+ "num_train_epochs": 7,
1421
+ "total_flos": 1168759259136000.0,
1422
  "trial_name": null,
1423
  "trial_params": null
1424
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4cfb6e247e098310914adb6ffa99cbac18054688ed991eef0c2acb3531d8319c
3
- size 2863
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ca623b70d31f90284e0c9f8a31f7cb178f04870d48cd416af0fe84dfe57c1e5
3
+ size 3055