adding model finetuned on QA (TAR)

Browse files

Files changed (13) hide show

.gitattributes +1 -0
all_results.json +11 -0
config.json +33 -0
eval_nbest_predictions.json +3 -0
eval_predictions.json +0 -0
eval_results.json +6 -0
pytorch_model.bin +3 -0
special_tokens_map.json +1 -0
tokenizer.json +0 -0
tokenizer_config.json +1 -0
train_results.json +8 -0
trainer_state.json +721 -0
training_args.bin +3 -0

.gitattributes CHANGED Viewed

@@ -25,3 +25,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zstandard filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text

 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zstandard filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+eval_nbest_predictions.json filter=lfs diff=lfs merge=lfs -text

all_results.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+    "epoch": 4.0,
+    "eval_exact_match": 48.33491012298959,
+    "eval_f1": 66.26645652732091,
+    "eval_samples": 10624,
+    "train_loss": 1.5832800059288115,
+    "train_runtime": 4642.7737,
+    "train_samples": 87747,
+    "train_samples_per_second": 75.599,
+    "train_steps_per_second": 4.726
+}

config.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "_name_or_path": "CenIA/albert_tiny_spanish",
+  "architectures": [
+    "AlbertForQuestionAnswering"
+  ],
+  "attention_probs_dropout_prob": 0,
+  "bos_token_id": 2,
+  "classifier_dropout_prob": 0.1,
+  "down_scale_factor": 1,
+  "embedding_size": 128,
+  "eos_token_id": 3,
+  "gap_size": 0,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0,
+  "hidden_size": 312,
+  "initializer_range": 0.02,
+  "inner_group_num": 1,
+  "intermediate_size": 1248,
+  "layer_norm_eps": 1e-12,
+  "max_position_embeddings": 512,
+  "model_type": "albert",
+  "net_structure_type": 0,
+  "num_attention_heads": 12,
+  "num_hidden_groups": 1,
+  "num_hidden_layers": 4,
+  "num_memory_blocks": 0,
+  "pad_token_id": 0,
+  "position_embedding_type": "absolute",
+  "torch_dtype": "float32",
+  "transformers_version": "4.15.0",
+  "type_vocab_size": 2,
+  "vocab_size": 31000
+}

eval_nbest_predictions.json ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:44fc814520bf527058d9b3242f2ba9452822b7af130cd1d01a257caa73aa4d86
+size 45642296

eval_predictions.json ADDED Viewed

The diff for this file is too large to render. See raw diff

eval_results.json ADDED Viewed

	@@ -0,0 +1,6 @@

+{
+    "epoch": 4.0,
+    "eval_exact_match": 48.33491012298959,
+    "eval_f1": 66.26645652732091,
+    "eval_samples": 10624
+}

pytorch_model.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:78e5f4d5d65979fda9fbf3f567f6c704a322b10d53e30b3e8bb698200fc3b72d
+size 21002519

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1 @@


1	+ {"bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": "[MASK]"}

tokenizer.json ADDED Viewed

The diff for this file is too large to render. See raw diff

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1 @@

+ {"do_lower_case": true, "remove_space": true, "keep_accents": true, "bos_token": "[CLS]", "eos_token": "[SEP]", "unk_token": "<unk>", "sep_token": "[SEP]", "pad_token": "<pad>", "cls_token": "[CLS]", "mask_token": {"content": "[MASK]", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "__type": "AddedToken"}, "model_max_length": 512, "special_tokens_map_file": null, "name_or_path": "CenIA/albert_tiny_spanish", "tokenizer_class": "AlbertTokenizer"}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 4.0,
+    "train_loss": 1.5832800059288115,
+    "train_runtime": 4642.7737,
+    "train_samples": 87747,
+    "train_samples_per_second": 75.599,
+    "train_steps_per_second": 4.726
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,721 @@

+{
+  "best_metric": 66.26645652732091,
+  "best_model_checkpoint": "/data/jcanete/all_results/tar/albeto_tiny/epochs_4_bs_16_lr_5e-5/checkpoint-10800",
+  "epoch": 4.0,
+  "global_step": 21940,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.05,
+      "eval_exact_match": 25.279091769157993,
+      "eval_f1": 41.71460503665182,
+      "step": 300
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 4.8867365542388335e-05,
+      "loss": 3.1064,
+      "step": 500
+    },
+    {
+      "epoch": 0.11,
+      "eval_exact_match": 33.77483443708609,
+      "eval_f1": 51.462032531677195,
+      "step": 600
+    },
+    {
+      "epoch": 0.16,
+      "eval_exact_match": 37.78618732261116,
+      "eval_f1": 55.147295523401176,
+      "step": 900
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 4.7727894257064726e-05,
+      "loss": 2.4193,
+      "step": 1000
+    },
+    {
+      "epoch": 0.22,
+      "eval_exact_match": 39.735099337748345,
+      "eval_f1": 57.70943484117446,
+      "step": 1200
+    },
+    {
+      "epoch": 0.27,
+      "learning_rate": 4.6588422971741116e-05,
+      "loss": 2.2494,
+      "step": 1500
+    },
+    {
+      "epoch": 0.27,
+      "eval_exact_match": 40.69063386944182,
+      "eval_f1": 58.161449729726456,
+      "step": 1500
+    },
+    {
+      "epoch": 0.33,
+      "eval_exact_match": 41.72185430463576,
+      "eval_f1": 59.240651392335586,
+      "step": 1800
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 4.544895168641751e-05,
+      "loss": 2.1889,
+      "step": 2000
+    },
+    {
+      "epoch": 0.38,
+      "eval_exact_match": 41.627246925260174,
+      "eval_f1": 60.05133975647362,
+      "step": 2100
+    },
+    {
+      "epoch": 0.44,
+      "eval_exact_match": 43.33964049195837,
+      "eval_f1": 61.334205952553624,
+      "step": 2400
+    },
+    {
+      "epoch": 0.46,
+      "learning_rate": 4.431175934366454e-05,
+      "loss": 2.0954,
+      "step": 2500
+    },
+    {
+      "epoch": 0.49,
+      "eval_exact_match": 44.1438032166509,
+      "eval_f1": 61.6109819191811,
+      "step": 2700
+    },
+    {
+      "epoch": 0.55,
+      "learning_rate": 4.317228805834093e-05,
+      "loss": 2.0585,
+      "step": 3000
+    },
+    {
+      "epoch": 0.55,
+      "eval_exact_match": 44.55061494796594,
+      "eval_f1": 62.74097437748746,
+      "step": 3000
+    },
+    {
+      "epoch": 0.6,
+      "eval_exact_match": 45.14664143803217,
+      "eval_f1": 62.716820973684904,
+      "step": 3300
+    },
+    {
+      "epoch": 0.64,
+      "learning_rate": 4.203281677301732e-05,
+      "loss": 2.0252,
+      "step": 3500
+    },
+    {
+      "epoch": 0.66,
+      "eval_exact_match": 45.22232734153264,
+      "eval_f1": 63.111925308146255,
+      "step": 3600
+    },
+    {
+      "epoch": 0.71,
+      "eval_exact_match": 45.13718070009461,
+      "eval_f1": 63.12474940531198,
+      "step": 3900
+    },
+    {
+      "epoch": 0.73,
+      "learning_rate": 4.0893345487693714e-05,
+      "loss": 1.9861,
+      "step": 4000
+    },
+    {
+      "epoch": 0.77,
+      "eval_exact_match": 46.21570482497635,
+      "eval_f1": 64.00817566022319,
+      "step": 4200
+    },
+    {
+      "epoch": 0.82,
+      "learning_rate": 3.975615314494075e-05,
+      "loss": 1.9881,
+      "step": 4500
+    },
+    {
+      "epoch": 0.82,
+      "eval_exact_match": 46.72658467360454,
+      "eval_f1": 64.45739871415881,
+      "step": 4500
+    },
+    {
+      "epoch": 0.88,
+      "eval_exact_match": 46.31031220435194,
+      "eval_f1": 64.03851598846974,
+      "step": 4800
+    },
+    {
+      "epoch": 0.91,
+      "learning_rate": 3.861668185961714e-05,
+      "loss": 1.9619,
+      "step": 5000
+    },
+    {
+      "epoch": 0.93,
+      "eval_exact_match": 46.57521286660359,
+      "eval_f1": 64.71289725300291,
+      "step": 5100
+    },
+    {
+      "epoch": 0.98,
+      "eval_exact_match": 46.80227057710501,
+      "eval_f1": 64.89896140882486,
+      "step": 5400
+    },
+    {
+      "epoch": 1.0,
+      "learning_rate": 3.747721057429353e-05,
+      "loss": 1.8961,
+      "step": 5500
+    },
+    {
+      "epoch": 1.04,
+      "eval_exact_match": 46.868495742667925,
+      "eval_f1": 64.9566553926742,
+      "step": 5700
+    },
+    {
+      "epoch": 1.09,
+      "learning_rate": 3.633773928896992e-05,
+      "loss": 1.6613,
+      "step": 6000
+    },
+    {
+      "epoch": 1.09,
+      "eval_exact_match": 47.05771050141911,
+      "eval_f1": 65.16060187033897,
+      "step": 6000
+    },
+    {
+      "epoch": 1.15,
+      "eval_exact_match": 47.010406811731315,
+      "eval_f1": 64.63527459887219,
+      "step": 6300
+    },
+    {
+      "epoch": 1.19,
+      "learning_rate": 3.520054694621696e-05,
+      "loss": 1.6724,
+      "step": 6500
+    },
+    {
+      "epoch": 1.2,
+      "eval_exact_match": 46.44276253547777,
+      "eval_f1": 64.6485660215026,
+      "step": 6600
+    },
+    {
+      "epoch": 1.26,
+      "eval_exact_match": 47.02932828760643,
+      "eval_f1": 65.25056698142618,
+      "step": 6900
+    },
+    {
+      "epoch": 1.28,
+      "learning_rate": 3.406107566089335e-05,
+      "loss": 1.6622,
+      "step": 7000
+    },
+    {
+      "epoch": 1.31,
+      "eval_exact_match": 47.38883632923368,
+      "eval_f1": 65.14765281416761,
+      "step": 7200
+    },
+    {
+      "epoch": 1.37,
+      "learning_rate": 3.292160437556974e-05,
+      "loss": 1.6765,
+      "step": 7500
+    },
+    {
+      "epoch": 1.37,
+      "eval_exact_match": 47.52128666035951,
+      "eval_f1": 65.23959133286817,
+      "step": 7500
+    },
+    {
+      "epoch": 1.42,
+      "eval_exact_match": 47.24692526017029,
+      "eval_f1": 64.89498224105805,
+      "step": 7800
+    },
+    {
+      "epoch": 1.46,
+      "learning_rate": 3.178213309024613e-05,
+      "loss": 1.6831,
+      "step": 8000
+    },
+    {
+      "epoch": 1.48,
+      "eval_exact_match": 47.42667928098392,
+      "eval_f1": 65.43419870447086,
+      "step": 8100
+    },
+    {
+      "epoch": 1.53,
+      "eval_exact_match": 47.237464522232735,
+      "eval_f1": 65.29424940770944,
+      "step": 8400
+    },
+    {
+      "epoch": 1.55,
+      "learning_rate": 3.0644940747493164e-05,
+      "loss": 1.6652,
+      "step": 8500
+    },
+    {
+      "epoch": 1.59,
+      "eval_exact_match": 47.74834437086093,
+      "eval_f1": 65.62503101399187,
+      "step": 8700
+    },
+    {
+      "epoch": 1.64,
+      "learning_rate": 2.9505469462169554e-05,
+      "loss": 1.6622,
+      "step": 9000
+    },
+    {
+      "epoch": 1.64,
+      "eval_exact_match": 48.04162724692526,
+      "eval_f1": 66.13531062019833,
+      "step": 9000
+    },
+    {
+      "epoch": 1.7,
+      "eval_exact_match": 47.6631977294229,
+      "eval_f1": 65.69495765526571,
+      "step": 9300
+    },
+    {
+      "epoch": 1.73,
+      "learning_rate": 2.836599817684594e-05,
+      "loss": 1.6282,
+      "step": 9500
+    },
+    {
+      "epoch": 1.75,
+      "eval_exact_match": 48.15515610217597,
+      "eval_f1": 65.85406198831797,
+      "step": 9600
+    },
+    {
+      "epoch": 1.8,
+      "eval_exact_match": 48.34437086092715,
+      "eval_f1": 65.82648855586231,
+      "step": 9900
+    },
+    {
+      "epoch": 1.82,
+      "learning_rate": 2.7226526891522335e-05,
+      "loss": 1.6311,
+      "step": 10000
+    },
+    {
+      "epoch": 1.86,
+      "eval_exact_match": 48.17407757805109,
+      "eval_f1": 65.95821129767383,
+      "step": 10200
+    },
+    {
+      "epoch": 1.91,
+      "learning_rate": 2.6089334548769374e-05,
+      "loss": 1.6879,
+      "step": 10500
+    },
+    {
+      "epoch": 1.91,
+      "eval_exact_match": 48.070009460737936,
+      "eval_f1": 65.61674709032437,
+      "step": 10500
+    },
+    {
+      "epoch": 1.97,
+      "eval_exact_match": 48.33491012298959,
+      "eval_f1": 66.26645652732091,
+      "step": 10800
+    },
+    {
+      "epoch": 2.01,
+      "learning_rate": 2.4949863263445765e-05,
+      "loss": 1.6127,
+      "step": 11000
+    },
+    {
+      "epoch": 2.02,
+      "eval_exact_match": 47.776726584673604,
+      "eval_f1": 65.67716714026636,
+      "step": 11100
+    },
+    {
+      "epoch": 2.08,
+      "eval_exact_match": 46.97256385998108,
+      "eval_f1": 65.35488377881,
+      "step": 11400
+    },
+    {
+      "epoch": 2.1,
+      "learning_rate": 2.3810391978122152e-05,
+      "loss": 1.3442,
+      "step": 11500
+    },
+    {
+      "epoch": 2.13,
+      "eval_exact_match": 47.25638599810785,
+      "eval_f1": 65.11588968827414,
+      "step": 11700
+    },
+    {
+      "epoch": 2.19,
+      "learning_rate": 2.2670920692798542e-05,
+      "loss": 1.3419,
+      "step": 12000
+    },
+    {
+      "epoch": 2.19,
+      "eval_exact_match": 47.918637653736994,
+      "eval_f1": 65.76384490998669,
+      "step": 12000
+    },
+    {
+      "epoch": 2.24,
+      "eval_exact_match": 47.71996215704825,
+      "eval_f1": 65.51523648050365,
+      "step": 12300
+    },
+    {
+      "epoch": 2.28,
+      "learning_rate": 2.1533728350045578e-05,
+      "loss": 1.3392,
+      "step": 12500
+    },
+    {
+      "epoch": 2.3,
+      "eval_exact_match": 47.81456953642384,
+      "eval_f1": 65.72328974412012,
+      "step": 12600
+    },
+    {
+      "epoch": 2.35,
+      "eval_exact_match": 46.868495742667925,
+      "eval_f1": 64.95043374963731,
+      "step": 12900
+    },
+    {
+      "epoch": 2.37,
+      "learning_rate": 2.0396536007292617e-05,
+      "loss": 1.3365,
+      "step": 13000
+    },
+    {
+      "epoch": 2.41,
+      "eval_exact_match": 47.47398297067171,
+      "eval_f1": 65.46687084562042,
+      "step": 13200
+    },
+    {
+      "epoch": 2.46,
+      "learning_rate": 1.9257064721969007e-05,
+      "loss": 1.3863,
+      "step": 13500
+    },
+    {
+      "epoch": 2.46,
+      "eval_exact_match": 47.64427625354778,
+      "eval_f1": 65.54344572464974,
+      "step": 13500
+    },
+    {
+      "epoch": 2.52,
+      "eval_exact_match": 47.70104068117313,
+      "eval_f1": 65.29014560945414,
+      "step": 13800
+    },
+    {
+      "epoch": 2.55,
+      "learning_rate": 1.8117593436645398e-05,
+      "loss": 1.364,
+      "step": 14000
+    },
+    {
+      "epoch": 2.57,
+      "eval_exact_match": 47.918637653736994,
+      "eval_f1": 65.66170183373373,
+      "step": 14100
+    },
+    {
+      "epoch": 2.63,
+      "eval_exact_match": 48.33491012298959,
+      "eval_f1": 65.94289128043252,
+      "step": 14400
+    },
+    {
+      "epoch": 2.64,
+      "learning_rate": 1.697812215132179e-05,
+      "loss": 1.3864,
+      "step": 14500
+    },
+    {
+      "epoch": 2.68,
+      "eval_exact_match": 48.24976348155156,
+      "eval_f1": 65.78253411845539,
+      "step": 14700
+    },
+    {
+      "epoch": 2.73,
+      "learning_rate": 1.583865086599818e-05,
+      "loss": 1.3636,
+      "step": 15000
+    },
+    {
+      "epoch": 2.73,
+      "eval_exact_match": 48.438978240302745,
+      "eval_f1": 66.07660779936401,
+      "step": 15000
+    },
+    {
+      "epoch": 2.79,
+      "eval_exact_match": 48.16461684011353,
+      "eval_f1": 66.2498588216324,
+      "step": 15300
+    },
+    {
+      "epoch": 2.83,
+      "learning_rate": 1.469917958067457e-05,
+      "loss": 1.3726,
+      "step": 15500
+    },
+    {
+      "epoch": 2.84,
+      "eval_exact_match": 47.95648060548723,
+      "eval_f1": 65.7260651497257,
+      "step": 15600
+    },
+    {
+      "epoch": 2.9,
+      "eval_exact_match": 47.88079470198676,
+      "eval_f1": 66.01379305937202,
+      "step": 15900
+    },
+    {
+      "epoch": 2.92,
+      "learning_rate": 1.355970829535096e-05,
+      "loss": 1.379,
+      "step": 16000
+    },
+    {
+      "epoch": 2.95,
+      "eval_exact_match": 48.240302743614,
+      "eval_f1": 65.53479147043561,
+      "step": 16200
+    },
+    {
+      "epoch": 3.01,
+      "learning_rate": 1.2422515952597995e-05,
+      "loss": 1.3401,
+      "step": 16500
+    },
+    {
+      "epoch": 3.01,
+      "eval_exact_match": 48.221381267738884,
+      "eval_f1": 65.86023587215384,
+      "step": 16500
+    },
+    {
+      "epoch": 3.06,
+      "eval_exact_match": 47.615894039735096,
+      "eval_f1": 65.12853949917962,
+      "step": 16800
+    },
+    {
+      "epoch": 3.1,
+      "learning_rate": 1.1283044667274386e-05,
+      "loss": 1.1281,
+      "step": 17000
+    },
+    {
+      "epoch": 3.12,
+      "eval_exact_match": 47.12393566698202,
+      "eval_f1": 65.15991467915553,
+      "step": 17100
+    },
+    {
+      "epoch": 3.17,
+      "eval_exact_match": 47.21854304635762,
+      "eval_f1": 65.02752189426162,
+      "step": 17400
+    },
+    {
+      "epoch": 3.19,
+      "learning_rate": 1.0143573381950776e-05,
+      "loss": 1.1161,
+      "step": 17500
+    },
+    {
+      "epoch": 3.23,
+      "eval_exact_match": 47.13339640491959,
+      "eval_f1": 65.2429493800908,
+      "step": 17700
+    },
+    {
+      "epoch": 3.28,
+      "learning_rate": 9.004102096627165e-06,
+      "loss": 1.1337,
+      "step": 18000
+    },
+    {
+      "epoch": 3.28,
+      "eval_exact_match": 47.010406811731315,
+      "eval_f1": 65.03577782296541,
+      "step": 18000
+    },
+    {
+      "epoch": 3.34,
+      "eval_exact_match": 47.379375591296125,
+      "eval_f1": 65.13654689416043,
+      "step": 18300
+    },
+    {
+      "epoch": 3.37,
+      "learning_rate": 7.864630811303556e-06,
+      "loss": 1.1477,
+      "step": 18500
+    },
+    {
+      "epoch": 3.39,
+      "eval_exact_match": 47.35099337748344,
+      "eval_f1": 64.86398318568581,
+      "step": 18600
+    },
+    {
+      "epoch": 3.45,
+      "eval_exact_match": 47.30368968779565,
+      "eval_f1": 65.05463221900013,
+      "step": 18900
+    },
+    {
+      "epoch": 3.46,
+      "learning_rate": 6.725159525979946e-06,
+      "loss": 1.1414,
+      "step": 19000
+    },
+    {
+      "epoch": 3.5,
+      "eval_exact_match": 46.97256385998108,
+      "eval_f1": 65.11680930447929,
+      "step": 19200
+    },
+    {
+      "epoch": 3.56,
+      "learning_rate": 5.587967183226983e-06,
+      "loss": 1.1454,
+      "step": 19500
+    },
+    {
+      "epoch": 3.56,
+      "eval_exact_match": 47.13339640491959,
+      "eval_f1": 65.165125839751,
+      "step": 19500
+    },
+    {
+      "epoch": 3.61,
+      "eval_exact_match": 47.20908230842006,
+      "eval_f1": 65.09005212297234,
+      "step": 19800
+    },
+    {
+      "epoch": 3.65,
+      "learning_rate": 4.448495897903373e-06,
+      "loss": 1.1318,
+      "step": 20000
+    },
+    {
+      "epoch": 3.66,
+      "eval_exact_match": 46.95364238410596,
+      "eval_f1": 65.02854138983005,
+      "step": 20100
+    },
+    {
+      "epoch": 3.72,
+      "eval_exact_match": 46.773888363292336,
+      "eval_f1": 64.98586781544525,
+      "step": 20400
+    },
+    {
+      "epoch": 3.74,
+      "learning_rate": 3.3090246125797635e-06,
+      "loss": 1.124,
+      "step": 20500
+    },
+    {
+      "epoch": 3.77,
+      "eval_exact_match": 46.76442762535478,
+      "eval_f1": 65.02603195736303,
+      "step": 20700
+    },
+    {
+      "epoch": 3.83,
+      "learning_rate": 2.169553327256153e-06,
+      "loss": 1.1301,
+      "step": 21000
+    },
+    {
+      "epoch": 3.83,
+      "eval_exact_match": 46.88741721854305,
+      "eval_f1": 65.03187807653661,
+      "step": 21000
+    },
+    {
+      "epoch": 3.88,
+      "eval_exact_match": 47.086092715231786,
+      "eval_f1": 65.13748531716237,
+      "step": 21300
+    },
+    {
+      "epoch": 3.92,
+      "learning_rate": 1.0323609845031905e-06,
+      "loss": 1.1298,
+      "step": 21500
+    },
+    {
+      "epoch": 3.94,
+      "eval_exact_match": 47.095553453169344,
+      "eval_f1": 65.03629129514118,
+      "step": 21600
+    },
+    {
+      "epoch": 3.99,
+      "eval_exact_match": 47.095553453169344,
+      "eval_f1": 65.08861259333764,
+      "step": 21900
+    },
+    {
+      "epoch": 4.0,
+      "step": 21940,
+      "total_flos": 859515753161088.0,
+      "train_loss": 1.5832800059288115,
+      "train_runtime": 4642.7737,
+      "train_samples_per_second": 75.599,
+      "train_steps_per_second": 4.726
+    }
+  ],
+  "max_steps": 21940,
+  "num_train_epochs": 4,
+  "total_flos": 859515753161088.0,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:982e4931a79301177c8bf323ef95407c0eedc1337333ab618067287f62a88e26
+size 2991