Commit
·
b1b112c
1
Parent(s):
f69e02f
Training in progress, epoch 7
Browse files
- checkpoint-17112/config.json +39 -0
- checkpoint-17112/generation_config.json +6 -0
- checkpoint-17112/merges.txt +0 -0
- checkpoint-17112/optimizer.pt +3 -0
- checkpoint-17112/pytorch_model.bin +3 -0
- checkpoint-17112/rng_state.pth +3 -0
- checkpoint-17112/scaler.pt +3 -0
- checkpoint-17112/scheduler.pt +3 -0
- checkpoint-17112/special_tokens_map.json +6 -0
- checkpoint-17112/tokenizer.json +0 -0
- checkpoint-17112/tokenizer_config.json +10 -0
- checkpoint-17112/trainer_state.json +100 -0
- checkpoint-17112/training_args.bin +3 -0
- checkpoint-17112/vocab.json +0 -0
- checkpoint-19964/config.json +39 -0
- checkpoint-19964/generation_config.json +6 -0
- checkpoint-19964/merges.txt +0 -0
- checkpoint-19964/optimizer.pt +3 -0
- checkpoint-19964/pytorch_model.bin +3 -0
- checkpoint-19964/rng_state.pth +3 -0
- checkpoint-19964/scaler.pt +3 -0
- checkpoint-19964/scheduler.pt +3 -0
- checkpoint-19964/special_tokens_map.json +6 -0
- checkpoint-19964/tokenizer.json +0 -0
- checkpoint-19964/tokenizer_config.json +10 -0
- checkpoint-19964/trainer_state.json +114 -0
- checkpoint-19964/training_args.bin +3 -0
- checkpoint-19964/vocab.json +0 -0
- pytorch_model.bin +1 -1
- runs/Mar20_15-06-31_jupyter-austincarthy/events.out.tfevents.1679324800.jupyter-austincarthy.84.0 +2 -2
checkpoint-17112/config.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "gpt2",
|
| 3 |
+
"activation_function": "gelu_new",
|
| 4 |
+
"architectures": [
|
| 5 |
+
"GPT2LMHeadModel"
|
| 6 |
+
],
|
| 7 |
+
"attn_pdrop": 0.1,
|
| 8 |
+
"bos_token_id": 50256,
|
| 9 |
+
"embd_pdrop": 0.1,
|
| 10 |
+
"eos_token_id": 50256,
|
| 11 |
+
"initializer_range": 0.02,
|
| 12 |
+
"layer_norm_epsilon": 1e-05,
|
| 13 |
+
"model_type": "gpt2",
|
| 14 |
+
"n_ctx": 1024,
|
| 15 |
+
"n_embd": 768,
|
| 16 |
+
"n_head": 12,
|
| 17 |
+
"n_inner": null,
|
| 18 |
+
"n_layer": 12,
|
| 19 |
+
"n_positions": 256,
|
| 20 |
+
"reorder_and_upcast_attn": false,
|
| 21 |
+
"resid_pdrop": 0.1,
|
| 22 |
+
"scale_attn_by_inverse_layer_idx": false,
|
| 23 |
+
"scale_attn_weights": true,
|
| 24 |
+
"summary_activation": null,
|
| 25 |
+
"summary_first_dropout": 0.1,
|
| 26 |
+
"summary_proj_to_labels": true,
|
| 27 |
+
"summary_type": "cls_index",
|
| 28 |
+
"summary_use_proj": true,
|
| 29 |
+
"task_specific_params": {
|
| 30 |
+
"text-generation": {
|
| 31 |
+
"do_sample": true,
|
| 32 |
+
"max_length": 50
|
| 33 |
+
}
|
| 34 |
+
},
|
| 35 |
+
"torch_dtype": "float32",
|
| 36 |
+
"transformers_version": "4.26.0",
|
| 37 |
+
"use_cache": true,
|
| 38 |
+
"vocab_size": 50257
|
| 39 |
+
}
|
checkpoint-17112/generation_config.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_from_model_config": true,
|
| 3 |
+
"bos_token_id": 50256,
|
| 4 |
+
"eos_token_id": 50256,
|
| 5 |
+
"transformers_version": "4.26.0"
|
| 6 |
+
}
|
checkpoint-17112/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint-17112/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:0caec4fa0e1a961e8f98741e400f506df1af0cf1f5692a84cd0720621469c39a
|
| 3 |
+
size 990885425
|
checkpoint-17112/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:5fb3579ed568233065d923086e8d19a11065bf8c72f1b96eda66b4d611ac7361
|
| 3 |
+
size 496240745
|
checkpoint-17112/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:70a2787572e97ef94e2129f4028b207217c9327a387f81c3c273b5c6be60eb3e
|
| 3 |
+
size 14503
|
checkpoint-17112/scaler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6973c25ac22fc97fc337b4abf4b0f9748bf1eca031decf31dd6b9c007be9bdb0
|
| 3 |
+
size 559
|
checkpoint-17112/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:6465f50eac26e5d8790bd922669189b1d2b2699858c66d03db7f954e8fbe8ca8
|
| 3 |
+
size 623
|
checkpoint-17112/special_tokens_map.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "<|endoftext|>",
|
| 3 |
+
"eos_token": "<|endoftext|>",
|
| 4 |
+
"pad_token": "<|endoftext|>",
|
| 5 |
+
"unk_token": "<|endoftext|>"
|
| 6 |
+
}
|
checkpoint-17112/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint-17112/tokenizer_config.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_prefix_space": false,
|
| 3 |
+
"bos_token": "<|endoftext|>",
|
| 4 |
+
"eos_token": "<|endoftext|>",
|
| 5 |
+
"model_max_length": 1024,
|
| 6 |
+
"name_or_path": "gpt2",
|
| 7 |
+
"special_tokens_map_file": null,
|
| 8 |
+
"tokenizer_class": "GPT2Tokenizer",
|
| 9 |
+
"unk_token": "<|endoftext|>"
|
| 10 |
+
}
|
checkpoint-17112/trainer_state.json
ADDED
|
@@ -0,0 +1,100 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 6.0,
|
| 5 |
+
"global_step": 17112,
|
| 6 |
+
"is_hyper_param_search": false,
|
| 7 |
+
"is_local_process_zero": true,
|
| 8 |
+
"is_world_process_zero": true,
|
| 9 |
+
"log_history": [
|
| 10 |
+
{
|
| 11 |
+
"epoch": 1.0,
|
| 12 |
+
"learning_rate": 0.0004944335719677752,
|
| 13 |
+
"loss": 1.7574,
|
| 14 |
+
"step": 2852
|
| 15 |
+
},
|
| 16 |
+
{
|
| 17 |
+
"epoch": 1.0,
|
| 18 |
+
"eval_loss": 1.1653077602386475,
|
| 19 |
+
"eval_runtime": 5.8814,
|
| 20 |
+
"eval_samples_per_second": 1723.907,
|
| 21 |
+
"eval_steps_per_second": 53.899,
|
| 22 |
+
"step": 2852
|
| 23 |
+
},
|
| 24 |
+
{
|
| 25 |
+
"epoch": 2.0,
|
| 26 |
+
"learning_rate": 0.0004648126812298226,
|
| 27 |
+
"loss": 1.1185,
|
| 28 |
+
"step": 5704
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"epoch": 2.0,
|
| 32 |
+
"eval_loss": 1.0457065105438232,
|
| 33 |
+
"eval_runtime": 12.2627,
|
| 34 |
+
"eval_samples_per_second": 826.814,
|
| 35 |
+
"eval_steps_per_second": 25.851,
|
| 36 |
+
"step": 5704
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"epoch": 3.0,
|
| 40 |
+
"learning_rate": 0.00041264395087890917,
|
| 41 |
+
"loss": 1.0203,
|
| 42 |
+
"step": 8556
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"epoch": 3.0,
|
| 46 |
+
"eval_loss": 1.0074150562286377,
|
| 47 |
+
"eval_runtime": 12.4439,
|
| 48 |
+
"eval_samples_per_second": 814.775,
|
| 49 |
+
"eval_steps_per_second": 25.474,
|
| 50 |
+
"step": 8556
|
| 51 |
+
},
|
| 52 |
+
{
|
| 53 |
+
"epoch": 4.0,
|
| 54 |
+
"learning_rate": 0.0003433987425131291,
|
| 55 |
+
"loss": 0.9556,
|
| 56 |
+
"step": 11408
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
"epoch": 4.0,
|
| 60 |
+
"eval_loss": 0.9763176441192627,
|
| 61 |
+
"eval_runtime": 12.3993,
|
| 62 |
+
"eval_samples_per_second": 817.705,
|
| 63 |
+
"eval_steps_per_second": 25.566,
|
| 64 |
+
"step": 11408
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"epoch": 5.0,
|
| 68 |
+
"learning_rate": 0.00026434731900978713,
|
| 69 |
+
"loss": 0.8938,
|
| 70 |
+
"step": 14260
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"epoch": 5.0,
|
| 74 |
+
"eval_loss": 0.9555851221084595,
|
| 75 |
+
"eval_runtime": 11.4331,
|
| 76 |
+
"eval_samples_per_second": 886.808,
|
| 77 |
+
"eval_steps_per_second": 27.726,
|
| 78 |
+
"step": 14260
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"epoch": 6.0,
|
| 82 |
+
"learning_rate": 0.0001837895272934318,
|
| 83 |
+
"loss": 0.8298,
|
| 84 |
+
"step": 17112
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"epoch": 6.0,
|
| 88 |
+
"eval_loss": 0.9401514530181885,
|
| 89 |
+
"eval_runtime": 11.415,
|
| 90 |
+
"eval_samples_per_second": 888.217,
|
| 91 |
+
"eval_steps_per_second": 27.77,
|
| 92 |
+
"step": 17112
|
| 93 |
+
}
|
| 94 |
+
],
|
| 95 |
+
"max_steps": 28520,
|
| 96 |
+
"num_train_epochs": 10,
|
| 97 |
+
"total_flos": 1.7633821487616e+16,
|
| 98 |
+
"trial_name": null,
|
| 99 |
+
"trial_params": null
|
| 100 |
+
}
|
checkpoint-17112/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c8c5a084a5cbd7aa033d2c6e8ff526bb65b17feb5570391cc215fdf455a6fd41
|
| 3 |
+
size 3503
|
checkpoint-17112/vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint-19964/config.json
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_name_or_path": "gpt2",
|
| 3 |
+
"activation_function": "gelu_new",
|
| 4 |
+
"architectures": [
|
| 5 |
+
"GPT2LMHeadModel"
|
| 6 |
+
],
|
| 7 |
+
"attn_pdrop": 0.1,
|
| 8 |
+
"bos_token_id": 50256,
|
| 9 |
+
"embd_pdrop": 0.1,
|
| 10 |
+
"eos_token_id": 50256,
|
| 11 |
+
"initializer_range": 0.02,
|
| 12 |
+
"layer_norm_epsilon": 1e-05,
|
| 13 |
+
"model_type": "gpt2",
|
| 14 |
+
"n_ctx": 1024,
|
| 15 |
+
"n_embd": 768,
|
| 16 |
+
"n_head": 12,
|
| 17 |
+
"n_inner": null,
|
| 18 |
+
"n_layer": 12,
|
| 19 |
+
"n_positions": 256,
|
| 20 |
+
"reorder_and_upcast_attn": false,
|
| 21 |
+
"resid_pdrop": 0.1,
|
| 22 |
+
"scale_attn_by_inverse_layer_idx": false,
|
| 23 |
+
"scale_attn_weights": true,
|
| 24 |
+
"summary_activation": null,
|
| 25 |
+
"summary_first_dropout": 0.1,
|
| 26 |
+
"summary_proj_to_labels": true,
|
| 27 |
+
"summary_type": "cls_index",
|
| 28 |
+
"summary_use_proj": true,
|
| 29 |
+
"task_specific_params": {
|
| 30 |
+
"text-generation": {
|
| 31 |
+
"do_sample": true,
|
| 32 |
+
"max_length": 50
|
| 33 |
+
}
|
| 34 |
+
},
|
| 35 |
+
"torch_dtype": "float32",
|
| 36 |
+
"transformers_version": "4.26.0",
|
| 37 |
+
"use_cache": true,
|
| 38 |
+
"vocab_size": 50257
|
| 39 |
+
}
|
checkpoint-19964/generation_config.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"_from_model_config": true,
|
| 3 |
+
"bos_token_id": 50256,
|
| 4 |
+
"eos_token_id": 50256,
|
| 5 |
+
"transformers_version": "4.26.0"
|
| 6 |
+
}
|
checkpoint-19964/merges.txt
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint-19964/optimizer.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2e6a842d38405dc26283b1b080337b0fe333a12fbabc48b9252a93ad137afe10
|
| 3 |
+
size 990885425
|
checkpoint-19964/pytorch_model.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9bd1633547da651133192bdd8db081bb03dee6d0f62581244b9a2acb5a860c0a
|
| 3 |
+
size 496240745
|
checkpoint-19964/rng_state.pth
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1fb9552244a3079452f0e89d98e66d01731fe487face9c90396090ea670ea924
|
| 3 |
+
size 14503
|
checkpoint-19964/scaler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:e8752d0dc2446b6447c672fbb678b41b3ad25666c66986c3cbd9ff66643e2580
|
| 3 |
+
size 559
|
checkpoint-19964/scheduler.pt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:ca93883c2067ac84ff022b658b2f6124215f6833a6f18cd5c79bb172d6a807b2
|
| 3 |
+
size 623
|
checkpoint-19964/special_tokens_map.json
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"bos_token": "<|endoftext|>",
|
| 3 |
+
"eos_token": "<|endoftext|>",
|
| 4 |
+
"pad_token": "<|endoftext|>",
|
| 5 |
+
"unk_token": "<|endoftext|>"
|
| 6 |
+
}
|
checkpoint-19964/tokenizer.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
checkpoint-19964/tokenizer_config.json
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"add_prefix_space": false,
|
| 3 |
+
"bos_token": "<|endoftext|>",
|
| 4 |
+
"eos_token": "<|endoftext|>",
|
| 5 |
+
"model_max_length": 1024,
|
| 6 |
+
"name_or_path": "gpt2",
|
| 7 |
+
"special_tokens_map_file": null,
|
| 8 |
+
"tokenizer_class": "GPT2Tokenizer",
|
| 9 |
+
"unk_token": "<|endoftext|>"
|
| 10 |
+
}
|
checkpoint-19964/trainer_state.json
ADDED
|
@@ -0,0 +1,114 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"best_metric": null,
|
| 3 |
+
"best_model_checkpoint": null,
|
| 4 |
+
"epoch": 7.0,
|
| 5 |
+
"global_step": 19964,
|
| 6 |
+
"is_hyper_param_search": false,
|
| 7 |
+
"is_local_process_zero": true,
|
| 8 |
+
"is_world_process_zero": true,
|
| 9 |
+
"log_history": [
|
| 10 |
+
{
|
| 11 |
+
"epoch": 1.0,
|
| 12 |
+
"learning_rate": 0.0004944335719677752,
|
| 13 |
+
"loss": 1.7574,
|
| 14 |
+
"step": 2852
|
| 15 |
+
},
|
| 16 |
+
{
|
| 17 |
+
"epoch": 1.0,
|
| 18 |
+
"eval_loss": 1.1653077602386475,
|
| 19 |
+
"eval_runtime": 5.8814,
|
| 20 |
+
"eval_samples_per_second": 1723.907,
|
| 21 |
+
"eval_steps_per_second": 53.899,
|
| 22 |
+
"step": 2852
|
| 23 |
+
},
|
| 24 |
+
{
|
| 25 |
+
"epoch": 2.0,
|
| 26 |
+
"learning_rate": 0.0004648126812298226,
|
| 27 |
+
"loss": 1.1185,
|
| 28 |
+
"step": 5704
|
| 29 |
+
},
|
| 30 |
+
{
|
| 31 |
+
"epoch": 2.0,
|
| 32 |
+
"eval_loss": 1.0457065105438232,
|
| 33 |
+
"eval_runtime": 12.2627,
|
| 34 |
+
"eval_samples_per_second": 826.814,
|
| 35 |
+
"eval_steps_per_second": 25.851,
|
| 36 |
+
"step": 5704
|
| 37 |
+
},
|
| 38 |
+
{
|
| 39 |
+
"epoch": 3.0,
|
| 40 |
+
"learning_rate": 0.00041264395087890917,
|
| 41 |
+
"loss": 1.0203,
|
| 42 |
+
"step": 8556
|
| 43 |
+
},
|
| 44 |
+
{
|
| 45 |
+
"epoch": 3.0,
|
| 46 |
+
"eval_loss": 1.0074150562286377,
|
| 47 |
+
"eval_runtime": 12.4439,
|
| 48 |
+
"eval_samples_per_second": 814.775,
|
| 49 |
+
"eval_steps_per_second": 25.474,
|
| 50 |
+
"step": 8556
|
| 51 |
+
},
|
| 52 |
+
{
|
| 53 |
+
"epoch": 4.0,
|
| 54 |
+
"learning_rate": 0.0003433987425131291,
|
| 55 |
+
"loss": 0.9556,
|
| 56 |
+
"step": 11408
|
| 57 |
+
},
|
| 58 |
+
{
|
| 59 |
+
"epoch": 4.0,
|
| 60 |
+
"eval_loss": 0.9763176441192627,
|
| 61 |
+
"eval_runtime": 12.3993,
|
| 62 |
+
"eval_samples_per_second": 817.705,
|
| 63 |
+
"eval_steps_per_second": 25.566,
|
| 64 |
+
"step": 11408
|
| 65 |
+
},
|
| 66 |
+
{
|
| 67 |
+
"epoch": 5.0,
|
| 68 |
+
"learning_rate": 0.00026434731900978713,
|
| 69 |
+
"loss": 0.8938,
|
| 70 |
+
"step": 14260
|
| 71 |
+
},
|
| 72 |
+
{
|
| 73 |
+
"epoch": 5.0,
|
| 74 |
+
"eval_loss": 0.9555851221084595,
|
| 75 |
+
"eval_runtime": 11.4331,
|
| 76 |
+
"eval_samples_per_second": 886.808,
|
| 77 |
+
"eval_steps_per_second": 27.726,
|
| 78 |
+
"step": 14260
|
| 79 |
+
},
|
| 80 |
+
{
|
| 81 |
+
"epoch": 6.0,
|
| 82 |
+
"learning_rate": 0.0001837895272934318,
|
| 83 |
+
"loss": 0.8298,
|
| 84 |
+
"step": 17112
|
| 85 |
+
},
|
| 86 |
+
{
|
| 87 |
+
"epoch": 6.0,
|
| 88 |
+
"eval_loss": 0.9401514530181885,
|
| 89 |
+
"eval_runtime": 11.415,
|
| 90 |
+
"eval_samples_per_second": 888.217,
|
| 91 |
+
"eval_steps_per_second": 27.77,
|
| 92 |
+
"step": 17112
|
| 93 |
+
},
|
| 94 |
+
{
|
| 95 |
+
"epoch": 7.0,
|
| 96 |
+
"learning_rate": 0.00011018337242196222,
|
| 97 |
+
"loss": 0.7611,
|
| 98 |
+
"step": 19964
|
| 99 |
+
},
|
| 100 |
+
{
|
| 101 |
+
"epoch": 7.0,
|
| 102 |
+
"eval_loss": 0.9359964728355408,
|
| 103 |
+
"eval_runtime": 11.3883,
|
| 104 |
+
"eval_samples_per_second": 890.303,
|
| 105 |
+
"eval_steps_per_second": 27.836,
|
| 106 |
+
"step": 19964
|
| 107 |
+
}
|
| 108 |
+
],
|
| 109 |
+
"max_steps": 28520,
|
| 110 |
+
"num_train_epochs": 10,
|
| 111 |
+
"total_flos": 2.0546404982784e+16,
|
| 112 |
+
"trial_name": null,
|
| 113 |
+
"trial_params": null
|
| 114 |
+
}
|
checkpoint-19964/training_args.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c8c5a084a5cbd7aa033d2c6e8ff526bb65b17feb5570391cc215fdf455a6fd41
|
| 3 |
+
size 3503
|
checkpoint-19964/vocab.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
pytorch_model.bin
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
size 496240745
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:9bd1633547da651133192bdd8db081bb03dee6d0f62581244b9a2acb5a860c0a
|
| 3 |
size 496240745
|
runs/Mar20_15-06-31_jupyter-austincarthy/events.out.tfevents.1679324800.jupyter-austincarthy.84.0
CHANGED
|
@@ -1,3 +1,3 @@
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
-
oid sha256:
|
| 3 |
-
size
|
|
|
|
| 1 |
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:d248d6ea38907b57923894c772370c02937d48e2ac09e394c98b9ac0fe3c6800
|
| 3 |
+
size 7145
|