kpriyanshu256
commited on
Commit
•
ff53177
1
Parent(s):
ac0c95e
Upload folder using huggingface_hub
Browse files- checkpoint-2400/config.json +33 -0
- checkpoint-2400/merges.txt +0 -0
- checkpoint-2400/optimizer.pt +3 -0
- checkpoint-2400/pytorch_model.bin +3 -0
- checkpoint-2400/rng_state.pth +3 -0
- checkpoint-2400/scaler.pt +3 -0
- checkpoint-2400/scheduler.pt +3 -0
- checkpoint-2400/special_tokens_map.json +1 -0
- checkpoint-2400/tokenizer.json +0 -0
- checkpoint-2400/tokenizer_config.json +1 -0
- checkpoint-2400/trainer_state.json +136 -0
- checkpoint-2400/training_args.bin +3 -0
- checkpoint-2400/vocab.json +0 -0
- checkpoint-3000/config.json +33 -0
- checkpoint-3000/merges.txt +0 -0
- checkpoint-3000/optimizer.pt +3 -0
- checkpoint-3000/pytorch_model.bin +3 -0
- checkpoint-3000/rng_state.pth +3 -0
- checkpoint-3000/scaler.pt +3 -0
- checkpoint-3000/scheduler.pt +3 -0
- checkpoint-3000/special_tokens_map.json +1 -0
- checkpoint-3000/tokenizer.json +0 -0
- checkpoint-3000/tokenizer_config.json +1 -0
- checkpoint-3000/trainer_state.json +166 -0
- checkpoint-3000/training_args.bin +3 -0
- checkpoint-3000/vocab.json +0 -0
- config.json +33 -0
- merges.txt +0 -0
- pytorch_model.bin +3 -0
- runs/Apr06_17-46-43_babel-3-7/1712440009.733516/events.out.tfevents.1712440009.babel-3-7 +3 -0
- runs/Apr06_17-46-43_babel-3-7/events.out.tfevents.1712440009.babel-3-7 +3 -0
- runs/Apr06_18-01-45_babel-8-7/1712440910.5851839/events.out.tfevents.1712440910.babel-8-7 +3 -0
- runs/Apr06_18-01-45_babel-8-7/events.out.tfevents.1712440910.babel-8-7 +3 -0
- runs/Apr06_18-01-45_babel-8-7/events.out.tfevents.1712609521.babel-8-7 +3 -0
- special_tokens_map.json +1 -0
- tokenizer.json +0 -0
- tokenizer_config.json +1 -0
- trainer_state.json +175 -0
- training_args.bin +3 -0
- vocab.json +0 -0
checkpoint-2400/config.json
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "Salesforce/codet5p-220m",
|
3 |
+
"architectures": [
|
4 |
+
"T5ForConditionalGeneration"
|
5 |
+
],
|
6 |
+
"bos_token_id": 1,
|
7 |
+
"d_ff": 3072,
|
8 |
+
"d_kv": 64,
|
9 |
+
"d_model": 768,
|
10 |
+
"decoder_start_token_id": 0,
|
11 |
+
"dense_act_fn": "relu",
|
12 |
+
"dropout_rate": 0.1,
|
13 |
+
"eos_token_id": 2,
|
14 |
+
"feed_forward_proj": "relu",
|
15 |
+
"initializer_factor": 1.0,
|
16 |
+
"is_encoder_decoder": true,
|
17 |
+
"is_gated_act": false,
|
18 |
+
"layer_norm_epsilon": 1e-06,
|
19 |
+
"max_length": 1024,
|
20 |
+
"model_type": "t5",
|
21 |
+
"n_positions": 512,
|
22 |
+
"num_decoder_layers": 12,
|
23 |
+
"num_heads": 12,
|
24 |
+
"num_layers": 12,
|
25 |
+
"output_past": true,
|
26 |
+
"pad_token_id": 0,
|
27 |
+
"relative_attention_max_distance": 128,
|
28 |
+
"relative_attention_num_buckets": 32,
|
29 |
+
"torch_dtype": "float32",
|
30 |
+
"transformers_version": "4.17.0",
|
31 |
+
"use_cache": true,
|
32 |
+
"vocab_size": 32100
|
33 |
+
}
|
checkpoint-2400/merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-2400/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a07de3d096c126efcc7a124528958d8881f5ffe9b7a42c3885bc353e0bea3eba
|
3 |
+
size 1783209146
|
checkpoint-2400/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a87454e3aa158c8448067089cf62318d406cdf9bc61b2df9eee7043db89e980d
|
3 |
+
size 891647438
|
checkpoint-2400/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6ea8abde04241a68e49a8167022aa9b83cee1f8ce6494fc8508d03a5520b276b
|
3 |
+
size 15006
|
checkpoint-2400/scaler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:900f26e6b66c9eae5e85a376345ba03c270e502825dc9e8a4ca0b1cd6fc50bd2
|
3 |
+
size 988
|
checkpoint-2400/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e9e4c100eb9966109a455ef09e6ff0c3b05e7f31ab122aab309b9c64eba67bc1
|
3 |
+
size 1064
|
checkpoint-2400/special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, "additional_special_tokens": [{"content": "<extra_id_99>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_98>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_97>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_96>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_95>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_94>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_93>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_92>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_91>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_90>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_89>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_88>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_87>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_86>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_85>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_84>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_83>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_82>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_81>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_80>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_79>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_78>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_77>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_76>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_75>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_74>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_73>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_72>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_71>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_70>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_69>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_68>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_67>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_66>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_65>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_64>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_63>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_62>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_61>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_60>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_59>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_58>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_57>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_56>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_55>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_54>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_53>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_52>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_51>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_50>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_49>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_48>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_47>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_46>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_45>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_44>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_43>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_42>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_41>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_40>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_39>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_38>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_37>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_36>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_35>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_34>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_33>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_32>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_31>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_30>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_29>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_28>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_27>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_26>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_25>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_24>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_23>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_22>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_21>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_20>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_19>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_18>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_17>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_16>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_15>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_14>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_13>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_12>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_11>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_10>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_9>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_8>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_7>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_6>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_5>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_4>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_3>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_2>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_1>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_0>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}]}
|
checkpoint-2400/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-2400/tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"errors": "replace", "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "add_prefix_space": false, "trim_offsets": true, "model_max_length": 512, "special_tokens_map_file": "/data/datasets/hf_cache/transformers/f432e4eb4a7dfc04b533beea5590e11f4b46c86f5630e8a032704ef76d7269c1.b9905d0575bde443a20834122b6e2d48e853b2e36444ce98ddeb43c38097eb3f", "name_or_path": "Salesforce/codet5p-220m", "tokenizer_class": "RobertaTokenizer"}
|
checkpoint-2400/trainer_state.json
ADDED
@@ -0,0 +1,136 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 96.7742,
|
3 |
+
"best_model_checkpoint": "/data/tir/projects/tir7/user_data/priyansk/qa_tapex_e2_codet5p-220m_latex/checkpoint-2400",
|
4 |
+
"epoch": 1.4887484736301437,
|
5 |
+
"global_step": 2400,
|
6 |
+
"is_hyper_param_search": false,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 0.19,
|
12 |
+
"learning_rate": 9.070409429280397e-05,
|
13 |
+
"loss": 1.8932,
|
14 |
+
"step": 300
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"epoch": 0.19,
|
18 |
+
"eval_exact_match": 69.7581,
|
19 |
+
"eval_loss": 0.006803931202739477,
|
20 |
+
"eval_runtime": 182.7837,
|
21 |
+
"eval_samples_per_second": 2.735,
|
22 |
+
"eval_steps_per_second": 0.175,
|
23 |
+
"step": 300
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.37,
|
27 |
+
"learning_rate": 8.140818858560795e-05,
|
28 |
+
"loss": 0.0034,
|
29 |
+
"step": 600
|
30 |
+
},
|
31 |
+
{
|
32 |
+
"epoch": 0.37,
|
33 |
+
"eval_exact_match": 86.4919,
|
34 |
+
"eval_loss": 0.0015783495036885142,
|
35 |
+
"eval_runtime": 139.3342,
|
36 |
+
"eval_samples_per_second": 3.588,
|
37 |
+
"eval_steps_per_second": 0.23,
|
38 |
+
"step": 600
|
39 |
+
},
|
40 |
+
{
|
41 |
+
"epoch": 0.56,
|
42 |
+
"learning_rate": 7.211228287841191e-05,
|
43 |
+
"loss": 0.0012,
|
44 |
+
"step": 900
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.56,
|
48 |
+
"eval_exact_match": 92.5403,
|
49 |
+
"eval_loss": 0.00091337546473369,
|
50 |
+
"eval_runtime": 129.2451,
|
51 |
+
"eval_samples_per_second": 3.869,
|
52 |
+
"eval_steps_per_second": 0.248,
|
53 |
+
"step": 900
|
54 |
+
},
|
55 |
+
{
|
56 |
+
"epoch": 0.74,
|
57 |
+
"learning_rate": 6.28163771712159e-05,
|
58 |
+
"loss": 0.0008,
|
59 |
+
"step": 1200
|
60 |
+
},
|
61 |
+
{
|
62 |
+
"epoch": 0.74,
|
63 |
+
"eval_exact_match": 94.3548,
|
64 |
+
"eval_loss": 0.0006592237623408437,
|
65 |
+
"eval_runtime": 130.2486,
|
66 |
+
"eval_samples_per_second": 3.839,
|
67 |
+
"eval_steps_per_second": 0.246,
|
68 |
+
"step": 1200
|
69 |
+
},
|
70 |
+
{
|
71 |
+
"epoch": 0.93,
|
72 |
+
"learning_rate": 5.352047146401985e-05,
|
73 |
+
"loss": 0.0006,
|
74 |
+
"step": 1500
|
75 |
+
},
|
76 |
+
{
|
77 |
+
"epoch": 0.93,
|
78 |
+
"eval_exact_match": 94.5565,
|
79 |
+
"eval_loss": 0.0005109157646074891,
|
80 |
+
"eval_runtime": 127.9056,
|
81 |
+
"eval_samples_per_second": 3.909,
|
82 |
+
"eval_steps_per_second": 0.25,
|
83 |
+
"step": 1500
|
84 |
+
},
|
85 |
+
{
|
86 |
+
"epoch": 1.12,
|
87 |
+
"learning_rate": 4.422456575682382e-05,
|
88 |
+
"loss": 0.0004,
|
89 |
+
"step": 1800
|
90 |
+
},
|
91 |
+
{
|
92 |
+
"epoch": 1.12,
|
93 |
+
"eval_exact_match": 95.3629,
|
94 |
+
"eval_loss": 0.0004209030594211072,
|
95 |
+
"eval_runtime": 129.0896,
|
96 |
+
"eval_samples_per_second": 3.873,
|
97 |
+
"eval_steps_per_second": 0.248,
|
98 |
+
"step": 1800
|
99 |
+
},
|
100 |
+
{
|
101 |
+
"epoch": 1.3,
|
102 |
+
"learning_rate": 3.4928660049627796e-05,
|
103 |
+
"loss": 0.0004,
|
104 |
+
"step": 2100
|
105 |
+
},
|
106 |
+
{
|
107 |
+
"epoch": 1.3,
|
108 |
+
"eval_exact_match": 95.1613,
|
109 |
+
"eval_loss": 0.00042048218892887235,
|
110 |
+
"eval_runtime": 129.0282,
|
111 |
+
"eval_samples_per_second": 3.875,
|
112 |
+
"eval_steps_per_second": 0.248,
|
113 |
+
"step": 2100
|
114 |
+
},
|
115 |
+
{
|
116 |
+
"epoch": 1.49,
|
117 |
+
"learning_rate": 2.563275434243176e-05,
|
118 |
+
"loss": 0.0003,
|
119 |
+
"step": 2400
|
120 |
+
},
|
121 |
+
{
|
122 |
+
"epoch": 1.49,
|
123 |
+
"eval_exact_match": 96.7742,
|
124 |
+
"eval_loss": 0.0003429962380323559,
|
125 |
+
"eval_runtime": 127.2593,
|
126 |
+
"eval_samples_per_second": 3.929,
|
127 |
+
"eval_steps_per_second": 0.251,
|
128 |
+
"step": 2400
|
129 |
+
}
|
130 |
+
],
|
131 |
+
"max_steps": 3224,
|
132 |
+
"num_train_epochs": 2,
|
133 |
+
"total_flos": 2.9935005834254746e+18,
|
134 |
+
"trial_name": null,
|
135 |
+
"trial_params": null
|
136 |
+
}
|
checkpoint-2400/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed5d69df2f4458e091aa9fd5be576a7a7349e33ac70704441e5d3b16cf246325
|
3 |
+
size 3704
|
checkpoint-2400/vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-3000/config.json
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "Salesforce/codet5p-220m",
|
3 |
+
"architectures": [
|
4 |
+
"T5ForConditionalGeneration"
|
5 |
+
],
|
6 |
+
"bos_token_id": 1,
|
7 |
+
"d_ff": 3072,
|
8 |
+
"d_kv": 64,
|
9 |
+
"d_model": 768,
|
10 |
+
"decoder_start_token_id": 0,
|
11 |
+
"dense_act_fn": "relu",
|
12 |
+
"dropout_rate": 0.1,
|
13 |
+
"eos_token_id": 2,
|
14 |
+
"feed_forward_proj": "relu",
|
15 |
+
"initializer_factor": 1.0,
|
16 |
+
"is_encoder_decoder": true,
|
17 |
+
"is_gated_act": false,
|
18 |
+
"layer_norm_epsilon": 1e-06,
|
19 |
+
"max_length": 1024,
|
20 |
+
"model_type": "t5",
|
21 |
+
"n_positions": 512,
|
22 |
+
"num_decoder_layers": 12,
|
23 |
+
"num_heads": 12,
|
24 |
+
"num_layers": 12,
|
25 |
+
"output_past": true,
|
26 |
+
"pad_token_id": 0,
|
27 |
+
"relative_attention_max_distance": 128,
|
28 |
+
"relative_attention_num_buckets": 32,
|
29 |
+
"torch_dtype": "float32",
|
30 |
+
"transformers_version": "4.17.0",
|
31 |
+
"use_cache": true,
|
32 |
+
"vocab_size": 32100
|
33 |
+
}
|
checkpoint-3000/merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-3000/optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:b907bddb688109e99a2603d097ccfcf94f8c74d5ff6aee327877c03e2e0a7f42
|
3 |
+
size 1783209146
|
checkpoint-3000/pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2c74a0109069162babb6e8f4ee3abccf74dcf29756a2f98881c6d56a14db2783
|
3 |
+
size 891647438
|
checkpoint-3000/rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:49ab7ff4aa151c2e257a826519f59a4cf194464bf39e8c8feb460471c519e854
|
3 |
+
size 15006
|
checkpoint-3000/scaler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:21aba8ed0f38ed1c04994c10a9ca7e9925e55ef2ed51283c43ff8e2cce78585f
|
3 |
+
size 988
|
checkpoint-3000/scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:33184e4b01e31b6911f5275835e383836a36f8349f9d3b2f4dc1ce2520b4d6ae
|
3 |
+
size 1064
|
checkpoint-3000/special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, "additional_special_tokens": [{"content": "<extra_id_99>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_98>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_97>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_96>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_95>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_94>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_93>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_92>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_91>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_90>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_89>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_88>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_87>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_86>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_85>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_84>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_83>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_82>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_81>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_80>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_79>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_78>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_77>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_76>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_75>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_74>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_73>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_72>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_71>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_70>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_69>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_68>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_67>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_66>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_65>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_64>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_63>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_62>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_61>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_60>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_59>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_58>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_57>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_56>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_55>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_54>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_53>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_52>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_51>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_50>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_49>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_48>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_47>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_46>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_45>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_44>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_43>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_42>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_41>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_40>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_39>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_38>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_37>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_36>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_35>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_34>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_33>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_32>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_31>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_30>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_29>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_28>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_27>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_26>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_25>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_24>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_23>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_22>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_21>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_20>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_19>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_18>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_17>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_16>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_15>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_14>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_13>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_12>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_11>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_10>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_9>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_8>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_7>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_6>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_5>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_4>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_3>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_2>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_1>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_0>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}]}
|
checkpoint-3000/tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
checkpoint-3000/tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"errors": "replace", "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "add_prefix_space": false, "trim_offsets": true, "model_max_length": 512, "special_tokens_map_file": "/data/datasets/hf_cache/transformers/f432e4eb4a7dfc04b533beea5590e11f4b46c86f5630e8a032704ef76d7269c1.b9905d0575bde443a20834122b6e2d48e853b2e36444ce98ddeb43c38097eb3f", "name_or_path": "Salesforce/codet5p-220m", "tokenizer_class": "RobertaTokenizer"}
|
checkpoint-3000/trainer_state.json
ADDED
@@ -0,0 +1,166 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 96.7742,
|
3 |
+
"best_model_checkpoint": "/data/tir/projects/tir7/user_data/priyansk/qa_tapex_e2_codet5p-220m_latex/checkpoint-2400",
|
4 |
+
"epoch": 1.8608919814703544,
|
5 |
+
"global_step": 3000,
|
6 |
+
"is_hyper_param_search": false,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 0.19,
|
12 |
+
"learning_rate": 9.070409429280397e-05,
|
13 |
+
"loss": 1.8932,
|
14 |
+
"step": 300
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"epoch": 0.19,
|
18 |
+
"eval_exact_match": 69.7581,
|
19 |
+
"eval_loss": 0.006803931202739477,
|
20 |
+
"eval_runtime": 182.7837,
|
21 |
+
"eval_samples_per_second": 2.735,
|
22 |
+
"eval_steps_per_second": 0.175,
|
23 |
+
"step": 300
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.37,
|
27 |
+
"learning_rate": 8.140818858560795e-05,
|
28 |
+
"loss": 0.0034,
|
29 |
+
"step": 600
|
30 |
+
},
|
31 |
+
{
|
32 |
+
"epoch": 0.37,
|
33 |
+
"eval_exact_match": 86.4919,
|
34 |
+
"eval_loss": 0.0015783495036885142,
|
35 |
+
"eval_runtime": 139.3342,
|
36 |
+
"eval_samples_per_second": 3.588,
|
37 |
+
"eval_steps_per_second": 0.23,
|
38 |
+
"step": 600
|
39 |
+
},
|
40 |
+
{
|
41 |
+
"epoch": 0.56,
|
42 |
+
"learning_rate": 7.211228287841191e-05,
|
43 |
+
"loss": 0.0012,
|
44 |
+
"step": 900
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.56,
|
48 |
+
"eval_exact_match": 92.5403,
|
49 |
+
"eval_loss": 0.00091337546473369,
|
50 |
+
"eval_runtime": 129.2451,
|
51 |
+
"eval_samples_per_second": 3.869,
|
52 |
+
"eval_steps_per_second": 0.248,
|
53 |
+
"step": 900
|
54 |
+
},
|
55 |
+
{
|
56 |
+
"epoch": 0.74,
|
57 |
+
"learning_rate": 6.28163771712159e-05,
|
58 |
+
"loss": 0.0008,
|
59 |
+
"step": 1200
|
60 |
+
},
|
61 |
+
{
|
62 |
+
"epoch": 0.74,
|
63 |
+
"eval_exact_match": 94.3548,
|
64 |
+
"eval_loss": 0.0006592237623408437,
|
65 |
+
"eval_runtime": 130.2486,
|
66 |
+
"eval_samples_per_second": 3.839,
|
67 |
+
"eval_steps_per_second": 0.246,
|
68 |
+
"step": 1200
|
69 |
+
},
|
70 |
+
{
|
71 |
+
"epoch": 0.93,
|
72 |
+
"learning_rate": 5.352047146401985e-05,
|
73 |
+
"loss": 0.0006,
|
74 |
+
"step": 1500
|
75 |
+
},
|
76 |
+
{
|
77 |
+
"epoch": 0.93,
|
78 |
+
"eval_exact_match": 94.5565,
|
79 |
+
"eval_loss": 0.0005109157646074891,
|
80 |
+
"eval_runtime": 127.9056,
|
81 |
+
"eval_samples_per_second": 3.909,
|
82 |
+
"eval_steps_per_second": 0.25,
|
83 |
+
"step": 1500
|
84 |
+
},
|
85 |
+
{
|
86 |
+
"epoch": 1.12,
|
87 |
+
"learning_rate": 4.422456575682382e-05,
|
88 |
+
"loss": 0.0004,
|
89 |
+
"step": 1800
|
90 |
+
},
|
91 |
+
{
|
92 |
+
"epoch": 1.12,
|
93 |
+
"eval_exact_match": 95.3629,
|
94 |
+
"eval_loss": 0.0004209030594211072,
|
95 |
+
"eval_runtime": 129.0896,
|
96 |
+
"eval_samples_per_second": 3.873,
|
97 |
+
"eval_steps_per_second": 0.248,
|
98 |
+
"step": 1800
|
99 |
+
},
|
100 |
+
{
|
101 |
+
"epoch": 1.3,
|
102 |
+
"learning_rate": 3.4928660049627796e-05,
|
103 |
+
"loss": 0.0004,
|
104 |
+
"step": 2100
|
105 |
+
},
|
106 |
+
{
|
107 |
+
"epoch": 1.3,
|
108 |
+
"eval_exact_match": 95.1613,
|
109 |
+
"eval_loss": 0.00042048218892887235,
|
110 |
+
"eval_runtime": 129.0282,
|
111 |
+
"eval_samples_per_second": 3.875,
|
112 |
+
"eval_steps_per_second": 0.248,
|
113 |
+
"step": 2100
|
114 |
+
},
|
115 |
+
{
|
116 |
+
"epoch": 1.49,
|
117 |
+
"learning_rate": 2.563275434243176e-05,
|
118 |
+
"loss": 0.0003,
|
119 |
+
"step": 2400
|
120 |
+
},
|
121 |
+
{
|
122 |
+
"epoch": 1.49,
|
123 |
+
"eval_exact_match": 96.7742,
|
124 |
+
"eval_loss": 0.0003429962380323559,
|
125 |
+
"eval_runtime": 127.2593,
|
126 |
+
"eval_samples_per_second": 3.929,
|
127 |
+
"eval_steps_per_second": 0.251,
|
128 |
+
"step": 2400
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.67,
|
132 |
+
"learning_rate": 1.6336848635235734e-05,
|
133 |
+
"loss": 0.0003,
|
134 |
+
"step": 2700
|
135 |
+
},
|
136 |
+
{
|
137 |
+
"epoch": 1.67,
|
138 |
+
"eval_exact_match": 96.7742,
|
139 |
+
"eval_loss": 0.00032032810850068927,
|
140 |
+
"eval_runtime": 126.2849,
|
141 |
+
"eval_samples_per_second": 3.959,
|
142 |
+
"eval_steps_per_second": 0.253,
|
143 |
+
"step": 2700
|
144 |
+
},
|
145 |
+
{
|
146 |
+
"epoch": 1.86,
|
147 |
+
"learning_rate": 7.040942928039701e-06,
|
148 |
+
"loss": 0.0003,
|
149 |
+
"step": 3000
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 1.86,
|
153 |
+
"eval_exact_match": 96.371,
|
154 |
+
"eval_loss": 0.0003172786091454327,
|
155 |
+
"eval_runtime": 128.5411,
|
156 |
+
"eval_samples_per_second": 3.89,
|
157 |
+
"eval_steps_per_second": 0.249,
|
158 |
+
"step": 3000
|
159 |
+
}
|
160 |
+
],
|
161 |
+
"max_steps": 3224,
|
162 |
+
"num_train_epochs": 2,
|
163 |
+
"total_flos": 3.7417880393456026e+18,
|
164 |
+
"trial_name": null,
|
165 |
+
"trial_params": null
|
166 |
+
}
|
checkpoint-3000/training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed5d69df2f4458e091aa9fd5be576a7a7349e33ac70704441e5d3b16cf246325
|
3 |
+
size 3704
|
checkpoint-3000/vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
config.json
ADDED
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "Salesforce/codet5p-220m",
|
3 |
+
"architectures": [
|
4 |
+
"T5ForConditionalGeneration"
|
5 |
+
],
|
6 |
+
"bos_token_id": 1,
|
7 |
+
"d_ff": 3072,
|
8 |
+
"d_kv": 64,
|
9 |
+
"d_model": 768,
|
10 |
+
"decoder_start_token_id": 0,
|
11 |
+
"dense_act_fn": "relu",
|
12 |
+
"dropout_rate": 0.1,
|
13 |
+
"eos_token_id": 2,
|
14 |
+
"feed_forward_proj": "relu",
|
15 |
+
"initializer_factor": 1.0,
|
16 |
+
"is_encoder_decoder": true,
|
17 |
+
"is_gated_act": false,
|
18 |
+
"layer_norm_epsilon": 1e-06,
|
19 |
+
"max_length": 1024,
|
20 |
+
"model_type": "t5",
|
21 |
+
"n_positions": 512,
|
22 |
+
"num_decoder_layers": 12,
|
23 |
+
"num_heads": 12,
|
24 |
+
"num_layers": 12,
|
25 |
+
"output_past": true,
|
26 |
+
"pad_token_id": 0,
|
27 |
+
"relative_attention_max_distance": 128,
|
28 |
+
"relative_attention_num_buckets": 32,
|
29 |
+
"torch_dtype": "float32",
|
30 |
+
"transformers_version": "4.17.0",
|
31 |
+
"use_cache": true,
|
32 |
+
"vocab_size": 32100
|
33 |
+
}
|
merges.txt
ADDED
The diff for this file is too large to render.
See raw diff
|
|
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a87454e3aa158c8448067089cf62318d406cdf9bc61b2df9eee7043db89e980d
|
3 |
+
size 891647438
|
runs/Apr06_17-46-43_babel-3-7/1712440009.733516/events.out.tfevents.1712440009.babel-3-7
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6b5eac974e2a790c10311e084e85da4201106f56fc7e7fc3f44013bb39af6cc3
|
3 |
+
size 4975
|
runs/Apr06_17-46-43_babel-3-7/events.out.tfevents.1712440009.babel-3-7
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9cee1b29ff8aa600239b18410805959047a70d9ddac9a726506f68d6c8fed8da
|
3 |
+
size 3740
|
runs/Apr06_18-01-45_babel-8-7/1712440910.5851839/events.out.tfevents.1712440910.babel-8-7
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e2aa99ed2986f5d55e50469c2991c10486f36cac5c2aac9b58ed30e85408c5c0
|
3 |
+
size 4975
|
runs/Apr06_18-01-45_babel-8-7/events.out.tfevents.1712440910.babel-8-7
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bb7c0be6f56e01ef2bf7edc3dd1f6f9b0e389e589d144f3477cab1ef4aa8bc7e
|
3 |
+
size 8924
|
runs/Apr06_18-01-45_babel-8-7/events.out.tfevents.1712609521.babel-8-7
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:056cbcead4cb7ec2fdd4b5e3c40cb4be3db4704f071eb2959a7a7e53d45ba575
|
3 |
+
size 366
|
special_tokens_map.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, "additional_special_tokens": [{"content": "<extra_id_99>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_98>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_97>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_96>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_95>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_94>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_93>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_92>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_91>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_90>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_89>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_88>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_87>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_86>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_85>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_84>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_83>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_82>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_81>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_80>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_79>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_78>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_77>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_76>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_75>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_74>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_73>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_72>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_71>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_70>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_69>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_68>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_67>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_66>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_65>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_64>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_63>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_62>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_61>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_60>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_59>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_58>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_57>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_56>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_55>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_54>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_53>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_52>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_51>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_50>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_49>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_48>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_47>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_46>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_45>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_44>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_43>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_42>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_41>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_40>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_39>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_38>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_37>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_36>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_35>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_34>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_33>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_32>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_31>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_30>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_29>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_28>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_27>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_26>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_25>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_24>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_23>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_22>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_21>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_20>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_19>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_18>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_17>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_16>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_15>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_14>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_13>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_12>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_11>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_10>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_9>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_8>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_7>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_6>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_5>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_4>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_3>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_2>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_1>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_0>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}]}
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
tokenizer_config.json
ADDED
@@ -0,0 +1 @@
|
|
|
|
|
1 |
+
{"errors": "replace", "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "add_prefix_space": false, "trim_offsets": true, "model_max_length": 512, "special_tokens_map_file": "/data/datasets/hf_cache/transformers/f432e4eb4a7dfc04b533beea5590e11f4b46c86f5630e8a032704ef76d7269c1.b9905d0575bde443a20834122b6e2d48e853b2e36444ce98ddeb43c38097eb3f", "name_or_path": "Salesforce/codet5p-220m", "tokenizer_class": "RobertaTokenizer"}
|
trainer_state.json
ADDED
@@ -0,0 +1,175 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 96.7742,
|
3 |
+
"best_model_checkpoint": "/data/tir/projects/tir7/user_data/priyansk/qa_tapex_e2_codet5p-220m_latex/checkpoint-2400",
|
4 |
+
"epoch": 1.9998255577306998,
|
5 |
+
"global_step": 3224,
|
6 |
+
"is_hyper_param_search": false,
|
7 |
+
"is_local_process_zero": true,
|
8 |
+
"is_world_process_zero": true,
|
9 |
+
"log_history": [
|
10 |
+
{
|
11 |
+
"epoch": 0.19,
|
12 |
+
"learning_rate": 9.070409429280397e-05,
|
13 |
+
"loss": 1.8932,
|
14 |
+
"step": 300
|
15 |
+
},
|
16 |
+
{
|
17 |
+
"epoch": 0.19,
|
18 |
+
"eval_exact_match": 69.7581,
|
19 |
+
"eval_loss": 0.006803931202739477,
|
20 |
+
"eval_runtime": 182.7837,
|
21 |
+
"eval_samples_per_second": 2.735,
|
22 |
+
"eval_steps_per_second": 0.175,
|
23 |
+
"step": 300
|
24 |
+
},
|
25 |
+
{
|
26 |
+
"epoch": 0.37,
|
27 |
+
"learning_rate": 8.140818858560795e-05,
|
28 |
+
"loss": 0.0034,
|
29 |
+
"step": 600
|
30 |
+
},
|
31 |
+
{
|
32 |
+
"epoch": 0.37,
|
33 |
+
"eval_exact_match": 86.4919,
|
34 |
+
"eval_loss": 0.0015783495036885142,
|
35 |
+
"eval_runtime": 139.3342,
|
36 |
+
"eval_samples_per_second": 3.588,
|
37 |
+
"eval_steps_per_second": 0.23,
|
38 |
+
"step": 600
|
39 |
+
},
|
40 |
+
{
|
41 |
+
"epoch": 0.56,
|
42 |
+
"learning_rate": 7.211228287841191e-05,
|
43 |
+
"loss": 0.0012,
|
44 |
+
"step": 900
|
45 |
+
},
|
46 |
+
{
|
47 |
+
"epoch": 0.56,
|
48 |
+
"eval_exact_match": 92.5403,
|
49 |
+
"eval_loss": 0.00091337546473369,
|
50 |
+
"eval_runtime": 129.2451,
|
51 |
+
"eval_samples_per_second": 3.869,
|
52 |
+
"eval_steps_per_second": 0.248,
|
53 |
+
"step": 900
|
54 |
+
},
|
55 |
+
{
|
56 |
+
"epoch": 0.74,
|
57 |
+
"learning_rate": 6.28163771712159e-05,
|
58 |
+
"loss": 0.0008,
|
59 |
+
"step": 1200
|
60 |
+
},
|
61 |
+
{
|
62 |
+
"epoch": 0.74,
|
63 |
+
"eval_exact_match": 94.3548,
|
64 |
+
"eval_loss": 0.0006592237623408437,
|
65 |
+
"eval_runtime": 130.2486,
|
66 |
+
"eval_samples_per_second": 3.839,
|
67 |
+
"eval_steps_per_second": 0.246,
|
68 |
+
"step": 1200
|
69 |
+
},
|
70 |
+
{
|
71 |
+
"epoch": 0.93,
|
72 |
+
"learning_rate": 5.352047146401985e-05,
|
73 |
+
"loss": 0.0006,
|
74 |
+
"step": 1500
|
75 |
+
},
|
76 |
+
{
|
77 |
+
"epoch": 0.93,
|
78 |
+
"eval_exact_match": 94.5565,
|
79 |
+
"eval_loss": 0.0005109157646074891,
|
80 |
+
"eval_runtime": 127.9056,
|
81 |
+
"eval_samples_per_second": 3.909,
|
82 |
+
"eval_steps_per_second": 0.25,
|
83 |
+
"step": 1500
|
84 |
+
},
|
85 |
+
{
|
86 |
+
"epoch": 1.12,
|
87 |
+
"learning_rate": 4.422456575682382e-05,
|
88 |
+
"loss": 0.0004,
|
89 |
+
"step": 1800
|
90 |
+
},
|
91 |
+
{
|
92 |
+
"epoch": 1.12,
|
93 |
+
"eval_exact_match": 95.3629,
|
94 |
+
"eval_loss": 0.0004209030594211072,
|
95 |
+
"eval_runtime": 129.0896,
|
96 |
+
"eval_samples_per_second": 3.873,
|
97 |
+
"eval_steps_per_second": 0.248,
|
98 |
+
"step": 1800
|
99 |
+
},
|
100 |
+
{
|
101 |
+
"epoch": 1.3,
|
102 |
+
"learning_rate": 3.4928660049627796e-05,
|
103 |
+
"loss": 0.0004,
|
104 |
+
"step": 2100
|
105 |
+
},
|
106 |
+
{
|
107 |
+
"epoch": 1.3,
|
108 |
+
"eval_exact_match": 95.1613,
|
109 |
+
"eval_loss": 0.00042048218892887235,
|
110 |
+
"eval_runtime": 129.0282,
|
111 |
+
"eval_samples_per_second": 3.875,
|
112 |
+
"eval_steps_per_second": 0.248,
|
113 |
+
"step": 2100
|
114 |
+
},
|
115 |
+
{
|
116 |
+
"epoch": 1.49,
|
117 |
+
"learning_rate": 2.563275434243176e-05,
|
118 |
+
"loss": 0.0003,
|
119 |
+
"step": 2400
|
120 |
+
},
|
121 |
+
{
|
122 |
+
"epoch": 1.49,
|
123 |
+
"eval_exact_match": 96.7742,
|
124 |
+
"eval_loss": 0.0003429962380323559,
|
125 |
+
"eval_runtime": 127.2593,
|
126 |
+
"eval_samples_per_second": 3.929,
|
127 |
+
"eval_steps_per_second": 0.251,
|
128 |
+
"step": 2400
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 1.67,
|
132 |
+
"learning_rate": 1.6336848635235734e-05,
|
133 |
+
"loss": 0.0003,
|
134 |
+
"step": 2700
|
135 |
+
},
|
136 |
+
{
|
137 |
+
"epoch": 1.67,
|
138 |
+
"eval_exact_match": 96.7742,
|
139 |
+
"eval_loss": 0.00032032810850068927,
|
140 |
+
"eval_runtime": 126.2849,
|
141 |
+
"eval_samples_per_second": 3.959,
|
142 |
+
"eval_steps_per_second": 0.253,
|
143 |
+
"step": 2700
|
144 |
+
},
|
145 |
+
{
|
146 |
+
"epoch": 1.86,
|
147 |
+
"learning_rate": 7.040942928039701e-06,
|
148 |
+
"loss": 0.0003,
|
149 |
+
"step": 3000
|
150 |
+
},
|
151 |
+
{
|
152 |
+
"epoch": 1.86,
|
153 |
+
"eval_exact_match": 96.371,
|
154 |
+
"eval_loss": 0.0003172786091454327,
|
155 |
+
"eval_runtime": 128.5411,
|
156 |
+
"eval_samples_per_second": 3.89,
|
157 |
+
"eval_steps_per_second": 0.249,
|
158 |
+
"step": 3000
|
159 |
+
},
|
160 |
+
{
|
161 |
+
"epoch": 2.0,
|
162 |
+
"step": 3224,
|
163 |
+
"total_flos": 4.0211486895557837e+18,
|
164 |
+
"train_loss": 0.17688602654646998,
|
165 |
+
"train_runtime": 168482.5606,
|
166 |
+
"train_samples_per_second": 19.598,
|
167 |
+
"train_steps_per_second": 0.019
|
168 |
+
}
|
169 |
+
],
|
170 |
+
"max_steps": 3224,
|
171 |
+
"num_train_epochs": 2,
|
172 |
+
"total_flos": 4.0211486895557837e+18,
|
173 |
+
"trial_name": null,
|
174 |
+
"trial_params": null
|
175 |
+
}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed5d69df2f4458e091aa9fd5be576a7a7349e33ac70704441e5d3b16cf246325
|
3 |
+
size 3704
|
vocab.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|