kpriyanshu256 commited on
Commit
d830e44
1 Parent(s): d3570bf

Upload folder using huggingface_hub

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-1344/config.json +33 -0
  2. qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-1344/merges.txt +0 -0
  3. qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-1344/optimizer.pt +3 -0
  4. qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-1344/pytorch_model.bin +3 -0
  5. qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-1344/rng_state.pth +3 -0
  6. qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-1344/scaler.pt +3 -0
  7. qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-1344/scheduler.pt +3 -0
  8. qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-1344/special_tokens_map.json +1 -0
  9. qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-1344/tokenizer.json +0 -0
  10. qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-1344/tokenizer_config.json +1 -0
  11. qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-1344/trainer_state.json +154 -0
  12. qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-1344/training_args.bin +3 -0
  13. qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-1344/vocab.json +0 -0
  14. qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-5760/config.json +33 -0
  15. qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-5760/merges.txt +0 -0
  16. qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-5760/optimizer.pt +3 -0
  17. qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-5760/pytorch_model.bin +3 -0
  18. qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-5760/rng_state.pth +3 -0
  19. qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-5760/scaler.pt +3 -0
  20. qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-5760/scheduler.pt +3 -0
  21. qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-5760/special_tokens_map.json +1 -0
  22. qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-5760/tokenizer.json +0 -0
  23. qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-5760/tokenizer_config.json +1 -0
  24. qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-5760/trainer_state.json +622 -0
  25. qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-5760/training_args.bin +3 -0
  26. qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-5760/vocab.json +0 -0
  27. qa_atis_codet5p-220m_s2_latex_bs_lr/config.json +33 -0
  28. qa_atis_codet5p-220m_s2_latex_bs_lr/merges.txt +0 -0
  29. qa_atis_codet5p-220m_s2_latex_bs_lr/metric.txt +2 -0
  30. qa_atis_codet5p-220m_s2_latex_bs_lr/pred.pkl +3 -0
  31. qa_atis_codet5p-220m_s2_latex_bs_lr/pytorch_model.bin +3 -0
  32. qa_atis_codet5p-220m_s2_latex_bs_lr/runs/Apr11_07-35-59_babel-3-9/1712835366.760723/events.out.tfevents.1712835366.babel-3-9 +3 -0
  33. qa_atis_codet5p-220m_s2_latex_bs_lr/runs/Apr11_07-35-59_babel-3-9/events.out.tfevents.1712835366.babel-3-9 +3 -0
  34. qa_atis_codet5p-220m_s2_latex_bs_lr/runs/Apr11_07-35-59_babel-3-9/events.out.tfevents.1712840389.babel-3-9 +3 -0
  35. qa_atis_codet5p-220m_s2_latex_bs_lr/special_tokens_map.json +1 -0
  36. qa_atis_codet5p-220m_s2_latex_bs_lr/tokenizer.json +0 -0
  37. qa_atis_codet5p-220m_s2_latex_bs_lr/tokenizer_config.json +1 -0
  38. qa_atis_codet5p-220m_s2_latex_bs_lr/trainer_state.json +631 -0
  39. qa_atis_codet5p-220m_s2_latex_bs_lr/training_args.bin +3 -0
  40. qa_atis_codet5p-220m_s2_latex_bs_lr/vocab.json +0 -0
  41. qa_atis_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-2688/config.json +33 -0
  42. qa_atis_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-2688/merges.txt +0 -0
  43. qa_atis_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-2688/optimizer.pt +3 -0
  44. qa_atis_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-2688/pytorch_model.bin +3 -0
  45. qa_atis_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-2688/rng_state.pth +3 -0
  46. qa_atis_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-2688/scaler.pt +3 -0
  47. qa_atis_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-2688/scheduler.pt +3 -0
  48. qa_atis_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-2688/special_tokens_map.json +1 -0
  49. qa_atis_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-2688/tokenizer.json +0 -0
  50. qa_atis_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-2688/tokenizer_config.json +1 -0
qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-1344/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/data/tir/projects/tir7/user_data/priyansk/qa_s2_codet5p-220m_latex/checkpoint-3300/",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "bos_token_id": 1,
7
+ "d_ff": 3072,
8
+ "d_kv": 64,
9
+ "d_model": 768,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "relu",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 2,
14
+ "feed_forward_proj": "relu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": false,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "max_length": 1024,
20
+ "model_type": "t5",
21
+ "n_positions": 512,
22
+ "num_decoder_layers": 12,
23
+ "num_heads": 12,
24
+ "num_layers": 12,
25
+ "output_past": true,
26
+ "pad_token_id": 0,
27
+ "relative_attention_max_distance": 128,
28
+ "relative_attention_num_buckets": 32,
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.17.0",
31
+ "use_cache": true,
32
+ "vocab_size": 32100
33
+ }
qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-1344/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-1344/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6af04378cae74be473d9c990a50f1175b009231ae6f1da317a4b2b8f3734db64
3
+ size 1783209146
qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-1344/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73f7f1f5d6ce7087fa4ee832b3762c6a72913500559416e2aaca72850e9a475d
3
+ size 891647438
qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-1344/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4a5fe5554238e695874d6f0d587c05c713528f94093e37e6397b11feb56434b
3
+ size 14244
qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-1344/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:78b7c493366b24bd062b125a5e4f5023c2b1f96118dc4fb7bb2fadb445921ccf
3
+ size 988
qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-1344/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bd235b7c62f743337c2eb7258ac7657a44155f7b44f64322303dcfe1cd712034
3
+ size 1064
qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-1344/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, "additional_special_tokens": [{"content": "<extra_id_99>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_98>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_97>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_96>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_95>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_94>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_93>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_92>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_91>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_90>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_89>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_88>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_87>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_86>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_85>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_84>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_83>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_82>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_81>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_80>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_79>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_78>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_77>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_76>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_75>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_74>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_73>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_72>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_71>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_70>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_69>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_68>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_67>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_66>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_65>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_64>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_63>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_62>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_61>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_60>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_59>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_58>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_57>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_56>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_55>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_54>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_53>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_52>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_51>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_50>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_49>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_48>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_47>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_46>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_45>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_44>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_43>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_42>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_41>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_40>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_39>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_38>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_37>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_36>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_35>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_34>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_33>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_32>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_31>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_30>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_29>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_28>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_27>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_26>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_25>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_24>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_23>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_22>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_21>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_20>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_19>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_18>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_17>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_16>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_15>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_14>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_13>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_12>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_11>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_10>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_9>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_8>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_7>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_6>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_5>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_4>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_3>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_2>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_1>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_0>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}]}
qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-1344/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-1344/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"errors": "replace", "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "add_prefix_space": false, "trim_offsets": true, "model_max_length": 512, "special_tokens_map_file": "/data/datasets/hf_cache/transformers/f432e4eb4a7dfc04b533beea5590e11f4b46c86f5630e8a032704ef76d7269c1.b9905d0575bde443a20834122b6e2d48e853b2e36444ce98ddeb43c38097eb3f", "name_or_path": "/data/tir/projects/tir7/user_data/priyansk/qa_s2_codet5p-220m_latex/checkpoint-3300/", "tokenizer_class": "RobertaTokenizer"}
qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-1344/trainer_state.json ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 88.6364,
3
+ "best_model_checkpoint": "/data/tir/projects/tir7/user_data/priyansk/qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-1344",
4
+ "epoch": 14.0,
5
+ "global_step": 1344,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "eval_exact_match": 36.3636,
13
+ "eval_loss": 0.007629383821040392,
14
+ "eval_runtime": 65.8126,
15
+ "eval_samples_per_second": 0.684,
16
+ "eval_steps_per_second": 0.182,
17
+ "step": 96
18
+ },
19
+ {
20
+ "epoch": 2.0,
21
+ "eval_exact_match": 50.0,
22
+ "eval_loss": 0.005913609638810158,
23
+ "eval_runtime": 61.2832,
24
+ "eval_samples_per_second": 0.734,
25
+ "eval_steps_per_second": 0.196,
26
+ "step": 192
27
+ },
28
+ {
29
+ "epoch": 3.0,
30
+ "eval_exact_match": 59.0909,
31
+ "eval_loss": 0.00651139160618186,
32
+ "eval_runtime": 24.3736,
33
+ "eval_samples_per_second": 1.846,
34
+ "eval_steps_per_second": 0.492,
35
+ "step": 288
36
+ },
37
+ {
38
+ "epoch": 4.0,
39
+ "eval_exact_match": 79.5455,
40
+ "eval_loss": 0.00496898777782917,
41
+ "eval_runtime": 26.7272,
42
+ "eval_samples_per_second": 1.684,
43
+ "eval_steps_per_second": 0.449,
44
+ "step": 384
45
+ },
46
+ {
47
+ "epoch": 5.0,
48
+ "eval_exact_match": 70.4545,
49
+ "eval_loss": 0.004105158615857363,
50
+ "eval_runtime": 34.8161,
51
+ "eval_samples_per_second": 1.293,
52
+ "eval_steps_per_second": 0.345,
53
+ "step": 480
54
+ },
55
+ {
56
+ "epoch": 5.21,
57
+ "learning_rate": 4.566840277777778e-05,
58
+ "loss": 0.0051,
59
+ "step": 500
60
+ },
61
+ {
62
+ "epoch": 6.0,
63
+ "eval_exact_match": 86.3636,
64
+ "eval_loss": 0.0037642279639840126,
65
+ "eval_runtime": 109.118,
66
+ "eval_samples_per_second": 0.412,
67
+ "eval_steps_per_second": 0.11,
68
+ "step": 576
69
+ },
70
+ {
71
+ "epoch": 7.0,
72
+ "eval_exact_match": 77.2727,
73
+ "eval_loss": 0.0040812077932059765,
74
+ "eval_runtime": 27.0233,
75
+ "eval_samples_per_second": 1.665,
76
+ "eval_steps_per_second": 0.444,
77
+ "step": 672
78
+ },
79
+ {
80
+ "epoch": 8.0,
81
+ "eval_exact_match": 84.0909,
82
+ "eval_loss": 0.0037790273781865835,
83
+ "eval_runtime": 26.262,
84
+ "eval_samples_per_second": 1.713,
85
+ "eval_steps_per_second": 0.457,
86
+ "step": 768
87
+ },
88
+ {
89
+ "epoch": 9.0,
90
+ "eval_exact_match": 81.8182,
91
+ "eval_loss": 0.0034369456116110086,
92
+ "eval_runtime": 48.2084,
93
+ "eval_samples_per_second": 0.933,
94
+ "eval_steps_per_second": 0.249,
95
+ "step": 864
96
+ },
97
+ {
98
+ "epoch": 10.0,
99
+ "eval_exact_match": 81.8182,
100
+ "eval_loss": 0.0035597539972513914,
101
+ "eval_runtime": 21.8113,
102
+ "eval_samples_per_second": 2.063,
103
+ "eval_steps_per_second": 0.55,
104
+ "step": 960
105
+ },
106
+ {
107
+ "epoch": 10.42,
108
+ "learning_rate": 4.1336805555555555e-05,
109
+ "loss": 0.0016,
110
+ "step": 1000
111
+ },
112
+ {
113
+ "epoch": 11.0,
114
+ "eval_exact_match": 84.0909,
115
+ "eval_loss": 0.003501500003039837,
116
+ "eval_runtime": 28.5426,
117
+ "eval_samples_per_second": 1.577,
118
+ "eval_steps_per_second": 0.42,
119
+ "step": 1056
120
+ },
121
+ {
122
+ "epoch": 12.0,
123
+ "eval_exact_match": 84.0909,
124
+ "eval_loss": 0.0038431978318840265,
125
+ "eval_runtime": 23.617,
126
+ "eval_samples_per_second": 1.905,
127
+ "eval_steps_per_second": 0.508,
128
+ "step": 1152
129
+ },
130
+ {
131
+ "epoch": 13.0,
132
+ "eval_exact_match": 86.3636,
133
+ "eval_loss": 0.003451160853728652,
134
+ "eval_runtime": 23.083,
135
+ "eval_samples_per_second": 1.949,
136
+ "eval_steps_per_second": 0.52,
137
+ "step": 1248
138
+ },
139
+ {
140
+ "epoch": 14.0,
141
+ "eval_exact_match": 88.6364,
142
+ "eval_loss": 0.003256796160712838,
143
+ "eval_runtime": 24.4477,
144
+ "eval_samples_per_second": 1.841,
145
+ "eval_steps_per_second": 0.491,
146
+ "step": 1344
147
+ }
148
+ ],
149
+ "max_steps": 5760,
150
+ "num_train_epochs": 60,
151
+ "total_flos": 6547515239301120.0,
152
+ "trial_name": null,
153
+ "trial_params": null
154
+ }
qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-1344/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:118a642995f3567a78b3ee996aa9cc1020a9328e169e41325917475ad605830a
3
+ size 3768
qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-1344/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-5760/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/data/tir/projects/tir7/user_data/priyansk/qa_s2_codet5p-220m_latex/checkpoint-3300/",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "bos_token_id": 1,
7
+ "d_ff": 3072,
8
+ "d_kv": 64,
9
+ "d_model": 768,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "relu",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 2,
14
+ "feed_forward_proj": "relu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": false,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "max_length": 1024,
20
+ "model_type": "t5",
21
+ "n_positions": 512,
22
+ "num_decoder_layers": 12,
23
+ "num_heads": 12,
24
+ "num_layers": 12,
25
+ "output_past": true,
26
+ "pad_token_id": 0,
27
+ "relative_attention_max_distance": 128,
28
+ "relative_attention_num_buckets": 32,
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.17.0",
31
+ "use_cache": true,
32
+ "vocab_size": 32100
33
+ }
qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-5760/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-5760/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:60f3b0ac137c4f1f66196b7d1a1b3f297ab3288edd2870e125ef1b4914dd40c0
3
+ size 1783209146
qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-5760/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffe0bafabeaf7d935f407134f246045ad606d68a05b393bac891865c4bd2ce22
3
+ size 891647438
qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-5760/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b7b8153d556a35d07227fc6a1fe4ddddd6eacae583af0e492cf99da5a17adede
3
+ size 14244
qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-5760/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59a9b3fd995070be4e8e57f0cf07ae30e24b5908cfa2650770cdcd2c93fdac6b
3
+ size 988
qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-5760/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:75bdb3ebda893015fab87897c60b05978fa4f326bed9b4f0f4314300dd5050f6
3
+ size 1064
qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-5760/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, "additional_special_tokens": [{"content": "<extra_id_99>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_98>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_97>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_96>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_95>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_94>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_93>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_92>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_91>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_90>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_89>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_88>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_87>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_86>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_85>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_84>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_83>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_82>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_81>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_80>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_79>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_78>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_77>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_76>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_75>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_74>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_73>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_72>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_71>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_70>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_69>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_68>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_67>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_66>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_65>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_64>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_63>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_62>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_61>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_60>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_59>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_58>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_57>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_56>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_55>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_54>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_53>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_52>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_51>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_50>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_49>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_48>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_47>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_46>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_45>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_44>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_43>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_42>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_41>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_40>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_39>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_38>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_37>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_36>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_35>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_34>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_33>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_32>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_31>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_30>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_29>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_28>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_27>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_26>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_25>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_24>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_23>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_22>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_21>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_20>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_19>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_18>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_17>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_16>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_15>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_14>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_13>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_12>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_11>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_10>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_9>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_8>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_7>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_6>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_5>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_4>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_3>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_2>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_1>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_0>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}]}
qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-5760/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-5760/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"errors": "replace", "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "add_prefix_space": false, "trim_offsets": true, "model_max_length": 512, "special_tokens_map_file": "/data/datasets/hf_cache/transformers/f432e4eb4a7dfc04b533beea5590e11f4b46c86f5630e8a032704ef76d7269c1.b9905d0575bde443a20834122b6e2d48e853b2e36444ce98ddeb43c38097eb3f", "name_or_path": "/data/tir/projects/tir7/user_data/priyansk/qa_s2_codet5p-220m_latex/checkpoint-3300/", "tokenizer_class": "RobertaTokenizer"}
qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-5760/trainer_state.json ADDED
@@ -0,0 +1,622 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 88.6364,
3
+ "best_model_checkpoint": "/data/tir/projects/tir7/user_data/priyansk/qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-1344",
4
+ "epoch": 60.0,
5
+ "global_step": 5760,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "eval_exact_match": 36.3636,
13
+ "eval_loss": 0.007629383821040392,
14
+ "eval_runtime": 65.8126,
15
+ "eval_samples_per_second": 0.684,
16
+ "eval_steps_per_second": 0.182,
17
+ "step": 96
18
+ },
19
+ {
20
+ "epoch": 2.0,
21
+ "eval_exact_match": 50.0,
22
+ "eval_loss": 0.005913609638810158,
23
+ "eval_runtime": 61.2832,
24
+ "eval_samples_per_second": 0.734,
25
+ "eval_steps_per_second": 0.196,
26
+ "step": 192
27
+ },
28
+ {
29
+ "epoch": 3.0,
30
+ "eval_exact_match": 59.0909,
31
+ "eval_loss": 0.00651139160618186,
32
+ "eval_runtime": 24.3736,
33
+ "eval_samples_per_second": 1.846,
34
+ "eval_steps_per_second": 0.492,
35
+ "step": 288
36
+ },
37
+ {
38
+ "epoch": 4.0,
39
+ "eval_exact_match": 79.5455,
40
+ "eval_loss": 0.00496898777782917,
41
+ "eval_runtime": 26.7272,
42
+ "eval_samples_per_second": 1.684,
43
+ "eval_steps_per_second": 0.449,
44
+ "step": 384
45
+ },
46
+ {
47
+ "epoch": 5.0,
48
+ "eval_exact_match": 70.4545,
49
+ "eval_loss": 0.004105158615857363,
50
+ "eval_runtime": 34.8161,
51
+ "eval_samples_per_second": 1.293,
52
+ "eval_steps_per_second": 0.345,
53
+ "step": 480
54
+ },
55
+ {
56
+ "epoch": 5.21,
57
+ "learning_rate": 4.566840277777778e-05,
58
+ "loss": 0.0051,
59
+ "step": 500
60
+ },
61
+ {
62
+ "epoch": 6.0,
63
+ "eval_exact_match": 86.3636,
64
+ "eval_loss": 0.0037642279639840126,
65
+ "eval_runtime": 109.118,
66
+ "eval_samples_per_second": 0.412,
67
+ "eval_steps_per_second": 0.11,
68
+ "step": 576
69
+ },
70
+ {
71
+ "epoch": 7.0,
72
+ "eval_exact_match": 77.2727,
73
+ "eval_loss": 0.0040812077932059765,
74
+ "eval_runtime": 27.0233,
75
+ "eval_samples_per_second": 1.665,
76
+ "eval_steps_per_second": 0.444,
77
+ "step": 672
78
+ },
79
+ {
80
+ "epoch": 8.0,
81
+ "eval_exact_match": 84.0909,
82
+ "eval_loss": 0.0037790273781865835,
83
+ "eval_runtime": 26.262,
84
+ "eval_samples_per_second": 1.713,
85
+ "eval_steps_per_second": 0.457,
86
+ "step": 768
87
+ },
88
+ {
89
+ "epoch": 9.0,
90
+ "eval_exact_match": 81.8182,
91
+ "eval_loss": 0.0034369456116110086,
92
+ "eval_runtime": 48.2084,
93
+ "eval_samples_per_second": 0.933,
94
+ "eval_steps_per_second": 0.249,
95
+ "step": 864
96
+ },
97
+ {
98
+ "epoch": 10.0,
99
+ "eval_exact_match": 81.8182,
100
+ "eval_loss": 0.0035597539972513914,
101
+ "eval_runtime": 21.8113,
102
+ "eval_samples_per_second": 2.063,
103
+ "eval_steps_per_second": 0.55,
104
+ "step": 960
105
+ },
106
+ {
107
+ "epoch": 10.42,
108
+ "learning_rate": 4.1336805555555555e-05,
109
+ "loss": 0.0016,
110
+ "step": 1000
111
+ },
112
+ {
113
+ "epoch": 11.0,
114
+ "eval_exact_match": 84.0909,
115
+ "eval_loss": 0.003501500003039837,
116
+ "eval_runtime": 28.5426,
117
+ "eval_samples_per_second": 1.577,
118
+ "eval_steps_per_second": 0.42,
119
+ "step": 1056
120
+ },
121
+ {
122
+ "epoch": 12.0,
123
+ "eval_exact_match": 84.0909,
124
+ "eval_loss": 0.0038431978318840265,
125
+ "eval_runtime": 23.617,
126
+ "eval_samples_per_second": 1.905,
127
+ "eval_steps_per_second": 0.508,
128
+ "step": 1152
129
+ },
130
+ {
131
+ "epoch": 13.0,
132
+ "eval_exact_match": 86.3636,
133
+ "eval_loss": 0.003451160853728652,
134
+ "eval_runtime": 23.083,
135
+ "eval_samples_per_second": 1.949,
136
+ "eval_steps_per_second": 0.52,
137
+ "step": 1248
138
+ },
139
+ {
140
+ "epoch": 14.0,
141
+ "eval_exact_match": 88.6364,
142
+ "eval_loss": 0.003256796160712838,
143
+ "eval_runtime": 24.4477,
144
+ "eval_samples_per_second": 1.841,
145
+ "eval_steps_per_second": 0.491,
146
+ "step": 1344
147
+ },
148
+ {
149
+ "epoch": 15.0,
150
+ "eval_exact_match": 86.3636,
151
+ "eval_loss": 0.0036837367806583643,
152
+ "eval_runtime": 21.1697,
153
+ "eval_samples_per_second": 2.126,
154
+ "eval_steps_per_second": 0.567,
155
+ "step": 1440
156
+ },
157
+ {
158
+ "epoch": 15.62,
159
+ "learning_rate": 3.7005208333333334e-05,
160
+ "loss": 0.0011,
161
+ "step": 1500
162
+ },
163
+ {
164
+ "epoch": 16.0,
165
+ "eval_exact_match": 84.0909,
166
+ "eval_loss": 0.0032025405671447515,
167
+ "eval_runtime": 24.8073,
168
+ "eval_samples_per_second": 1.814,
169
+ "eval_steps_per_second": 0.484,
170
+ "step": 1536
171
+ },
172
+ {
173
+ "epoch": 17.0,
174
+ "eval_exact_match": 84.0909,
175
+ "eval_loss": 0.0033474608790129423,
176
+ "eval_runtime": 22.9067,
177
+ "eval_samples_per_second": 1.964,
178
+ "eval_steps_per_second": 0.524,
179
+ "step": 1632
180
+ },
181
+ {
182
+ "epoch": 18.0,
183
+ "eval_exact_match": 81.8182,
184
+ "eval_loss": 0.0030569082591682673,
185
+ "eval_runtime": 21.8837,
186
+ "eval_samples_per_second": 2.056,
187
+ "eval_steps_per_second": 0.548,
188
+ "step": 1728
189
+ },
190
+ {
191
+ "epoch": 19.0,
192
+ "eval_exact_match": 81.8182,
193
+ "eval_loss": 0.0035109743475914,
194
+ "eval_runtime": 23.2006,
195
+ "eval_samples_per_second": 1.94,
196
+ "eval_steps_per_second": 0.517,
197
+ "step": 1824
198
+ },
199
+ {
200
+ "epoch": 20.0,
201
+ "eval_exact_match": 63.6364,
202
+ "eval_loss": 0.005242642015218735,
203
+ "eval_runtime": 55.4958,
204
+ "eval_samples_per_second": 0.811,
205
+ "eval_steps_per_second": 0.216,
206
+ "step": 1920
207
+ },
208
+ {
209
+ "epoch": 20.83,
210
+ "learning_rate": 3.2673611111111114e-05,
211
+ "loss": 0.0007,
212
+ "step": 2000
213
+ },
214
+ {
215
+ "epoch": 21.0,
216
+ "eval_exact_match": 86.3636,
217
+ "eval_loss": 0.003536619246006012,
218
+ "eval_runtime": 21.309,
219
+ "eval_samples_per_second": 2.112,
220
+ "eval_steps_per_second": 0.563,
221
+ "step": 2016
222
+ },
223
+ {
224
+ "epoch": 22.0,
225
+ "eval_exact_match": 86.3636,
226
+ "eval_loss": 0.003465011017397046,
227
+ "eval_runtime": 26.3674,
228
+ "eval_samples_per_second": 1.707,
229
+ "eval_steps_per_second": 0.455,
230
+ "step": 2112
231
+ },
232
+ {
233
+ "epoch": 23.0,
234
+ "eval_exact_match": 84.0909,
235
+ "eval_loss": 0.003460909239947796,
236
+ "eval_runtime": 21.2892,
237
+ "eval_samples_per_second": 2.114,
238
+ "eval_steps_per_second": 0.564,
239
+ "step": 2208
240
+ },
241
+ {
242
+ "epoch": 24.0,
243
+ "eval_exact_match": 86.3636,
244
+ "eval_loss": 0.003597635542973876,
245
+ "eval_runtime": 21.6043,
246
+ "eval_samples_per_second": 2.083,
247
+ "eval_steps_per_second": 0.555,
248
+ "step": 2304
249
+ },
250
+ {
251
+ "epoch": 25.0,
252
+ "eval_exact_match": 84.0909,
253
+ "eval_loss": 0.003971734084188938,
254
+ "eval_runtime": 27.8121,
255
+ "eval_samples_per_second": 1.618,
256
+ "eval_steps_per_second": 0.431,
257
+ "step": 2400
258
+ },
259
+ {
260
+ "epoch": 26.0,
261
+ "eval_exact_match": 84.0909,
262
+ "eval_loss": 0.0037253580521792173,
263
+ "eval_runtime": 28.9924,
264
+ "eval_samples_per_second": 1.552,
265
+ "eval_steps_per_second": 0.414,
266
+ "step": 2496
267
+ },
268
+ {
269
+ "epoch": 26.04,
270
+ "learning_rate": 2.8342013888888887e-05,
271
+ "loss": 0.0004,
272
+ "step": 2500
273
+ },
274
+ {
275
+ "epoch": 27.0,
276
+ "eval_exact_match": 84.0909,
277
+ "eval_loss": 0.0037060389295220375,
278
+ "eval_runtime": 31.884,
279
+ "eval_samples_per_second": 1.411,
280
+ "eval_steps_per_second": 0.376,
281
+ "step": 2592
282
+ },
283
+ {
284
+ "epoch": 28.0,
285
+ "eval_exact_match": 84.0909,
286
+ "eval_loss": 0.003791552037000656,
287
+ "eval_runtime": 28.1348,
288
+ "eval_samples_per_second": 1.599,
289
+ "eval_steps_per_second": 0.427,
290
+ "step": 2688
291
+ },
292
+ {
293
+ "epoch": 29.0,
294
+ "eval_exact_match": 84.0909,
295
+ "eval_loss": 0.004038272891193628,
296
+ "eval_runtime": 21.0529,
297
+ "eval_samples_per_second": 2.137,
298
+ "eval_steps_per_second": 0.57,
299
+ "step": 2784
300
+ },
301
+ {
302
+ "epoch": 30.0,
303
+ "eval_exact_match": 86.3636,
304
+ "eval_loss": 0.0035975456703454256,
305
+ "eval_runtime": 22.9255,
306
+ "eval_samples_per_second": 1.963,
307
+ "eval_steps_per_second": 0.523,
308
+ "step": 2880
309
+ },
310
+ {
311
+ "epoch": 31.0,
312
+ "eval_exact_match": 81.8182,
313
+ "eval_loss": 0.0037584907840937376,
314
+ "eval_runtime": 23.3537,
315
+ "eval_samples_per_second": 1.927,
316
+ "eval_steps_per_second": 0.514,
317
+ "step": 2976
318
+ },
319
+ {
320
+ "epoch": 31.25,
321
+ "learning_rate": 2.4010416666666663e-05,
322
+ "loss": 0.0003,
323
+ "step": 3000
324
+ },
325
+ {
326
+ "epoch": 32.0,
327
+ "eval_exact_match": 84.0909,
328
+ "eval_loss": 0.003442410146817565,
329
+ "eval_runtime": 26.8005,
330
+ "eval_samples_per_second": 1.679,
331
+ "eval_steps_per_second": 0.448,
332
+ "step": 3072
333
+ },
334
+ {
335
+ "epoch": 33.0,
336
+ "eval_exact_match": 81.8182,
337
+ "eval_loss": 0.0034424064215272665,
338
+ "eval_runtime": 22.6426,
339
+ "eval_samples_per_second": 1.987,
340
+ "eval_steps_per_second": 0.53,
341
+ "step": 3168
342
+ },
343
+ {
344
+ "epoch": 34.0,
345
+ "eval_exact_match": 84.0909,
346
+ "eval_loss": 0.003292588982731104,
347
+ "eval_runtime": 28.9863,
348
+ "eval_samples_per_second": 1.552,
349
+ "eval_steps_per_second": 0.414,
350
+ "step": 3264
351
+ },
352
+ {
353
+ "epoch": 35.0,
354
+ "eval_exact_match": 81.8182,
355
+ "eval_loss": 0.0034707931336015463,
356
+ "eval_runtime": 22.613,
357
+ "eval_samples_per_second": 1.99,
358
+ "eval_steps_per_second": 0.531,
359
+ "step": 3360
360
+ },
361
+ {
362
+ "epoch": 36.0,
363
+ "eval_exact_match": 81.8182,
364
+ "eval_loss": 0.0034839294385164976,
365
+ "eval_runtime": 22.2464,
366
+ "eval_samples_per_second": 2.023,
367
+ "eval_steps_per_second": 0.539,
368
+ "step": 3456
369
+ },
370
+ {
371
+ "epoch": 36.46,
372
+ "learning_rate": 1.9678819444444446e-05,
373
+ "loss": 0.0002,
374
+ "step": 3500
375
+ },
376
+ {
377
+ "epoch": 37.0,
378
+ "eval_exact_match": 81.8182,
379
+ "eval_loss": 0.0033870378974825144,
380
+ "eval_runtime": 22.1209,
381
+ "eval_samples_per_second": 2.034,
382
+ "eval_steps_per_second": 0.542,
383
+ "step": 3552
384
+ },
385
+ {
386
+ "epoch": 38.0,
387
+ "eval_exact_match": 84.0909,
388
+ "eval_loss": 0.003428942058235407,
389
+ "eval_runtime": 22.2005,
390
+ "eval_samples_per_second": 2.027,
391
+ "eval_steps_per_second": 0.541,
392
+ "step": 3648
393
+ },
394
+ {
395
+ "epoch": 39.0,
396
+ "eval_exact_match": 84.0909,
397
+ "eval_loss": 0.003560782875865698,
398
+ "eval_runtime": 22.3893,
399
+ "eval_samples_per_second": 2.01,
400
+ "eval_steps_per_second": 0.536,
401
+ "step": 3744
402
+ },
403
+ {
404
+ "epoch": 40.0,
405
+ "eval_exact_match": 81.8182,
406
+ "eval_loss": 0.003526048269122839,
407
+ "eval_runtime": 22.5512,
408
+ "eval_samples_per_second": 1.995,
409
+ "eval_steps_per_second": 0.532,
410
+ "step": 3840
411
+ },
412
+ {
413
+ "epoch": 41.0,
414
+ "eval_exact_match": 81.8182,
415
+ "eval_loss": 0.003472343785688281,
416
+ "eval_runtime": 24.974,
417
+ "eval_samples_per_second": 1.802,
418
+ "eval_steps_per_second": 0.481,
419
+ "step": 3936
420
+ },
421
+ {
422
+ "epoch": 41.67,
423
+ "learning_rate": 1.5347222222222222e-05,
424
+ "loss": 0.0001,
425
+ "step": 4000
426
+ },
427
+ {
428
+ "epoch": 42.0,
429
+ "eval_exact_match": 81.8182,
430
+ "eval_loss": 0.0036066400352865458,
431
+ "eval_runtime": 22.3819,
432
+ "eval_samples_per_second": 2.011,
433
+ "eval_steps_per_second": 0.536,
434
+ "step": 4032
435
+ },
436
+ {
437
+ "epoch": 43.0,
438
+ "eval_exact_match": 81.8182,
439
+ "eval_loss": 0.0035321256145834923,
440
+ "eval_runtime": 22.355,
441
+ "eval_samples_per_second": 2.013,
442
+ "eval_steps_per_second": 0.537,
443
+ "step": 4128
444
+ },
445
+ {
446
+ "epoch": 44.0,
447
+ "eval_exact_match": 81.8182,
448
+ "eval_loss": 0.0035351745318621397,
449
+ "eval_runtime": 22.294,
450
+ "eval_samples_per_second": 2.018,
451
+ "eval_steps_per_second": 0.538,
452
+ "step": 4224
453
+ },
454
+ {
455
+ "epoch": 45.0,
456
+ "eval_exact_match": 81.8182,
457
+ "eval_loss": 0.0035474663600325584,
458
+ "eval_runtime": 22.453,
459
+ "eval_samples_per_second": 2.004,
460
+ "eval_steps_per_second": 0.534,
461
+ "step": 4320
462
+ },
463
+ {
464
+ "epoch": 46.0,
465
+ "eval_exact_match": 81.8182,
466
+ "eval_loss": 0.003549758577719331,
467
+ "eval_runtime": 22.3855,
468
+ "eval_samples_per_second": 2.01,
469
+ "eval_steps_per_second": 0.536,
470
+ "step": 4416
471
+ },
472
+ {
473
+ "epoch": 46.88,
474
+ "learning_rate": 1.1015625e-05,
475
+ "loss": 0.0,
476
+ "step": 4500
477
+ },
478
+ {
479
+ "epoch": 47.0,
480
+ "eval_exact_match": 81.8182,
481
+ "eval_loss": 0.0035524307750165462,
482
+ "eval_runtime": 22.2755,
483
+ "eval_samples_per_second": 2.02,
484
+ "eval_steps_per_second": 0.539,
485
+ "step": 4512
486
+ },
487
+ {
488
+ "epoch": 48.0,
489
+ "eval_exact_match": 81.8182,
490
+ "eval_loss": 0.003558830823749304,
491
+ "eval_runtime": 21.5364,
492
+ "eval_samples_per_second": 2.089,
493
+ "eval_steps_per_second": 0.557,
494
+ "step": 4608
495
+ },
496
+ {
497
+ "epoch": 49.0,
498
+ "eval_exact_match": 81.8182,
499
+ "eval_loss": 0.003596294904127717,
500
+ "eval_runtime": 21.3935,
501
+ "eval_samples_per_second": 2.103,
502
+ "eval_steps_per_second": 0.561,
503
+ "step": 4704
504
+ },
505
+ {
506
+ "epoch": 50.0,
507
+ "eval_exact_match": 84.0909,
508
+ "eval_loss": 0.0035491541493684053,
509
+ "eval_runtime": 24.3301,
510
+ "eval_samples_per_second": 1.85,
511
+ "eval_steps_per_second": 0.493,
512
+ "step": 4800
513
+ },
514
+ {
515
+ "epoch": 51.0,
516
+ "eval_exact_match": 84.0909,
517
+ "eval_loss": 0.0035416865721344948,
518
+ "eval_runtime": 21.4122,
519
+ "eval_samples_per_second": 2.102,
520
+ "eval_steps_per_second": 0.56,
521
+ "step": 4896
522
+ },
523
+ {
524
+ "epoch": 52.0,
525
+ "eval_exact_match": 84.0909,
526
+ "eval_loss": 0.003554833587259054,
527
+ "eval_runtime": 21.5127,
528
+ "eval_samples_per_second": 2.092,
529
+ "eval_steps_per_second": 0.558,
530
+ "step": 4992
531
+ },
532
+ {
533
+ "epoch": 52.08,
534
+ "learning_rate": 6.684027777777776e-06,
535
+ "loss": 0.0,
536
+ "step": 5000
537
+ },
538
+ {
539
+ "epoch": 53.0,
540
+ "eval_exact_match": 84.0909,
541
+ "eval_loss": 0.003558357246220112,
542
+ "eval_runtime": 24.3954,
543
+ "eval_samples_per_second": 1.845,
544
+ "eval_steps_per_second": 0.492,
545
+ "step": 5088
546
+ },
547
+ {
548
+ "epoch": 54.0,
549
+ "eval_exact_match": 84.0909,
550
+ "eval_loss": 0.003560525830835104,
551
+ "eval_runtime": 21.4306,
552
+ "eval_samples_per_second": 2.1,
553
+ "eval_steps_per_second": 0.56,
554
+ "step": 5184
555
+ },
556
+ {
557
+ "epoch": 55.0,
558
+ "eval_exact_match": 84.0909,
559
+ "eval_loss": 0.003565275575965643,
560
+ "eval_runtime": 21.3818,
561
+ "eval_samples_per_second": 2.105,
562
+ "eval_steps_per_second": 0.561,
563
+ "step": 5280
564
+ },
565
+ {
566
+ "epoch": 56.0,
567
+ "eval_exact_match": 84.0909,
568
+ "eval_loss": 0.0035686639603227377,
569
+ "eval_runtime": 21.4209,
570
+ "eval_samples_per_second": 2.101,
571
+ "eval_steps_per_second": 0.56,
572
+ "step": 5376
573
+ },
574
+ {
575
+ "epoch": 57.0,
576
+ "eval_exact_match": 84.0909,
577
+ "eval_loss": 0.0035712106619030237,
578
+ "eval_runtime": 24.3811,
579
+ "eval_samples_per_second": 1.846,
580
+ "eval_steps_per_second": 0.492,
581
+ "step": 5472
582
+ },
583
+ {
584
+ "epoch": 57.29,
585
+ "learning_rate": 2.352430555555553e-06,
586
+ "loss": 0.0,
587
+ "step": 5500
588
+ },
589
+ {
590
+ "epoch": 58.0,
591
+ "eval_exact_match": 84.0909,
592
+ "eval_loss": 0.0035736262798309326,
593
+ "eval_runtime": 21.4478,
594
+ "eval_samples_per_second": 2.098,
595
+ "eval_steps_per_second": 0.559,
596
+ "step": 5568
597
+ },
598
+ {
599
+ "epoch": 59.0,
600
+ "eval_exact_match": 84.0909,
601
+ "eval_loss": 0.0035762363113462925,
602
+ "eval_runtime": 21.3771,
603
+ "eval_samples_per_second": 2.105,
604
+ "eval_steps_per_second": 0.561,
605
+ "step": 5664
606
+ },
607
+ {
608
+ "epoch": 60.0,
609
+ "eval_exact_match": 84.0909,
610
+ "eval_loss": 0.003574397647753358,
611
+ "eval_runtime": 21.4318,
612
+ "eval_samples_per_second": 2.1,
613
+ "eval_steps_per_second": 0.56,
614
+ "step": 5760
615
+ }
616
+ ],
617
+ "max_steps": 5760,
618
+ "num_train_epochs": 60,
619
+ "total_flos": 2.80607795970048e+16,
620
+ "trial_name": null,
621
+ "trial_params": null
622
+ }
qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-5760/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:118a642995f3567a78b3ee996aa9cc1020a9328e169e41325917475ad605830a
3
+ size 3768
qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-5760/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
qa_atis_codet5p-220m_s2_latex_bs_lr/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/data/tir/projects/tir7/user_data/priyansk/qa_s2_codet5p-220m_latex/checkpoint-3300/",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "bos_token_id": 1,
7
+ "d_ff": 3072,
8
+ "d_kv": 64,
9
+ "d_model": 768,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "relu",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 2,
14
+ "feed_forward_proj": "relu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": false,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "max_length": 1024,
20
+ "model_type": "t5",
21
+ "n_positions": 512,
22
+ "num_decoder_layers": 12,
23
+ "num_heads": 12,
24
+ "num_layers": 12,
25
+ "output_past": true,
26
+ "pad_token_id": 0,
27
+ "relative_attention_max_distance": 128,
28
+ "relative_attention_num_buckets": 32,
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.17.0",
31
+ "use_cache": true,
32
+ "vocab_size": 32100
33
+ }
qa_atis_codet5p-220m_s2_latex_bs_lr/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
qa_atis_codet5p-220m_s2_latex_bs_lr/metric.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ Val {'eval_loss': 0.003256796160712838, 'eval_exact_match': 88.6364, 'eval_runtime': 23.4069, 'eval_samples_per_second': 1.923, 'eval_steps_per_second': 0.513, 'epoch': 60.0}
2
+ Test {'eval_loss': 0.009773972444236279, 'eval_exact_match': 77.381, 'eval_runtime': 175.7219, 'eval_samples_per_second': 0.489, 'eval_steps_per_second': 0.125, 'epoch': 60.0}
qa_atis_codet5p-220m_s2_latex_bs_lr/pred.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e3fb03c0ae880ed315c7125905440e12bf6a23648935ae79bd71104ce3b597e
3
+ size 23193
qa_atis_codet5p-220m_s2_latex_bs_lr/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:73f7f1f5d6ce7087fa4ee832b3762c6a72913500559416e2aaca72850e9a475d
3
+ size 891647438
qa_atis_codet5p-220m_s2_latex_bs_lr/runs/Apr11_07-35-59_babel-3-9/1712835366.760723/events.out.tfevents.1712835366.babel-3-9 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34805ada2fe80aa98221333439a02b620a69b7ac1819ecd2c7ff512979613562
3
+ size 4987
qa_atis_codet5p-220m_s2_latex_bs_lr/runs/Apr11_07-35-59_babel-3-9/events.out.tfevents.1712835366.babel-3-9 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f11f1df1d166d6bb4455b198c66430e4db7ffd41e630ebacef78bba81fc635e2
3
+ size 25451
qa_atis_codet5p-220m_s2_latex_bs_lr/runs/Apr11_07-35-59_babel-3-9/events.out.tfevents.1712840389.babel-3-9 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5a51754f7edd0bb9acf52197aedb9cc3eb6146c2f7af450f9f57359ab24de22d
3
+ size 692
qa_atis_codet5p-220m_s2_latex_bs_lr/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, "additional_special_tokens": [{"content": "<extra_id_99>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_98>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_97>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_96>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_95>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_94>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_93>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_92>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_91>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_90>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_89>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_88>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_87>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_86>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_85>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_84>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_83>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_82>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_81>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_80>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_79>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_78>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_77>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_76>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_75>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_74>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_73>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_72>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_71>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_70>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_69>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_68>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_67>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_66>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_65>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_64>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_63>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_62>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_61>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_60>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_59>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_58>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_57>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_56>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_55>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_54>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_53>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_52>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_51>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_50>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_49>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_48>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_47>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_46>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_45>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_44>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_43>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_42>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_41>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_40>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_39>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_38>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_37>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_36>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_35>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_34>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_33>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_32>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_31>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_30>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_29>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_28>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_27>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_26>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_25>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_24>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_23>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_22>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_21>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_20>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_19>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_18>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_17>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_16>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_15>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_14>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_13>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_12>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_11>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_10>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_9>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_8>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_7>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_6>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_5>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_4>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_3>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_2>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_1>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_0>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}]}
qa_atis_codet5p-220m_s2_latex_bs_lr/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
qa_atis_codet5p-220m_s2_latex_bs_lr/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"errors": "replace", "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "add_prefix_space": false, "trim_offsets": true, "model_max_length": 512, "special_tokens_map_file": "/data/datasets/hf_cache/transformers/f432e4eb4a7dfc04b533beea5590e11f4b46c86f5630e8a032704ef76d7269c1.b9905d0575bde443a20834122b6e2d48e853b2e36444ce98ddeb43c38097eb3f", "name_or_path": "/data/tir/projects/tir7/user_data/priyansk/qa_s2_codet5p-220m_latex/checkpoint-3300/", "tokenizer_class": "RobertaTokenizer"}
qa_atis_codet5p-220m_s2_latex_bs_lr/trainer_state.json ADDED
@@ -0,0 +1,631 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 88.6364,
3
+ "best_model_checkpoint": "/data/tir/projects/tir7/user_data/priyansk/qa_atis_codet5p-220m_s2_latex_bs_lr/checkpoint-1344",
4
+ "epoch": 60.0,
5
+ "global_step": 5760,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "eval_exact_match": 36.3636,
13
+ "eval_loss": 0.007629383821040392,
14
+ "eval_runtime": 65.8126,
15
+ "eval_samples_per_second": 0.684,
16
+ "eval_steps_per_second": 0.182,
17
+ "step": 96
18
+ },
19
+ {
20
+ "epoch": 2.0,
21
+ "eval_exact_match": 50.0,
22
+ "eval_loss": 0.005913609638810158,
23
+ "eval_runtime": 61.2832,
24
+ "eval_samples_per_second": 0.734,
25
+ "eval_steps_per_second": 0.196,
26
+ "step": 192
27
+ },
28
+ {
29
+ "epoch": 3.0,
30
+ "eval_exact_match": 59.0909,
31
+ "eval_loss": 0.00651139160618186,
32
+ "eval_runtime": 24.3736,
33
+ "eval_samples_per_second": 1.846,
34
+ "eval_steps_per_second": 0.492,
35
+ "step": 288
36
+ },
37
+ {
38
+ "epoch": 4.0,
39
+ "eval_exact_match": 79.5455,
40
+ "eval_loss": 0.00496898777782917,
41
+ "eval_runtime": 26.7272,
42
+ "eval_samples_per_second": 1.684,
43
+ "eval_steps_per_second": 0.449,
44
+ "step": 384
45
+ },
46
+ {
47
+ "epoch": 5.0,
48
+ "eval_exact_match": 70.4545,
49
+ "eval_loss": 0.004105158615857363,
50
+ "eval_runtime": 34.8161,
51
+ "eval_samples_per_second": 1.293,
52
+ "eval_steps_per_second": 0.345,
53
+ "step": 480
54
+ },
55
+ {
56
+ "epoch": 5.21,
57
+ "learning_rate": 4.566840277777778e-05,
58
+ "loss": 0.0051,
59
+ "step": 500
60
+ },
61
+ {
62
+ "epoch": 6.0,
63
+ "eval_exact_match": 86.3636,
64
+ "eval_loss": 0.0037642279639840126,
65
+ "eval_runtime": 109.118,
66
+ "eval_samples_per_second": 0.412,
67
+ "eval_steps_per_second": 0.11,
68
+ "step": 576
69
+ },
70
+ {
71
+ "epoch": 7.0,
72
+ "eval_exact_match": 77.2727,
73
+ "eval_loss": 0.0040812077932059765,
74
+ "eval_runtime": 27.0233,
75
+ "eval_samples_per_second": 1.665,
76
+ "eval_steps_per_second": 0.444,
77
+ "step": 672
78
+ },
79
+ {
80
+ "epoch": 8.0,
81
+ "eval_exact_match": 84.0909,
82
+ "eval_loss": 0.0037790273781865835,
83
+ "eval_runtime": 26.262,
84
+ "eval_samples_per_second": 1.713,
85
+ "eval_steps_per_second": 0.457,
86
+ "step": 768
87
+ },
88
+ {
89
+ "epoch": 9.0,
90
+ "eval_exact_match": 81.8182,
91
+ "eval_loss": 0.0034369456116110086,
92
+ "eval_runtime": 48.2084,
93
+ "eval_samples_per_second": 0.933,
94
+ "eval_steps_per_second": 0.249,
95
+ "step": 864
96
+ },
97
+ {
98
+ "epoch": 10.0,
99
+ "eval_exact_match": 81.8182,
100
+ "eval_loss": 0.0035597539972513914,
101
+ "eval_runtime": 21.8113,
102
+ "eval_samples_per_second": 2.063,
103
+ "eval_steps_per_second": 0.55,
104
+ "step": 960
105
+ },
106
+ {
107
+ "epoch": 10.42,
108
+ "learning_rate": 4.1336805555555555e-05,
109
+ "loss": 0.0016,
110
+ "step": 1000
111
+ },
112
+ {
113
+ "epoch": 11.0,
114
+ "eval_exact_match": 84.0909,
115
+ "eval_loss": 0.003501500003039837,
116
+ "eval_runtime": 28.5426,
117
+ "eval_samples_per_second": 1.577,
118
+ "eval_steps_per_second": 0.42,
119
+ "step": 1056
120
+ },
121
+ {
122
+ "epoch": 12.0,
123
+ "eval_exact_match": 84.0909,
124
+ "eval_loss": 0.0038431978318840265,
125
+ "eval_runtime": 23.617,
126
+ "eval_samples_per_second": 1.905,
127
+ "eval_steps_per_second": 0.508,
128
+ "step": 1152
129
+ },
130
+ {
131
+ "epoch": 13.0,
132
+ "eval_exact_match": 86.3636,
133
+ "eval_loss": 0.003451160853728652,
134
+ "eval_runtime": 23.083,
135
+ "eval_samples_per_second": 1.949,
136
+ "eval_steps_per_second": 0.52,
137
+ "step": 1248
138
+ },
139
+ {
140
+ "epoch": 14.0,
141
+ "eval_exact_match": 88.6364,
142
+ "eval_loss": 0.003256796160712838,
143
+ "eval_runtime": 24.4477,
144
+ "eval_samples_per_second": 1.841,
145
+ "eval_steps_per_second": 0.491,
146
+ "step": 1344
147
+ },
148
+ {
149
+ "epoch": 15.0,
150
+ "eval_exact_match": 86.3636,
151
+ "eval_loss": 0.0036837367806583643,
152
+ "eval_runtime": 21.1697,
153
+ "eval_samples_per_second": 2.126,
154
+ "eval_steps_per_second": 0.567,
155
+ "step": 1440
156
+ },
157
+ {
158
+ "epoch": 15.62,
159
+ "learning_rate": 3.7005208333333334e-05,
160
+ "loss": 0.0011,
161
+ "step": 1500
162
+ },
163
+ {
164
+ "epoch": 16.0,
165
+ "eval_exact_match": 84.0909,
166
+ "eval_loss": 0.0032025405671447515,
167
+ "eval_runtime": 24.8073,
168
+ "eval_samples_per_second": 1.814,
169
+ "eval_steps_per_second": 0.484,
170
+ "step": 1536
171
+ },
172
+ {
173
+ "epoch": 17.0,
174
+ "eval_exact_match": 84.0909,
175
+ "eval_loss": 0.0033474608790129423,
176
+ "eval_runtime": 22.9067,
177
+ "eval_samples_per_second": 1.964,
178
+ "eval_steps_per_second": 0.524,
179
+ "step": 1632
180
+ },
181
+ {
182
+ "epoch": 18.0,
183
+ "eval_exact_match": 81.8182,
184
+ "eval_loss": 0.0030569082591682673,
185
+ "eval_runtime": 21.8837,
186
+ "eval_samples_per_second": 2.056,
187
+ "eval_steps_per_second": 0.548,
188
+ "step": 1728
189
+ },
190
+ {
191
+ "epoch": 19.0,
192
+ "eval_exact_match": 81.8182,
193
+ "eval_loss": 0.0035109743475914,
194
+ "eval_runtime": 23.2006,
195
+ "eval_samples_per_second": 1.94,
196
+ "eval_steps_per_second": 0.517,
197
+ "step": 1824
198
+ },
199
+ {
200
+ "epoch": 20.0,
201
+ "eval_exact_match": 63.6364,
202
+ "eval_loss": 0.005242642015218735,
203
+ "eval_runtime": 55.4958,
204
+ "eval_samples_per_second": 0.811,
205
+ "eval_steps_per_second": 0.216,
206
+ "step": 1920
207
+ },
208
+ {
209
+ "epoch": 20.83,
210
+ "learning_rate": 3.2673611111111114e-05,
211
+ "loss": 0.0007,
212
+ "step": 2000
213
+ },
214
+ {
215
+ "epoch": 21.0,
216
+ "eval_exact_match": 86.3636,
217
+ "eval_loss": 0.003536619246006012,
218
+ "eval_runtime": 21.309,
219
+ "eval_samples_per_second": 2.112,
220
+ "eval_steps_per_second": 0.563,
221
+ "step": 2016
222
+ },
223
+ {
224
+ "epoch": 22.0,
225
+ "eval_exact_match": 86.3636,
226
+ "eval_loss": 0.003465011017397046,
227
+ "eval_runtime": 26.3674,
228
+ "eval_samples_per_second": 1.707,
229
+ "eval_steps_per_second": 0.455,
230
+ "step": 2112
231
+ },
232
+ {
233
+ "epoch": 23.0,
234
+ "eval_exact_match": 84.0909,
235
+ "eval_loss": 0.003460909239947796,
236
+ "eval_runtime": 21.2892,
237
+ "eval_samples_per_second": 2.114,
238
+ "eval_steps_per_second": 0.564,
239
+ "step": 2208
240
+ },
241
+ {
242
+ "epoch": 24.0,
243
+ "eval_exact_match": 86.3636,
244
+ "eval_loss": 0.003597635542973876,
245
+ "eval_runtime": 21.6043,
246
+ "eval_samples_per_second": 2.083,
247
+ "eval_steps_per_second": 0.555,
248
+ "step": 2304
249
+ },
250
+ {
251
+ "epoch": 25.0,
252
+ "eval_exact_match": 84.0909,
253
+ "eval_loss": 0.003971734084188938,
254
+ "eval_runtime": 27.8121,
255
+ "eval_samples_per_second": 1.618,
256
+ "eval_steps_per_second": 0.431,
257
+ "step": 2400
258
+ },
259
+ {
260
+ "epoch": 26.0,
261
+ "eval_exact_match": 84.0909,
262
+ "eval_loss": 0.0037253580521792173,
263
+ "eval_runtime": 28.9924,
264
+ "eval_samples_per_second": 1.552,
265
+ "eval_steps_per_second": 0.414,
266
+ "step": 2496
267
+ },
268
+ {
269
+ "epoch": 26.04,
270
+ "learning_rate": 2.8342013888888887e-05,
271
+ "loss": 0.0004,
272
+ "step": 2500
273
+ },
274
+ {
275
+ "epoch": 27.0,
276
+ "eval_exact_match": 84.0909,
277
+ "eval_loss": 0.0037060389295220375,
278
+ "eval_runtime": 31.884,
279
+ "eval_samples_per_second": 1.411,
280
+ "eval_steps_per_second": 0.376,
281
+ "step": 2592
282
+ },
283
+ {
284
+ "epoch": 28.0,
285
+ "eval_exact_match": 84.0909,
286
+ "eval_loss": 0.003791552037000656,
287
+ "eval_runtime": 28.1348,
288
+ "eval_samples_per_second": 1.599,
289
+ "eval_steps_per_second": 0.427,
290
+ "step": 2688
291
+ },
292
+ {
293
+ "epoch": 29.0,
294
+ "eval_exact_match": 84.0909,
295
+ "eval_loss": 0.004038272891193628,
296
+ "eval_runtime": 21.0529,
297
+ "eval_samples_per_second": 2.137,
298
+ "eval_steps_per_second": 0.57,
299
+ "step": 2784
300
+ },
301
+ {
302
+ "epoch": 30.0,
303
+ "eval_exact_match": 86.3636,
304
+ "eval_loss": 0.0035975456703454256,
305
+ "eval_runtime": 22.9255,
306
+ "eval_samples_per_second": 1.963,
307
+ "eval_steps_per_second": 0.523,
308
+ "step": 2880
309
+ },
310
+ {
311
+ "epoch": 31.0,
312
+ "eval_exact_match": 81.8182,
313
+ "eval_loss": 0.0037584907840937376,
314
+ "eval_runtime": 23.3537,
315
+ "eval_samples_per_second": 1.927,
316
+ "eval_steps_per_second": 0.514,
317
+ "step": 2976
318
+ },
319
+ {
320
+ "epoch": 31.25,
321
+ "learning_rate": 2.4010416666666663e-05,
322
+ "loss": 0.0003,
323
+ "step": 3000
324
+ },
325
+ {
326
+ "epoch": 32.0,
327
+ "eval_exact_match": 84.0909,
328
+ "eval_loss": 0.003442410146817565,
329
+ "eval_runtime": 26.8005,
330
+ "eval_samples_per_second": 1.679,
331
+ "eval_steps_per_second": 0.448,
332
+ "step": 3072
333
+ },
334
+ {
335
+ "epoch": 33.0,
336
+ "eval_exact_match": 81.8182,
337
+ "eval_loss": 0.0034424064215272665,
338
+ "eval_runtime": 22.6426,
339
+ "eval_samples_per_second": 1.987,
340
+ "eval_steps_per_second": 0.53,
341
+ "step": 3168
342
+ },
343
+ {
344
+ "epoch": 34.0,
345
+ "eval_exact_match": 84.0909,
346
+ "eval_loss": 0.003292588982731104,
347
+ "eval_runtime": 28.9863,
348
+ "eval_samples_per_second": 1.552,
349
+ "eval_steps_per_second": 0.414,
350
+ "step": 3264
351
+ },
352
+ {
353
+ "epoch": 35.0,
354
+ "eval_exact_match": 81.8182,
355
+ "eval_loss": 0.0034707931336015463,
356
+ "eval_runtime": 22.613,
357
+ "eval_samples_per_second": 1.99,
358
+ "eval_steps_per_second": 0.531,
359
+ "step": 3360
360
+ },
361
+ {
362
+ "epoch": 36.0,
363
+ "eval_exact_match": 81.8182,
364
+ "eval_loss": 0.0034839294385164976,
365
+ "eval_runtime": 22.2464,
366
+ "eval_samples_per_second": 2.023,
367
+ "eval_steps_per_second": 0.539,
368
+ "step": 3456
369
+ },
370
+ {
371
+ "epoch": 36.46,
372
+ "learning_rate": 1.9678819444444446e-05,
373
+ "loss": 0.0002,
374
+ "step": 3500
375
+ },
376
+ {
377
+ "epoch": 37.0,
378
+ "eval_exact_match": 81.8182,
379
+ "eval_loss": 0.0033870378974825144,
380
+ "eval_runtime": 22.1209,
381
+ "eval_samples_per_second": 2.034,
382
+ "eval_steps_per_second": 0.542,
383
+ "step": 3552
384
+ },
385
+ {
386
+ "epoch": 38.0,
387
+ "eval_exact_match": 84.0909,
388
+ "eval_loss": 0.003428942058235407,
389
+ "eval_runtime": 22.2005,
390
+ "eval_samples_per_second": 2.027,
391
+ "eval_steps_per_second": 0.541,
392
+ "step": 3648
393
+ },
394
+ {
395
+ "epoch": 39.0,
396
+ "eval_exact_match": 84.0909,
397
+ "eval_loss": 0.003560782875865698,
398
+ "eval_runtime": 22.3893,
399
+ "eval_samples_per_second": 2.01,
400
+ "eval_steps_per_second": 0.536,
401
+ "step": 3744
402
+ },
403
+ {
404
+ "epoch": 40.0,
405
+ "eval_exact_match": 81.8182,
406
+ "eval_loss": 0.003526048269122839,
407
+ "eval_runtime": 22.5512,
408
+ "eval_samples_per_second": 1.995,
409
+ "eval_steps_per_second": 0.532,
410
+ "step": 3840
411
+ },
412
+ {
413
+ "epoch": 41.0,
414
+ "eval_exact_match": 81.8182,
415
+ "eval_loss": 0.003472343785688281,
416
+ "eval_runtime": 24.974,
417
+ "eval_samples_per_second": 1.802,
418
+ "eval_steps_per_second": 0.481,
419
+ "step": 3936
420
+ },
421
+ {
422
+ "epoch": 41.67,
423
+ "learning_rate": 1.5347222222222222e-05,
424
+ "loss": 0.0001,
425
+ "step": 4000
426
+ },
427
+ {
428
+ "epoch": 42.0,
429
+ "eval_exact_match": 81.8182,
430
+ "eval_loss": 0.0036066400352865458,
431
+ "eval_runtime": 22.3819,
432
+ "eval_samples_per_second": 2.011,
433
+ "eval_steps_per_second": 0.536,
434
+ "step": 4032
435
+ },
436
+ {
437
+ "epoch": 43.0,
438
+ "eval_exact_match": 81.8182,
439
+ "eval_loss": 0.0035321256145834923,
440
+ "eval_runtime": 22.355,
441
+ "eval_samples_per_second": 2.013,
442
+ "eval_steps_per_second": 0.537,
443
+ "step": 4128
444
+ },
445
+ {
446
+ "epoch": 44.0,
447
+ "eval_exact_match": 81.8182,
448
+ "eval_loss": 0.0035351745318621397,
449
+ "eval_runtime": 22.294,
450
+ "eval_samples_per_second": 2.018,
451
+ "eval_steps_per_second": 0.538,
452
+ "step": 4224
453
+ },
454
+ {
455
+ "epoch": 45.0,
456
+ "eval_exact_match": 81.8182,
457
+ "eval_loss": 0.0035474663600325584,
458
+ "eval_runtime": 22.453,
459
+ "eval_samples_per_second": 2.004,
460
+ "eval_steps_per_second": 0.534,
461
+ "step": 4320
462
+ },
463
+ {
464
+ "epoch": 46.0,
465
+ "eval_exact_match": 81.8182,
466
+ "eval_loss": 0.003549758577719331,
467
+ "eval_runtime": 22.3855,
468
+ "eval_samples_per_second": 2.01,
469
+ "eval_steps_per_second": 0.536,
470
+ "step": 4416
471
+ },
472
+ {
473
+ "epoch": 46.88,
474
+ "learning_rate": 1.1015625e-05,
475
+ "loss": 0.0,
476
+ "step": 4500
477
+ },
478
+ {
479
+ "epoch": 47.0,
480
+ "eval_exact_match": 81.8182,
481
+ "eval_loss": 0.0035524307750165462,
482
+ "eval_runtime": 22.2755,
483
+ "eval_samples_per_second": 2.02,
484
+ "eval_steps_per_second": 0.539,
485
+ "step": 4512
486
+ },
487
+ {
488
+ "epoch": 48.0,
489
+ "eval_exact_match": 81.8182,
490
+ "eval_loss": 0.003558830823749304,
491
+ "eval_runtime": 21.5364,
492
+ "eval_samples_per_second": 2.089,
493
+ "eval_steps_per_second": 0.557,
494
+ "step": 4608
495
+ },
496
+ {
497
+ "epoch": 49.0,
498
+ "eval_exact_match": 81.8182,
499
+ "eval_loss": 0.003596294904127717,
500
+ "eval_runtime": 21.3935,
501
+ "eval_samples_per_second": 2.103,
502
+ "eval_steps_per_second": 0.561,
503
+ "step": 4704
504
+ },
505
+ {
506
+ "epoch": 50.0,
507
+ "eval_exact_match": 84.0909,
508
+ "eval_loss": 0.0035491541493684053,
509
+ "eval_runtime": 24.3301,
510
+ "eval_samples_per_second": 1.85,
511
+ "eval_steps_per_second": 0.493,
512
+ "step": 4800
513
+ },
514
+ {
515
+ "epoch": 51.0,
516
+ "eval_exact_match": 84.0909,
517
+ "eval_loss": 0.0035416865721344948,
518
+ "eval_runtime": 21.4122,
519
+ "eval_samples_per_second": 2.102,
520
+ "eval_steps_per_second": 0.56,
521
+ "step": 4896
522
+ },
523
+ {
524
+ "epoch": 52.0,
525
+ "eval_exact_match": 84.0909,
526
+ "eval_loss": 0.003554833587259054,
527
+ "eval_runtime": 21.5127,
528
+ "eval_samples_per_second": 2.092,
529
+ "eval_steps_per_second": 0.558,
530
+ "step": 4992
531
+ },
532
+ {
533
+ "epoch": 52.08,
534
+ "learning_rate": 6.684027777777776e-06,
535
+ "loss": 0.0,
536
+ "step": 5000
537
+ },
538
+ {
539
+ "epoch": 53.0,
540
+ "eval_exact_match": 84.0909,
541
+ "eval_loss": 0.003558357246220112,
542
+ "eval_runtime": 24.3954,
543
+ "eval_samples_per_second": 1.845,
544
+ "eval_steps_per_second": 0.492,
545
+ "step": 5088
546
+ },
547
+ {
548
+ "epoch": 54.0,
549
+ "eval_exact_match": 84.0909,
550
+ "eval_loss": 0.003560525830835104,
551
+ "eval_runtime": 21.4306,
552
+ "eval_samples_per_second": 2.1,
553
+ "eval_steps_per_second": 0.56,
554
+ "step": 5184
555
+ },
556
+ {
557
+ "epoch": 55.0,
558
+ "eval_exact_match": 84.0909,
559
+ "eval_loss": 0.003565275575965643,
560
+ "eval_runtime": 21.3818,
561
+ "eval_samples_per_second": 2.105,
562
+ "eval_steps_per_second": 0.561,
563
+ "step": 5280
564
+ },
565
+ {
566
+ "epoch": 56.0,
567
+ "eval_exact_match": 84.0909,
568
+ "eval_loss": 0.0035686639603227377,
569
+ "eval_runtime": 21.4209,
570
+ "eval_samples_per_second": 2.101,
571
+ "eval_steps_per_second": 0.56,
572
+ "step": 5376
573
+ },
574
+ {
575
+ "epoch": 57.0,
576
+ "eval_exact_match": 84.0909,
577
+ "eval_loss": 0.0035712106619030237,
578
+ "eval_runtime": 24.3811,
579
+ "eval_samples_per_second": 1.846,
580
+ "eval_steps_per_second": 0.492,
581
+ "step": 5472
582
+ },
583
+ {
584
+ "epoch": 57.29,
585
+ "learning_rate": 2.352430555555553e-06,
586
+ "loss": 0.0,
587
+ "step": 5500
588
+ },
589
+ {
590
+ "epoch": 58.0,
591
+ "eval_exact_match": 84.0909,
592
+ "eval_loss": 0.0035736262798309326,
593
+ "eval_runtime": 21.4478,
594
+ "eval_samples_per_second": 2.098,
595
+ "eval_steps_per_second": 0.559,
596
+ "step": 5568
597
+ },
598
+ {
599
+ "epoch": 59.0,
600
+ "eval_exact_match": 84.0909,
601
+ "eval_loss": 0.0035762363113462925,
602
+ "eval_runtime": 21.3771,
603
+ "eval_samples_per_second": 2.105,
604
+ "eval_steps_per_second": 0.561,
605
+ "step": 5664
606
+ },
607
+ {
608
+ "epoch": 60.0,
609
+ "eval_exact_match": 84.0909,
610
+ "eval_loss": 0.003574397647753358,
611
+ "eval_runtime": 21.4318,
612
+ "eval_samples_per_second": 2.1,
613
+ "eval_steps_per_second": 0.56,
614
+ "step": 5760
615
+ },
616
+ {
617
+ "epoch": 60.0,
618
+ "step": 5760,
619
+ "total_flos": 2.80607795970048e+16,
620
+ "train_loss": 0.0008343008432954472,
621
+ "train_runtime": 4998.2627,
622
+ "train_samples_per_second": 4.61,
623
+ "train_steps_per_second": 1.152
624
+ }
625
+ ],
626
+ "max_steps": 5760,
627
+ "num_train_epochs": 60,
628
+ "total_flos": 2.80607795970048e+16,
629
+ "trial_name": null,
630
+ "trial_params": null
631
+ }
qa_atis_codet5p-220m_s2_latex_bs_lr/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:118a642995f3567a78b3ee996aa9cc1020a9328e169e41325917475ad605830a
3
+ size 3768
qa_atis_codet5p-220m_s2_latex_bs_lr/vocab.json ADDED
The diff for this file is too large to render. See raw diff
 
qa_atis_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-2688/config.json ADDED
@@ -0,0 +1,33 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/data/tir/projects/tir7/user_data/priyansk/qa_s2_codet5p-220m_latex/checkpoint-3300/",
3
+ "architectures": [
4
+ "T5ForConditionalGeneration"
5
+ ],
6
+ "bos_token_id": 1,
7
+ "d_ff": 3072,
8
+ "d_kv": 64,
9
+ "d_model": 768,
10
+ "decoder_start_token_id": 0,
11
+ "dense_act_fn": "relu",
12
+ "dropout_rate": 0.1,
13
+ "eos_token_id": 2,
14
+ "feed_forward_proj": "relu",
15
+ "initializer_factor": 1.0,
16
+ "is_encoder_decoder": true,
17
+ "is_gated_act": false,
18
+ "layer_norm_epsilon": 1e-06,
19
+ "max_length": 1024,
20
+ "model_type": "t5",
21
+ "n_positions": 512,
22
+ "num_decoder_layers": 12,
23
+ "num_heads": 12,
24
+ "num_layers": 12,
25
+ "output_past": true,
26
+ "pad_token_id": 0,
27
+ "relative_attention_max_distance": 128,
28
+ "relative_attention_num_buckets": 32,
29
+ "torch_dtype": "float32",
30
+ "transformers_version": "4.17.0",
31
+ "use_cache": true,
32
+ "vocab_size": 32100
33
+ }
qa_atis_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-2688/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
qa_atis_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-2688/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fa4305b816852c43042c70384279bec8504c48df93eccd7addcfaf633b3c107c
3
+ size 1783209146
qa_atis_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-2688/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d64cba9021b8a4a89fa66c3db191d0ccafca8117d4b55800c40c00ca093a94c0
3
+ size 891647438
qa_atis_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-2688/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ceb44ec18fa1318b388d941c567080398e33ec5024be0723d9fc241aa53800ce
3
+ size 14244
qa_atis_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-2688/scaler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a67227cd6c8d490e464500bc8e9d4f82a92ac308e5ed0dc0f66b6b126d7a21e3
3
+ size 988
qa_atis_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-2688/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:30b9c44419bb4d257b14d0d8c0f6ae34c6c2141628350ba0d40f43fff070ac7c
3
+ size 1064
qa_atis_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-2688/special_tokens_map.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": true}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, "additional_special_tokens": [{"content": "<extra_id_99>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_98>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_97>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_96>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_95>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_94>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_93>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_92>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_91>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_90>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_89>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_88>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_87>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_86>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_85>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_84>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_83>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_82>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_81>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_80>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_79>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_78>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_77>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_76>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_75>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_74>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_73>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_72>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_71>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_70>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_69>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_68>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_67>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_66>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_65>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_64>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_63>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_62>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_61>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_60>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_59>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_58>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_57>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_56>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_55>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_54>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_53>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_52>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_51>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_50>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_49>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_48>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_47>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_46>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_45>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_44>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_43>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_42>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_41>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_40>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_39>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_38>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_37>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_36>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_35>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_34>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_33>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_32>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_31>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_30>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_29>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_28>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_27>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_26>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_25>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_24>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_23>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_22>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_21>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_20>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_19>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_18>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_17>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_16>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_15>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_14>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_13>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_12>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_11>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_10>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_9>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_8>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_7>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_6>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_5>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_4>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_3>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_2>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_1>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}, {"content": "<extra_id_0>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": true}]}
qa_atis_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-2688/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
qa_atis_codet5p-220m_s2_latex_bs_lr_2024/checkpoint-2688/tokenizer_config.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"errors": "replace", "bos_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "eos_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "sep_token": {"content": "</s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "cls_token": {"content": "<s>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "unk_token": {"content": "<unk>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "pad_token": {"content": "<pad>", "single_word": false, "lstrip": false, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "mask_token": {"content": "<mask>", "single_word": false, "lstrip": true, "rstrip": false, "normalized": true, "special": false, "__type": "AddedToken"}, "add_prefix_space": false, "trim_offsets": true, "model_max_length": 512, "special_tokens_map_file": "/data/datasets/hf_cache/transformers/f432e4eb4a7dfc04b533beea5590e11f4b46c86f5630e8a032704ef76d7269c1.b9905d0575bde443a20834122b6e2d48e853b2e36444ce98ddeb43c38097eb3f", "name_or_path": "/data/tir/projects/tir7/user_data/priyansk/qa_s2_codet5p-220m_latex/checkpoint-3300/", "tokenizer_class": "RobertaTokenizer"}