Fidlobabovic committed on
Commit d9b78f3
1 Parent(s): 24c3146

Upload 12 files

README.md CHANGED
@@ -201,4 +201,4 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
 
 ### Framework versions
 
- - PEFT 0.8.2
+ - PEFT 0.9.1.dev0
adapter_config.json CHANGED
@@ -15,7 +15,7 @@
 "megatron_core": "megatron.core",
 "modules_to_save": null,
 "peft_type": "LORA",
- "r": 16,
+ "r": 2,
 "rank_pattern": {},
 "revision": null,
 "target_modules": [
@@ -23,5 +23,6 @@
 "v"
 ],
 "task_type": "SEQ_2_SEQ_LM",
+ "use_dora": false,
 "use_rslora": false
 }
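For context, the new adapter_config.json describes a much smaller LoRA adapter (rank 2 instead of 16), which is consistent with the drop in adapter_model.safetensors size below. A minimal sketch of how a config with these visible fields is typically built with PEFT's LoraConfig; anything not shown in the hunk (lora_alpha, dropout, the full target_modules list) is left at defaults or marked as truncated, and this is not necessarily the author's exact call:

```python
from peft import LoraConfig, TaskType

# Sketch only: LoRA config matching the fields visible in this commit's
# adapter_config.json (PEFT 0.9.x serializes use_dora/use_rslora).
lora_config = LoraConfig(
    r=2,                           # rank lowered from 16 to 2 in this commit
    target_modules=["v"],          # only "v" is visible; the rest of the list is cut off by the hunk
    task_type=TaskType.SEQ_2_SEQ_LM,
    use_rslora=False,
    use_dora=False,                # field newly written by PEFT >= 0.9
)
```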
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:53c3f097205552429368c45aaf8a706fcc25c4e8a1f56022779394c0d5584342
- size 7098016
+ oid sha256:cbfe4187ccd3f6e385630a03a6cbab2e62e966681d0b28d327734059780ff2c3
+ size 904448
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:66310fe24e2c2135ec5682f73bf2af1b494d686dded923489301df77b614ceea
- size 14241722
+ oid sha256:db8d49a7fabe828c3aba6ecef409d574ab2e873c1da7ebc602aecd9b37053c9e
+ size 1855418
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:d83ebbc412940984c9cce0d8d151956673a404462d05157cc5a5af4b26ece0e8
+ oid sha256:c49550e595e4cef50f9f341076fa360541a83f04ce3871de9f034be3805e2693
 size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:1e803851e932423d43f5c60c4ef2a6a6cd13c22b0ec6bc5cb6653edd74b0a5e2
+ oid sha256:550c024080a615b3ad0be87bcfbb2422a50cf7f7ff0520f431bc20e82a3d84dc
 size 1000
tokenizer_config.json CHANGED
@@ -930,7 +930,7 @@
 "clean_up_tokenization_spaces": true,
 "eos_token": "</s>",
 "extra_ids": 100,
- "model_max_length": 512,
+ "model_max_length": 1000000000000000019884624838656,
 "pad_token": "<pad>",
 "tokenizer_class": "T5Tokenizer",
 "unk_token": "<unk>"
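The huge new model_max_length is transformers' "no limit configured" sentinel, int(1e30), rather than a deliberate context size. If a hard limit is still wanted downstream, it can be reapplied when the tokenizer is loaded; a minimal sketch with a placeholder repo id (not a name taken from this commit):

```python
from transformers import AutoTokenizer

# Override the VERY_LARGE_INTEGER sentinel with an explicit limit at load time.
tokenizer = AutoTokenizer.from_pretrained(
    "<this-repo-or-base-model>",  # placeholder, assumption
    model_max_length=512,
)
```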
trainer_state.json CHANGED
@@ -1,149 +1,61 @@
 {
 "best_metric": null,
 "best_model_checkpoint": null,
- "epoch": 6.0,
+ "epoch": 2.6666666666666665,
 "eval_steps": 500,
- "global_step": 4500,
+ "global_step": 1000,
 "is_hyper_param_search": false,
 "is_local_process_zero": true,
 "is_world_process_zero": true,
 "log_history": [
- {
- "epoch": 0.67,
- "grad_norm": 10.763190269470215,
- "learning_rate": 3.335112059765208e-05,
- "loss": 1.5382,
- "step": 500
- },
 {
 "epoch": 1.0,
- "eval_gen_len": 4.8812,
- "eval_loss": 1.1502126455307007,
- "eval_rouge1": 60.247880952380996,
- "eval_rouge2": 42.58590476190485,
- "eval_rougeL": 60.23339682539681,
- "eval_rougeLsum": 60.23446031746034,
- "eval_runtime": 1016.7664,
- "eval_samples_per_second": 4.918,
- "eval_steps_per_second": 1.229,
- "step": 750
+ "eval_gen_len": 18.877,
+ "eval_loss": 3.9956815242767334,
+ "eval_rouge1": 3.640356760886191,
+ "eval_rouge2": 1.3241316461316452,
+ "eval_rougeL": 3.6251379927262484,
+ "eval_rougeLsum": 3.627662198912217,
+ "eval_runtime": 1371.7269,
+ "eval_samples_per_second": 3.645,
+ "eval_steps_per_second": 0.456,
+ "step": 375
 },
 {
 "epoch": 1.33,
- "grad_norm": 4.3876237869262695,
- "learning_rate": 6.670224119530416e-06,
- "loss": 1.2867,
- "step": 1000
- },
- {
- "epoch": 2.0,
- "grad_norm": 5.068687915802002,
- "learning_rate": 0.0,
- "loss": 1.1906,
- "step": 1500
+ "grad_norm": 11.406363487243652,
+ "learning_rate": 6.680896478121665e-06,
+ "loss": 4.77,
+ "step": 500
 },
 {
 "epoch": 2.0,
- "eval_gen_len": 4.9042,
- "eval_loss": 1.1154725551605225,
- "eval_rouge1": 60.75021428571432,
- "eval_rouge2": 43.73785714285723,
- "eval_rougeL": 60.75853968253971,
- "eval_rougeLsum": 60.7197619047618,
- "eval_runtime": 1019.2932,
- "eval_samples_per_second": 4.905,
- "eval_steps_per_second": 1.226,
- "step": 1500
+ "eval_gen_len": 6.1788,
+ "eval_loss": 2.728412389755249,
+ "eval_rouge1": 11.110740703740731,
+ "eval_rouge2": 3.642182539682537,
+ "eval_rougeL": 11.095423465423494,
+ "eval_rougeLsum": 11.097078588078615,
+ "eval_runtime": 1076.8457,
+ "eval_samples_per_second": 4.643,
+ "eval_steps_per_second": 0.58,
+ "step": 750
 },
 {
 "epoch": 2.67,
- "grad_norm": 6.656228065490723,
- "learning_rate": 0.0,
- "loss": 1.2365,
- "step": 2000
- },
- {
- "epoch": 3.0,
- "eval_gen_len": 4.9042,
- "eval_loss": 1.1154857873916626,
- "eval_rouge1": 60.75021428571432,
- "eval_rouge2": 43.73785714285723,
- "eval_rougeL": 60.75853968253971,
- "eval_rougeLsum": 60.7197619047618,
- "eval_runtime": 1019.9367,
- "eval_samples_per_second": 4.902,
- "eval_steps_per_second": 1.226,
- "step": 2250
- },
- {
- "epoch": 3.33,
- "grad_norm": 8.076435089111328,
- "learning_rate": 0.0,
- "loss": 1.2041,
- "step": 2500
- },
- {
- "epoch": 4.0,
- "grad_norm": 3.218427896499634,
- "learning_rate": 0.0,
- "loss": 1.1962,
- "step": 3000
- },
- {
- "epoch": 4.0,
- "eval_gen_len": 4.9042,
- "eval_loss": 1.1154814958572388,
- "eval_rouge1": 60.75021428571432,
- "eval_rouge2": 43.73785714285723,
- "eval_rougeL": 60.75853968253971,
- "eval_rougeLsum": 60.7197619047618,
- "eval_runtime": 1019.891,
- "eval_samples_per_second": 4.902,
- "eval_steps_per_second": 1.226,
- "step": 3000
- },
- {
- "epoch": 4.67,
- "grad_norm": 3.1441781520843506,
- "learning_rate": 0.0,
- "loss": 1.2323,
- "step": 3500
- },
- {
- "epoch": 5.0,
- "eval_gen_len": 4.9042,
- "eval_loss": 1.1154634952545166,
- "eval_rouge1": 60.75021428571432,
- "eval_rouge2": 43.73785714285723,
- "eval_rougeL": 60.75853968253971,
- "eval_rougeLsum": 60.7197619047618,
- "eval_runtime": 1016.3103,
- "eval_samples_per_second": 4.92,
- "eval_steps_per_second": 1.23,
- "step": 3750
- },
- {
- "epoch": 5.33,
- "grad_norm": 4.466716766357422,
- "learning_rate": 0.0,
- "loss": 1.2174,
- "step": 4000
- },
- {
- "epoch": 6.0,
- "grad_norm": 5.4029083251953125,
- "learning_rate": 0.0,
- "loss": 1.1927,
- "step": 4500
+ "grad_norm": 14.748087882995605,
+ "learning_rate": 4.0128068303094986e-06,
+ "loss": 2.9857,
+ "step": 1000
 }
 ],
 "logging_steps": 500,
- "max_steps": 5250,
+ "max_steps": 2625,
 "num_input_tokens_seen": 0,
 "num_train_epochs": 7,
 "save_steps": 500,
- "total_flos": 1.1059086753792e+16,
- "train_batch_size": 4,
+ "total_flos": 4877098942464000.0,
+ "train_batch_size": 8,
 "trial_name": null,
 "trial_params": null
 }
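The fields recorded in the new trainer_state.json (batch size 8, 7 epochs, logging/saving every 500 steps, per-epoch evals at steps 375 and 750 with ROUGE and gen_len) suggest training arguments roughly like the sketch below. This is an assumption-laden reconstruction, not the author's actual script; output_dir is a placeholder, and older transformers releases use evaluation_strategy where newer ones use eval_strategy.

```python
from transformers import Seq2SeqTrainingArguments

# Sketch only: arguments consistent with the recorded trainer state.
args = Seq2SeqTrainingArguments(
    output_dir="outputs",             # placeholder, assumption
    per_device_train_batch_size=8,    # "train_batch_size": 8
    num_train_epochs=7,               # "num_train_epochs": 7
    evaluation_strategy="epoch",      # evals land at epoch boundaries (steps 375, 750)
    logging_steps=500,                # "logging_steps": 500
    save_steps=500,                   # "save_steps": 500
    predict_with_generate=True,       # eval_gen_len / ROUGE imply generation during eval
)
```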
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:46df985f32c9133404178778d30698cb855d173a08623ee3e2588b465a4db162
+ oid sha256:dccdccf9f076d61d4e27a0c66ed74eb5e46d003e8e6fa05464468f4a31f86eeb
 size 5048