MohamedAhmedAE committed on
Commit
bce7dfb
·
verified ·
1 Parent(s): f5dde94

Training in progress, step 600, checkpoint

Browse files
last-checkpoint/adapter_config.json CHANGED
@@ -23,13 +23,13 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "v_proj",
27
- "up_proj",
28
  "gate_proj",
29
- "k_proj",
30
- "o_proj",
31
  "q_proj",
32
- "down_proj"
 
 
 
 
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
 
 
26
  "gate_proj",
 
 
27
  "q_proj",
28
+ "k_proj",
29
+ "up_proj",
30
+ "v_proj",
31
+ "down_proj",
32
+ "o_proj"
33
  ],
34
  "task_type": "CAUSAL_LM",
35
  "use_dora": false,
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:30602946db0511e89472d2985d62506942982f91c28840fc64989587d9d5d5f3
3
  size 360740440
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2a1dd12a210017a07f561dace36dc00f59b55ff12d579593e8e5f59db1ca495d
3
  size 360740440
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae593a71e87e4da53cca674152b73afa1b696d643d8531e897898e897d841578
3
- size 183445626
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09dbea6aaf941a049c9aa3656b362f19b6c30102fcdcd2d81680e63cd278a9c4
3
+ size 184018770
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bf56788b1af92ed8ac278c2b5a5a7e56b531312246249a57078017be8884a01f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fe9c2e6cee455bb212d5ae0dd7c343acca65b7f37490224e2c429dd428b2c9ad
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f4460b404e87366806d8b48914980f9d75ea1e74835c790fae129ccdf808017b
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:06eec3ac373cc6b81236635fbbf70132276a159a47d5685288ad3485f23d8131
3
  size 1064
last-checkpoint/special_tokens_map.json CHANGED
@@ -13,5 +13,11 @@
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
- "pad_token": "<|eot_id|>"
 
 
 
 
 
 
17
  }
 
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
+ "pad_token": {
17
+ "content": "<|eot_id|>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ }
23
  }
last-checkpoint/tokenizer_config.json CHANGED
@@ -2055,6 +2055,7 @@
2055
  "clean_up_tokenization_spaces": true,
2056
  "eos_token": "<|eot_id|>",
2057
  "extra_special_tokens": {},
 
2058
  "model_input_names": [
2059
  "input_ids",
2060
  "attention_mask"
@@ -2062,5 +2063,8 @@
2062
  "model_max_length": 4096,
2063
  "pad_token": "<|eot_id|>",
2064
  "padding_side": "left",
2065
- "tokenizer_class": "PreTrainedTokenizerFast"
 
 
 
2066
  }
 
2055
  "clean_up_tokenization_spaces": true,
2056
  "eos_token": "<|eot_id|>",
2057
  "extra_special_tokens": {},
2058
+ "max_length": 4096,
2059
  "model_input_names": [
2060
  "input_ids",
2061
  "attention_mask"
 
2063
  "model_max_length": 4096,
2064
  "pad_token": "<|eot_id|>",
2065
  "padding_side": "left",
2066
+ "stride": 0,
2067
+ "tokenizer_class": "PreTrainedTokenizerFast",
2068
+ "truncation_side": "right",
2069
+ "truncation_strategy": "longest_first"
2070
  }
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0002781929421059623,
5
  "eval_steps": 500,
6
- "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -21,6 +21,13 @@
21
  "learning_rate": 1.9999999331656075e-05,
22
  "loss": 2.076,
23
  "step": 400
 
 
 
 
 
 
 
24
  }
25
  ],
26
  "logging_steps": 200,
@@ -40,7 +47,7 @@
40
  "attributes": {}
41
  }
42
  },
43
- "total_flos": 731147720306688.0,
44
  "train_batch_size": 1,
45
  "trial_name": null,
46
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.00041728941315894347,
5
  "eval_steps": 500,
6
+ "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
21
  "learning_rate": 1.9999999331656075e-05,
22
  "loss": 2.076,
23
  "step": 400
24
+ },
25
+ {
26
+ "epoch": 0.00041728941315894347,
27
+ "grad_norm": 7.084391117095947,
28
+ "learning_rate": 1.9999998496226195e-05,
29
+ "loss": 2.202,
30
+ "step": 600
31
  }
32
  ],
33
  "logging_steps": 200,
 
47
  "attributes": {}
48
  }
49
  },
50
+ "total_flos": 1138117833400320.0,
51
  "train_batch_size": 1,
52
  "trial_name": null,
53
  "trial_params": null
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dafb3d7d0b401b02200850bddb3d6e4859621630290323fd4a0dc0ddd9a3d4fc
3
  size 6840
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f73bef2970b56da564b1d8c87d27fe806335e746653d451535ecd6b817d641ba
3
  size 6840