Phanh2532 committed on
Commit
17d1172
1 Parent(s): 21d7f83

Upload folder using huggingface_hub

Browse files
adapter_config.json CHANGED
@@ -10,7 +10,7 @@
10
  "layers_pattern": null,
11
  "layers_to_transform": null,
12
  "loftq_config": {},
13
- "lora_alpha": 16,
14
  "lora_dropout": 0.1,
15
  "megatron_config": null,
16
  "megatron_core": "megatron.core",
@@ -20,8 +20,8 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "q_proj",
24
- "v_proj"
25
  ],
26
  "task_type": "CAUSAL_LM",
27
  "use_dora": false,
 
10
  "layers_pattern": null,
11
  "layers_to_transform": null,
12
  "loftq_config": {},
13
+ "lora_alpha": 32,
14
  "lora_dropout": 0.1,
15
  "megatron_config": null,
16
  "megatron_core": "megatron.core",
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
+ "v_proj",
24
+ "q_proj"
25
  ],
26
  "task_type": "CAUSAL_LM",
27
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:53f31f5151288ce8bce09f1be497668a07de0d67744cfec814e83a51a1f0373e
3
  size 109069176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1912b4b957d908b8bfc710c3e58558d93617ba78eeee6b44cea5846184e311aa
3
  size 109069176
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ba21787dc827ac1988c3dc98860a3c8ae6b18c637d435abd180fe66fbdbc2e36
3
  size 218182458
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f61189ba8e2df9508d8734043f68c2833bde41baa22941571ec4278f299a891b
3
  size 218182458
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:54a18bd6f3c1fb6bc324ca3d6007005eb1895412504a4ac1675632ffc50077a8
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1e8d2ce26e6c0f5a47ca9bc5544c0f80a1d4937019d9d7af01320826a96817c0
3
  size 14244
trainer_state.json CHANGED
@@ -10,122 +10,122 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.24154589371980675,
13
- "grad_norm": 0.3504341244697571,
14
  "learning_rate": 0.000493936638139193,
15
- "loss": 1.2154,
16
  "step": 25
17
  },
18
  {
19
  "epoch": 0.24154589371980675,
20
- "eval_loss": 1.2845402956008911,
21
- "eval_runtime": 58.0708,
22
- "eval_samples_per_second": 3.565,
23
- "eval_steps_per_second": 0.448,
24
  "step": 25
25
  },
26
  {
27
  "epoch": 0.4830917874396135,
28
- "grad_norm": 0.11877840012311935,
29
  "learning_rate": 0.000463751348237005,
30
- "loss": 0.7638,
31
  "step": 50
32
  },
33
  {
34
  "epoch": 0.4830917874396135,
35
- "eval_loss": 1.255712628364563,
36
- "eval_runtime": 58.0315,
37
- "eval_samples_per_second": 3.567,
38
- "eval_steps_per_second": 0.448,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 0.7246376811594203,
43
- "grad_norm": 0.09250932186841965,
44
  "learning_rate": 0.000411248712216741,
45
- "loss": 0.7477,
46
  "step": 75
47
  },
48
  {
49
  "epoch": 0.7246376811594203,
50
- "eval_loss": 1.1869494915008545,
51
- "eval_runtime": 57.9221,
52
- "eval_samples_per_second": 3.574,
53
- "eval_steps_per_second": 0.449,
54
  "step": 75
55
  },
56
  {
57
  "epoch": 0.966183574879227,
58
- "grad_norm": 0.10278703272342682,
59
  "learning_rate": 0.00034191042415818,
60
- "loss": 0.7249,
61
  "step": 100
62
  },
63
  {
64
  "epoch": 0.966183574879227,
65
- "eval_loss": 1.317003607749939,
66
- "eval_runtime": 57.9128,
67
- "eval_samples_per_second": 3.574,
68
- "eval_steps_per_second": 0.449,
69
  "step": 100
70
  },
71
  {
72
  "epoch": 1.2077294685990339,
73
- "grad_norm": 0.09411193430423737,
74
  "learning_rate": 0.00026297595453297494,
75
- "loss": 0.7095,
76
  "step": 125
77
  },
78
  {
79
  "epoch": 1.2077294685990339,
80
- "eval_loss": 1.3763236999511719,
81
- "eval_runtime": 57.9829,
82
- "eval_samples_per_second": 3.57,
83
- "eval_steps_per_second": 0.448,
84
  "step": 125
85
  },
86
  {
87
  "epoch": 1.4492753623188406,
88
- "grad_norm": 0.08513357490301132,
89
  "learning_rate": 0.00018268669172909137,
90
- "loss": 0.708,
91
  "step": 150
92
  },
93
  {
94
  "epoch": 1.4492753623188406,
95
- "eval_loss": 1.2287328243255615,
96
- "eval_runtime": 57.9704,
97
- "eval_samples_per_second": 3.571,
98
- "eval_steps_per_second": 0.449,
99
  "step": 150
100
  },
101
  {
102
  "epoch": 1.6908212560386473,
103
- "grad_norm": 0.08826680481433868,
104
  "learning_rate": 0.00010942547535123056,
105
- "loss": 0.7055,
106
  "step": 175
107
  },
108
  {
109
  "epoch": 1.6908212560386473,
110
- "eval_loss": 1.2410857677459717,
111
- "eval_runtime": 58.0223,
112
- "eval_samples_per_second": 3.568,
113
- "eval_steps_per_second": 0.448,
114
  "step": 175
115
  },
116
  {
117
  "epoch": 1.9323671497584543,
118
- "grad_norm": 0.0850740373134613,
119
  "learning_rate": 5.0841360885691e-05,
120
- "loss": 0.7017,
121
  "step": 200
122
  },
123
  {
124
  "epoch": 1.9323671497584543,
125
- "eval_loss": 1.2755271196365356,
126
- "eval_runtime": 57.8348,
127
- "eval_samples_per_second": 3.579,
128
- "eval_steps_per_second": 0.45,
129
  "step": 200
130
  }
131
  ],
@@ -133,7 +133,7 @@
133
  "max_steps": 250,
134
  "num_input_tokens_seen": 0,
135
  "num_train_epochs": 3,
136
- "save_steps": 100,
137
  "stateful_callbacks": {
138
  "TrainerControl": {
139
  "args": {
@@ -146,7 +146,7 @@
146
  "attributes": {}
147
  }
148
  },
149
- "total_flos": 1.8667153521278976e+16,
150
  "train_batch_size": 1,
151
  "trial_name": null,
152
  "trial_params": null
 
10
  "log_history": [
11
  {
12
  "epoch": 0.24154589371980675,
13
+ "grad_norm": 0.24812498688697815,
14
  "learning_rate": 0.000493936638139193,
15
+ "loss": 1.1502,
16
  "step": 25
17
  },
18
  {
19
  "epoch": 0.24154589371980675,
20
+ "eval_loss": 0.9789943099021912,
21
+ "eval_runtime": 52.5341,
22
+ "eval_samples_per_second": 3.94,
23
+ "eval_steps_per_second": 0.495,
24
  "step": 25
25
  },
26
  {
27
  "epoch": 0.4830917874396135,
28
+ "grad_norm": 0.35792288184165955,
29
  "learning_rate": 0.000463751348237005,
30
+ "loss": 0.9241,
31
  "step": 50
32
  },
33
  {
34
  "epoch": 0.4830917874396135,
35
+ "eval_loss": 0.9137127995491028,
36
+ "eval_runtime": 52.1664,
37
+ "eval_samples_per_second": 3.968,
38
+ "eval_steps_per_second": 0.498,
39
  "step": 50
40
  },
41
  {
42
  "epoch": 0.7246376811594203,
43
+ "grad_norm": 0.1500280201435089,
44
  "learning_rate": 0.000411248712216741,
45
+ "loss": 0.8907,
46
  "step": 75
47
  },
48
  {
49
  "epoch": 0.7246376811594203,
50
+ "eval_loss": 0.871120274066925,
51
+ "eval_runtime": 52.6024,
52
+ "eval_samples_per_second": 3.935,
53
+ "eval_steps_per_second": 0.494,
54
  "step": 75
55
  },
56
  {
57
  "epoch": 0.966183574879227,
58
+ "grad_norm": 0.17570209503173828,
59
  "learning_rate": 0.00034191042415818,
60
+ "loss": 0.8684,
61
  "step": 100
62
  },
63
  {
64
  "epoch": 0.966183574879227,
65
+ "eval_loss": 0.8509008288383484,
66
+ "eval_runtime": 52.4802,
67
+ "eval_samples_per_second": 3.944,
68
+ "eval_steps_per_second": 0.495,
69
  "step": 100
70
  },
71
  {
72
  "epoch": 1.2077294685990339,
73
+ "grad_norm": 0.12721501290798187,
74
  "learning_rate": 0.00026297595453297494,
75
+ "loss": 0.8553,
76
  "step": 125
77
  },
78
  {
79
  "epoch": 1.2077294685990339,
80
+ "eval_loss": 0.8410875201225281,
81
+ "eval_runtime": 52.6568,
82
+ "eval_samples_per_second": 3.931,
83
+ "eval_steps_per_second": 0.494,
84
  "step": 125
85
  },
86
  {
87
  "epoch": 1.4492753623188406,
88
+ "grad_norm": 0.14220421016216278,
89
  "learning_rate": 0.00018268669172909137,
90
+ "loss": 0.8527,
91
  "step": 150
92
  },
93
  {
94
  "epoch": 1.4492753623188406,
95
+ "eval_loss": 0.8425164818763733,
96
+ "eval_runtime": 52.5225,
97
+ "eval_samples_per_second": 3.941,
98
+ "eval_steps_per_second": 0.495,
99
  "step": 150
100
  },
101
  {
102
  "epoch": 1.6908212560386473,
103
+ "grad_norm": 0.13018099963665009,
104
  "learning_rate": 0.00010942547535123056,
105
+ "loss": 0.8532,
106
  "step": 175
107
  },
108
  {
109
  "epoch": 1.6908212560386473,
110
+ "eval_loss": 0.83249831199646,
111
+ "eval_runtime": 52.6913,
112
+ "eval_samples_per_second": 3.929,
113
+ "eval_steps_per_second": 0.493,
114
  "step": 175
115
  },
116
  {
117
  "epoch": 1.9323671497584543,
118
+ "grad_norm": 0.1186065599322319,
119
  "learning_rate": 5.0841360885691e-05,
120
+ "loss": 0.8463,
121
  "step": 200
122
  },
123
  {
124
  "epoch": 1.9323671497584543,
125
+ "eval_loss": 0.8275504112243652,
126
+ "eval_runtime": 52.8803,
127
+ "eval_samples_per_second": 3.915,
128
+ "eval_steps_per_second": 0.492,
129
  "step": 200
130
  }
131
  ],
 
133
  "max_steps": 250,
134
  "num_input_tokens_seen": 0,
135
  "num_train_epochs": 3,
136
+ "save_steps": 50,
137
  "stateful_callbacks": {
138
  "TrainerControl": {
139
  "args": {
 
146
  "attributes": {}
147
  }
148
  },
149
+ "total_flos": 1.5515046692683776e+16,
150
  "train_batch_size": 1,
151
  "trial_name": null,
152
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10646daa363a63b95014966745d9c23a0caf85b2d424c228c2ecbcbe5088f9b2
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6dfba6deedf548ad55e99786ef825605d398f2faa387a7055cf0e4a125a246d4
3
  size 5368