edpowers committed on
Commit
bb74d8e
·
verified ·
1 Parent(s): 6e9e2d2

End of training

Browse files
adapter_config.json CHANGED
@@ -20,11 +20,11 @@
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
- "v_proj",
24
  "q_proj",
25
- "o_proj",
26
  "k_proj",
27
- "gate_proj"
 
28
  ],
29
  "task_type": "CAUSAL_LM",
30
  "use_dora": false,
 
20
  "rank_pattern": {},
21
  "revision": null,
22
  "target_modules": [
23
+ "gate_proj",
24
  "q_proj",
 
25
  "k_proj",
26
+ "o_proj",
27
+ "v_proj"
28
  ],
29
  "task_type": "CAUSAL_LM",
30
  "use_dora": false,
all_results.json CHANGED
@@ -1,14 +1,14 @@
1
  {
2
  "epoch": 5.923076923076923,
3
  "eval_loss": 1.2003010511398315,
4
- "eval_runtime": 30.6491,
5
  "eval_samples": 169,
6
- "eval_samples_per_second": 2.904,
7
- "eval_steps_per_second": 0.392,
8
  "total_flos": 1.7606154086724403e+17,
9
  "train_loss": 4.533969319902815e-05,
10
- "train_runtime": 4.4385,
11
  "train_samples": 1346,
12
- "train_samples_per_second": 901.197,
13
- "train_steps_per_second": 225.299
14
  }
 
1
  {
2
  "epoch": 5.923076923076923,
3
  "eval_loss": 1.2003010511398315,
4
+ "eval_runtime": 30.5205,
5
  "eval_samples": 169,
6
+ "eval_samples_per_second": 2.916,
7
+ "eval_steps_per_second": 0.393,
8
  "total_flos": 1.7606154086724403e+17,
9
  "train_loss": 4.533969319902815e-05,
10
+ "train_runtime": 4.3598,
11
  "train_samples": 1346,
12
+ "train_samples_per_second": 917.469,
13
+ "train_steps_per_second": 229.367
14
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 5.923076923076923,
3
  "eval_loss": 1.2003010511398315,
4
- "eval_runtime": 30.6491,
5
  "eval_samples": 169,
6
- "eval_samples_per_second": 2.904,
7
- "eval_steps_per_second": 0.392
8
  }
 
1
  {
2
  "epoch": 5.923076923076923,
3
  "eval_loss": 1.2003010511398315,
4
+ "eval_runtime": 30.5205,
5
  "eval_samples": 169,
6
+ "eval_samples_per_second": 2.916,
7
+ "eval_steps_per_second": 0.393
8
  }
train_results.json CHANGED
@@ -2,8 +2,8 @@
2
  "epoch": 5.923076923076923,
3
  "total_flos": 1.7606154086724403e+17,
4
  "train_loss": 4.533969319902815e-05,
5
- "train_runtime": 4.4385,
6
  "train_samples": 1346,
7
- "train_samples_per_second": 901.197,
8
- "train_steps_per_second": 225.299
9
  }
 
2
  "epoch": 5.923076923076923,
3
  "total_flos": 1.7606154086724403e+17,
4
  "train_loss": 4.533969319902815e-05,
5
+ "train_runtime": 4.3598,
6
  "train_samples": 1346,
7
+ "train_samples_per_second": 917.469,
8
+ "train_steps_per_second": 229.367
9
  }
trainer_state.json CHANGED
@@ -613,9 +613,9 @@
613
  "step": 1001,
614
  "total_flos": 1.7606154086724403e+17,
615
  "train_loss": 4.533969319902815e-05,
616
- "train_runtime": 4.4385,
617
- "train_samples_per_second": 901.197,
618
- "train_steps_per_second": 225.299
619
  }
620
  ],
621
  "logging_steps": 25,
 
613
  "step": 1001,
614
  "total_flos": 1.7606154086724403e+17,
615
  "train_loss": 4.533969319902815e-05,
616
+ "train_runtime": 4.3598,
617
+ "train_samples_per_second": 917.469,
618
+ "train_steps_per_second": 229.367
619
  }
620
  ],
621
  "logging_steps": 25,