duyvt6663 committed
Commit eb68c92 · 1 Parent(s): 40857c0

Training in progress, step 200, checkpoint
checkpoint-200/README.md CHANGED
@@ -216,4 +216,23 @@ The following `bitsandbytes` quantization config was used during training:
 ### Framework versions
 
 
+- PEFT 0.6.0.dev0
+## Training procedure
+
+
+The following `bitsandbytes` quantization config was used during training:
+- quant_method: bitsandbytes
+- load_in_8bit: True
+- load_in_4bit: False
+- llm_int8_threshold: 6.0
+- llm_int8_skip_modules: None
+- llm_int8_enable_fp32_cpu_offload: False
+- llm_int8_has_fp16_weight: False
+- bnb_4bit_quant_type: fp4
+- bnb_4bit_use_double_quant: False
+- bnb_4bit_compute_dtype: float32
+
+### Framework versions
+
+
 - PEFT 0.6.0.dev0
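The quantization settings added to the README above describe an 8-bit `bitsandbytes` setup. Below is a minimal sketch of how those listed values are typically expressed with `transformers.BitsAndBytesConfig`; the base model name is a placeholder (it is not named in this commit), and this illustrates the config rather than reproducing the exact training script.

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Mirror of the quantization config listed in the README diff above.
# The 4-bit fields are present but inactive because load_in_8bit=True.
bnb_config = BitsAndBytesConfig(
    load_in_8bit=True,
    load_in_4bit=False,
    llm_int8_threshold=6.0,
    llm_int8_skip_modules=None,
    llm_int8_enable_fp32_cpu_offload=False,
    llm_int8_has_fp16_weight=False,
    bnb_4bit_quant_type="fp4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=torch.float32,
)

# "base-model-name" is a placeholder; the underlying base model is not
# identified anywhere in this checkpoint diff.
model = AutoModelForCausalLM.from_pretrained(
    "base-model-name",
    quantization_config=bnb_config,
    device_map="auto",
)
```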
checkpoint-200/adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7269b850f6f41ba7c1d66ea749e7522072b975c8b7566fffb59bf81b0ec49e7e
+oid sha256:02fc7ae11f75cf918dede926679dd7d282377662923fe8f232e583ac7b5ffcaa
 size 9873829
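The adapter weights (and the other binary checkpoint files below) are stored as Git LFS pointers: the repo tracks only the spec version, the sha256 oid, and the byte size, while the actual blob lives in LFS storage. A small sketch of checking a downloaded blob against such a pointer, assuming the blob has already been fetched to a local path (the helper name is hypothetical):

```python
import hashlib
from pathlib import Path

def check_lfs_pointer(pointer_text: str, blob_path: str) -> bool:
    """Compare a Git LFS pointer (oid sha256 + size) against a local blob."""
    # Skip the "version ..." line; remaining lines are "oid sha256:<hex>" and "size <bytes>".
    fields = dict(line.split(" ", 1) for line in pointer_text.strip().splitlines()[1:])
    expected_oid = fields["oid"].removeprefix("sha256:")
    expected_size = int(fields["size"])

    data = Path(blob_path).read_bytes()
    return hashlib.sha256(data).hexdigest() == expected_oid and len(data) == expected_size
```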
checkpoint-200/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:031e3b86982e5c64ef004e11b13cb4f773afd6611c65834f21e2fcfbd45b2bab
+oid sha256:44c61db8c54f88280856241d25545a2f77f192b2abacb4fd5466a48cb5b0cfb3
 size 42724
checkpoint-200/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8f467c31b4b77b1fb94b7449189df5c441d6ed0a8c79be61c95999fc9950eb31
+oid sha256:87284756e2bb4c6eff667a34242b85381924820ec6fe0fe5fd799db8c3103821
 size 14244
checkpoint-200/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:74ab2cfe8bb88e0e1613adb80a019b7fd597126800ba6c4a209650193e26a7d9
+oid sha256:653c5d079a71d092b2d513b1b568a861c98ec213041332341725cab9e70ed0ed
 size 1064
checkpoint-200/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
 {
-  "best_metric": 0.7115468382835388,
+  "best_metric": 0.6528732180595398,
   "best_model_checkpoint": "output/checkpoint-200",
-  "epoch": 0.20052638175209925,
+  "epoch": 0.16043316955780607,
   "eval_steps": 50,
   "global_step": 200,
   "is_hyper_param_search": false,
@@ -10,76 +10,76 @@
   "log_history": [
     {
       "epoch": 0.0,
-      "learning_rate": 1.6666666666666667e-06,
-      "loss": 1.2477,
+      "learning_rate": 1.3333333333333332e-06,
+      "loss": 0.785,
       "step": 1
     },
     {
-      "epoch": 0.05,
-      "learning_rate": 7.5e-05,
-      "loss": 0.9677,
+      "epoch": 0.04,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 0.6547,
       "step": 50
     },
     {
-      "epoch": 0.05,
-      "eval_accuracy": 0.6925373134328359,
-      "eval_loss": 0.6988171339035034,
-      "eval_runtime": 62.4321,
-      "eval_samples_per_second": 5.366,
-      "eval_steps_per_second": 1.073,
+      "epoch": 0.04,
+      "eval_accuracy": 0.6626865671641791,
+      "eval_loss": 0.8300915956497192,
+      "eval_runtime": 61.5689,
+      "eval_samples_per_second": 5.441,
+      "eval_steps_per_second": 1.364,
       "step": 50
     },
     {
-      "epoch": 0.1,
-      "learning_rate": 0.00015833333333333332,
-      "loss": 0.7171,
+      "epoch": 0.08,
+      "learning_rate": 0.00012666666666666666,
+      "loss": 0.6654,
       "step": 100
     },
     {
-      "epoch": 0.1,
-      "eval_accuracy": 0.6805970149253732,
-      "eval_loss": 0.7650710344314575,
-      "eval_runtime": 62.3301,
-      "eval_samples_per_second": 5.375,
-      "eval_steps_per_second": 1.075,
+      "epoch": 0.08,
+      "eval_accuracy": 0.6895522388059702,
+      "eval_loss": 0.6663276553153992,
+      "eval_runtime": 56.4633,
+      "eval_samples_per_second": 5.933,
+      "eval_steps_per_second": 1.488,
       "step": 100
     },
     {
-      "epoch": 0.15,
-      "learning_rate": 0.00024166666666666664,
-      "loss": 0.6936,
+      "epoch": 0.12,
+      "learning_rate": 0.00019333333333333333,
+      "loss": 0.6491,
       "step": 150
     },
     {
-      "epoch": 0.15,
-      "eval_accuracy": 0.6358208955223881,
-      "eval_loss": 0.9820537567138672,
-      "eval_runtime": 62.565,
-      "eval_samples_per_second": 5.354,
-      "eval_steps_per_second": 1.071,
+      "epoch": 0.12,
+      "eval_accuracy": 0.6537313432835821,
+      "eval_loss": 0.9172552824020386,
+      "eval_runtime": 56.4705,
+      "eval_samples_per_second": 5.932,
+      "eval_steps_per_second": 1.488,
       "step": 150
     },
     {
-      "epoch": 0.2,
-      "learning_rate": 0.00029449204406364746,
-      "loss": 0.6546,
+      "epoch": 0.16,
+      "learning_rate": 0.00026,
+      "loss": 0.641,
       "step": 200
     },
     {
-      "epoch": 0.2,
+      "epoch": 0.16,
       "eval_accuracy": 0.6865671641791045,
-      "eval_loss": 0.7115468382835388,
-      "eval_runtime": 62.8545,
-      "eval_samples_per_second": 5.33,
-      "eval_steps_per_second": 1.066,
+      "eval_loss": 0.6528732180595398,
+      "eval_runtime": 56.4485,
+      "eval_samples_per_second": 5.935,
+      "eval_steps_per_second": 1.488,
       "step": 200
     }
   ],
   "logging_steps": 50,
-  "max_steps": 997,
+  "max_steps": 1246,
   "num_train_epochs": 1,
   "save_steps": 100,
-  "total_flos": 9.64590287374848e+16,
+  "total_flos": 7.505235259858944e+16,
   "trial_name": null,
   "trial_params": null
 }
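The `trainer_state.json` diff above carries the training and evaluation history up to step 200; in this checkpoint the `best_metric` value matches the step-200 `eval_loss`. A short sketch of inspecting that file from a local clone of the repo (the path is relative to the repo root):

```python
import json

# Adjust the path to wherever the checkpoint is cloned locally.
with open("checkpoint-200/trainer_state.json") as f:
    state = json.load(f)

print("best metric:", state["best_metric"])
print("best checkpoint:", state["best_model_checkpoint"])

# log_history mixes training entries (loss, learning_rate) and eval entries (eval_*).
for entry in state["log_history"]:
    if "eval_loss" in entry:
        print(f"step {entry['step']}: eval_loss={entry['eval_loss']}, "
              f"eval_accuracy={entry['eval_accuracy']}")
```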
checkpoint-200/training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:934593fa9dd04c2539b69e5c067547ac475b57b743c50836b866c620827a4a76
+oid sha256:e32e120c6d65576e6fe76fdf63d907877e27469586cc746d8cd6a71d07635ea9
 size 4472