flytech committed on
Commit
f06f27b
1 Parent(s): 1565d64

Training in progress, step 2, checkpoint

Browse files
last-checkpoint/README.md CHANGED
@@ -4,6 +4,42 @@ library_name: peft
4
  ## Training procedure
5
 
6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7
  The following `bitsandbytes` quantization config was used during training:
8
  - quant_method: bitsandbytes
9
  - load_in_8bit: False
@@ -17,5 +53,8 @@ The following `bitsandbytes` quantization config was used during training:
17
  - bnb_4bit_compute_dtype: float16
18
  ### Framework versions
19
 
 
 
 
20
 
21
  - PEFT 0.5.0
 
4
  ## Training procedure
5
 
6
 
7
+ The following `bitsandbytes` quantization config was used during training:
8
+ - quant_method: bitsandbytes
9
+ - load_in_8bit: False
10
+ - load_in_4bit: True
11
+ - llm_int8_threshold: 6.0
12
+ - llm_int8_skip_modules: None
13
+ - llm_int8_enable_fp32_cpu_offload: False
14
+ - llm_int8_has_fp16_weight: False
15
+ - bnb_4bit_quant_type: nf4
16
+ - bnb_4bit_use_double_quant: True
17
+ - bnb_4bit_compute_dtype: float16
18
+
19
+ The following `bitsandbytes` quantization config was used during training:
20
+ - quant_method: bitsandbytes
21
+ - load_in_8bit: False
22
+ - load_in_4bit: True
23
+ - llm_int8_threshold: 6.0
24
+ - llm_int8_skip_modules: None
25
+ - llm_int8_enable_fp32_cpu_offload: False
26
+ - llm_int8_has_fp16_weight: False
27
+ - bnb_4bit_quant_type: nf4
28
+ - bnb_4bit_use_double_quant: True
29
+ - bnb_4bit_compute_dtype: float16
30
+
31
+ The following `bitsandbytes` quantization config was used during training:
32
+ - quant_method: bitsandbytes
33
+ - load_in_8bit: False
34
+ - load_in_4bit: True
35
+ - llm_int8_threshold: 6.0
36
+ - llm_int8_skip_modules: None
37
+ - llm_int8_enable_fp32_cpu_offload: False
38
+ - llm_int8_has_fp16_weight: False
39
+ - bnb_4bit_quant_type: nf4
40
+ - bnb_4bit_use_double_quant: True
41
+ - bnb_4bit_compute_dtype: float16
42
+
43
  The following `bitsandbytes` quantization config was used during training:
44
  - quant_method: bitsandbytes
45
  - load_in_8bit: False
 
53
  - bnb_4bit_compute_dtype: float16
54
  ### Framework versions
55
 
56
+ - PEFT 0.5.0
57
+ - PEFT 0.5.0
58
+ - PEFT 0.5.0
59
 
60
  - PEFT 0.5.0
last-checkpoint/adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:36c36f565cee153168e16fab891a20aef7b385171ac78c5722366eb330db1e78
3
  size 40137613
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:894902fc83ce981ce902d325f92c66c93f068f601ec88ba81ca2818c4f4ea82c
3
  size 40137613
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b3b3dab902f654a25e94e3ae343233a6f70542e70de8175dda45b60e2be64cde
3
  size 40036040
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f41804539ce4dbc951729526a56d2696d7d2042884d136c4e448563b20767c9c
3
  size 40036040
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04c3113c32b4000e515b8d0f088a79febc7bae6b99372543b69f4d5d6af8d9ee
3
  size 20523679
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5f1842a91acf8bc3388db088cf49775e50c5c216645f7d35ef69df393c50c91
3
  size 20523679
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9f2d7e8bf2ed7515ccab34b515d3863906e4d6e9f597722937aaa40e93ad4ba
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:18aff74b667dba3ffb21fc4e30c6282bcfa51148f5f176894821bf027611df3f
3
  size 14575
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8078c9f184228e33330b036411337dd5e5bec4381d5a61fcfdc2d0a6a6021bc
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:359ea6d423af08e8e9047ba958e9726b4e403615b5c587f71a606d0689d2621a
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,123 +1,32 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.0,
5
- "eval_steps": 25,
6
- "global_step": 200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.25,
13
- "learning_rate": 0.0001,
14
- "loss": 1.6153,
15
- "step": 25
16
  },
17
  {
18
- "epoch": 0.25,
19
- "eval_runtime": 216.6929,
20
- "eval_samples_per_second": 1.846,
21
- "eval_steps_per_second": 0.157,
22
- "step": 25
23
- },
24
- {
25
- "epoch": 0.5,
26
- "learning_rate": 0.0001,
27
- "loss": 0.6312,
28
- "step": 50
29
- },
30
- {
31
- "epoch": 0.5,
32
- "eval_runtime": 217.0379,
33
- "eval_samples_per_second": 1.843,
34
- "eval_steps_per_second": 0.157,
35
- "step": 50
36
- },
37
- {
38
- "epoch": 0.75,
39
- "learning_rate": 0.0001,
40
- "loss": 0.6063,
41
- "step": 75
42
- },
43
- {
44
- "epoch": 0.75,
45
- "eval_runtime": 216.6607,
46
- "eval_samples_per_second": 1.846,
47
- "eval_steps_per_second": 0.157,
48
- "step": 75
49
- },
50
- {
51
- "epoch": 1.0,
52
- "learning_rate": 0.0001,
53
- "loss": 0.5707,
54
- "step": 100
55
- },
56
- {
57
- "epoch": 1.0,
58
- "eval_runtime": 216.7207,
59
- "eval_samples_per_second": 1.846,
60
- "eval_steps_per_second": 0.157,
61
- "step": 100
62
- },
63
- {
64
- "epoch": 1.25,
65
- "learning_rate": 0.0001,
66
- "loss": 0.4976,
67
- "step": 125
68
- },
69
- {
70
- "epoch": 1.25,
71
- "eval_runtime": 216.1906,
72
- "eval_samples_per_second": 1.85,
73
- "eval_steps_per_second": 0.157,
74
- "step": 125
75
- },
76
- {
77
- "epoch": 1.5,
78
- "learning_rate": 0.0001,
79
- "loss": 0.5296,
80
- "step": 150
81
- },
82
- {
83
- "epoch": 1.5,
84
- "eval_runtime": 216.4796,
85
- "eval_samples_per_second": 1.848,
86
- "eval_steps_per_second": 0.157,
87
- "step": 150
88
- },
89
- {
90
- "epoch": 1.75,
91
- "learning_rate": 0.0001,
92
- "loss": 0.5748,
93
- "step": 175
94
- },
95
- {
96
- "epoch": 1.75,
97
- "eval_runtime": 216.7074,
98
- "eval_samples_per_second": 1.846,
99
- "eval_steps_per_second": 0.157,
100
- "step": 175
101
- },
102
- {
103
- "epoch": 2.0,
104
- "learning_rate": 0.0001,
105
- "loss": 0.5471,
106
- "step": 200
107
- },
108
- {
109
- "epoch": 2.0,
110
- "eval_runtime": 216.7005,
111
- "eval_samples_per_second": 1.846,
112
- "eval_steps_per_second": 0.157,
113
- "step": 200
114
  }
115
  ],
116
- "logging_steps": 25,
117
- "max_steps": 200,
118
- "num_train_epochs": 2,
119
- "save_steps": 25,
120
- "total_flos": 3.32202164355072e+16,
121
  "trial_name": null,
122
  "trial_params": null
123
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.025,
5
+ "eval_steps": 2,
6
+ "global_step": 2,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.03,
13
+ "learning_rate": 0.001,
14
+ "loss": 3.1823,
15
+ "step": 2
16
  },
17
  {
18
+ "epoch": 0.03,
19
+ "eval_runtime": 127.9324,
20
+ "eval_samples_per_second": 1.759,
21
+ "eval_steps_per_second": 0.227,
22
+ "step": 2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
  }
24
  ],
25
+ "logging_steps": 2,
26
+ "max_steps": 240,
27
+ "num_train_epochs": 3,
28
+ "save_steps": 2,
29
+ "total_flos": 332202164355072.0,
30
  "trial_name": null,
31
  "trial_params": null
32
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9c8c27537b3f48a590e7245cfc358a5c006cb5add7444724e14c4fd4a1f89680
3
- size 4091
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:887525ecb45e22150885f827b7fded85dffa90a11a9d2671d3af44cae52f9aff
3
+ size 4027