frcm committed on
Commit 9aa8ece
1 Parent(s): 2db1992

Upload folder using huggingface_hub

adapter_config.json CHANGED
@@ -12,18 +12,18 @@
   "lora_dropout": 0.05,
   "modules_to_save": null,
   "peft_type": "LORA",
-  "r": 10,
+  "r": 8,
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "k_proj",
-    "q_proj",
-    "o_proj",
     "v_proj",
+    "lm_head",
     "gate_proj",
+    "q_proj",
+    "k_proj",
     "up_proj",
-    "lm_head",
-    "down_proj"
+    "down_proj",
+    "o_proj"
   ],
   "task_type": "CAUSAL_LM"
 }
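For reference, a minimal PEFT sketch that would produce an adapter_config.json matching the new side of this hunk (rank r lowered from 10 to 8, same eight target modules in the new order). Only the fields visible in the hunk are taken from the commit; the base model, lora_alpha, and anything else outside the hunk are not shown here and are left out or marked as assumptions.

from peft import LoraConfig

# Sketch of the updated adapter settings; only values visible in the hunk above
# come from this commit. lora_alpha and the base model are NOT in the diff.
lora_config = LoraConfig(
    r=8,                       # was 10 before this commit
    lora_dropout=0.05,
    target_modules=[
        "v_proj", "lm_head", "gate_proj", "q_proj",
        "k_proj", "up_proj", "down_proj", "o_proj",
    ],
    task_type="CAUSAL_LM",
)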
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:e656ad14e1d76b0391a061c1797fbf326c79c057f1731672e11cb75b6a6e1fa6
-size 106462933
+oid sha256:76d801ea8704901b08aee34b31db2a65164205ce78bda431afb1ad2040968879
+size 85202645
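The adapter file shrinks roughly in proportion to the rank change above: LoRA adds two low-rank matrices per target module, so with the target module set unchanged the parameter count scales about linearly with r. A quick, hedged sanity check (it assumes the storage dtype and module set are otherwise the same, which the diff does not state explicitly):

# Back-of-the-envelope check; only the two byte sizes come from the diff,
# the linear-in-r scaling is the standard LoRA parameter count argument.
old_size = 106_462_933            # bytes at r = 10
expected_new = old_size * 8 / 10  # LoRA weights scale ~linearly with r
print(round(expected_new))        # ~85_170_346, close to the committed 85_202_645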
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:6393c7ed447f7a3f01c6918424051ce72b5a6b770778a57372124a39168a7cfe
-size 53769383
+oid sha256:1b1b062a9026cdff22918215afa1e701a5c5018c5d07cd951ec0ec341229660b
+size 43126695
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:46da827e165696189c34c2493b211d8adc8abaffc57cfa6ca30d52808eb30a0e
+oid sha256:d6ed61af62f500446a510a2dd0027dc95c613e9d4678794729df15c9c15f0581
 size 14575
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:08342e596f9ce0d2810a89135af8758741c90d0421d3905ccfac5d85179e1963
+oid sha256:490ec907d63ee4b432e03215897359bdad5d7a21cb4e01a81d706b7a66b06a7e
 size 627
trainer_state.json CHANGED
@@ -1,137 +1,83 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.9446693657219973,
+  "epoch": 1.9323671497584543,
   "eval_steps": 50,
-  "global_step": 700,
+  "global_step": 400,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
   "log_history": [
     {
-      "epoch": 0.07,
-      "eval_loss": 1.4132792949676514,
-      "eval_runtime": 24.6579,
-      "eval_samples_per_second": 4.056,
-      "eval_steps_per_second": 0.527,
+      "epoch": 0.24,
+      "eval_loss": 1.0984270572662354,
+      "eval_runtime": 21.1382,
+      "eval_samples_per_second": 7.664,
+      "eval_steps_per_second": 0.993,
       "step": 50
     },
     {
-      "epoch": 0.13,
-      "eval_loss": 1.3802086114883423,
-      "eval_runtime": 24.6958,
-      "eval_samples_per_second": 4.049,
-      "eval_steps_per_second": 0.526,
+      "epoch": 0.48,
+      "eval_loss": 1.0866656303405762,
+      "eval_runtime": 21.1298,
+      "eval_samples_per_second": 7.667,
+      "eval_steps_per_second": 0.994,
       "step": 100
     },
     {
-      "epoch": 0.2,
-      "eval_loss": 1.3620883226394653,
-      "eval_runtime": 24.6721,
-      "eval_samples_per_second": 4.053,
-      "eval_steps_per_second": 0.527,
+      "epoch": 0.72,
+      "eval_loss": 1.0614020824432373,
+      "eval_runtime": 21.1356,
+      "eval_samples_per_second": 7.665,
+      "eval_steps_per_second": 0.994,
       "step": 150
     },
     {
-      "epoch": 0.27,
-      "eval_loss": 1.34859037399292,
-      "eval_runtime": 24.7313,
-      "eval_samples_per_second": 4.043,
-      "eval_steps_per_second": 0.526,
+      "epoch": 0.97,
+      "eval_loss": 1.0404683351516724,
+      "eval_runtime": 21.1305,
+      "eval_samples_per_second": 7.667,
+      "eval_steps_per_second": 0.994,
       "step": 200
     },
     {
-      "epoch": 0.34,
-      "eval_loss": 1.3401516675949097,
-      "eval_runtime": 24.6981,
-      "eval_samples_per_second": 4.049,
-      "eval_steps_per_second": 0.526,
+      "epoch": 1.21,
+      "eval_loss": 1.062158465385437,
+      "eval_runtime": 21.14,
+      "eval_samples_per_second": 7.663,
+      "eval_steps_per_second": 0.993,
       "step": 250
     },
     {
-      "epoch": 0.4,
-      "eval_loss": 1.3350552320480347,
-      "eval_runtime": 24.7526,
-      "eval_samples_per_second": 4.04,
-      "eval_steps_per_second": 0.525,
+      "epoch": 1.45,
+      "eval_loss": 1.0509228706359863,
+      "eval_runtime": 21.1314,
+      "eval_samples_per_second": 7.666,
+      "eval_steps_per_second": 0.994,
       "step": 300
     },
     {
-      "epoch": 0.47,
-      "eval_loss": 1.3288078308105469,
-      "eval_runtime": 24.7114,
-      "eval_samples_per_second": 4.047,
-      "eval_steps_per_second": 0.526,
+      "epoch": 1.69,
+      "eval_loss": 1.03497314453125,
+      "eval_runtime": 21.1338,
+      "eval_samples_per_second": 7.665,
+      "eval_steps_per_second": 0.994,
       "step": 350
     },
     {
-      "epoch": 0.54,
-      "eval_loss": 1.3192832469940186,
-      "eval_runtime": 24.7246,
-      "eval_samples_per_second": 4.045,
-      "eval_steps_per_second": 0.526,
+      "epoch": 1.93,
+      "eval_loss": 1.0270304679870605,
+      "eval_runtime": 21.1332,
+      "eval_samples_per_second": 7.666,
+      "eval_steps_per_second": 0.994,
       "step": 400
-    },
-    {
-      "epoch": 0.61,
-      "eval_loss": 1.3144173622131348,
-      "eval_runtime": 24.6861,
-      "eval_samples_per_second": 4.051,
-      "eval_steps_per_second": 0.527,
-      "step": 450
-    },
-    {
-      "epoch": 0.67,
-      "learning_rate": 7.224606580829757e-06,
-      "loss": 1.3853,
-      "step": 500
-    },
-    {
-      "epoch": 0.67,
-      "eval_loss": 1.3092302083969116,
-      "eval_runtime": 24.7017,
-      "eval_samples_per_second": 4.048,
-      "eval_steps_per_second": 0.526,
-      "step": 500
-    },
-    {
-      "epoch": 0.74,
-      "eval_loss": 1.303202509880066,
-      "eval_runtime": 24.6861,
-      "eval_samples_per_second": 4.051,
-      "eval_steps_per_second": 0.527,
-      "step": 550
-    },
-    {
-      "epoch": 0.81,
-      "eval_loss": 1.29935884475708,
-      "eval_runtime": 24.6791,
-      "eval_samples_per_second": 4.052,
-      "eval_steps_per_second": 0.527,
-      "step": 600
-    },
-    {
-      "epoch": 0.88,
-      "eval_loss": 1.2970906496047974,
-      "eval_runtime": 24.7065,
-      "eval_samples_per_second": 4.048,
-      "eval_steps_per_second": 0.526,
-      "step": 650
-    },
-    {
-      "epoch": 0.94,
-      "eval_loss": 1.2959811687469482,
-      "eval_runtime": 24.7115,
-      "eval_samples_per_second": 4.047,
-      "eval_steps_per_second": 0.526,
-      "step": 700
     }
   ],
   "logging_steps": 500,
-  "max_steps": 700,
-  "num_train_epochs": 1,
-  "save_steps": 50,
-  "total_flos": 4.49645833728e+16,
+  "max_steps": 2000,
+  "num_train_epochs": 10,
+  "save_steps": 100,
+  "total_flos": 1.2821409870336e+17,
   "trial_name": null,
   "trial_params": null
 }
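The bookkeeping fields at the bottom of the new trainer_state.json (eval every 50 steps, logging every 500, checkpoints every 100, 10 epochs capped at 2,000 steps) correspond to Trainer settings along the following lines. This is a hedged reconstruction: output_dir, batch size, learning rate, and everything else absent from the diff are placeholders, not part of this commit.

from transformers import TrainingArguments

# Sketch of settings consistent with the new trainer_state.json; only the
# commented values are taken from the diff, the rest are hypothetical.
training_args = TrainingArguments(
    output_dir="outputs",        # assumption, not in the diff
    evaluation_strategy="steps",
    eval_steps=50,               # "eval_steps": 50
    logging_steps=500,           # "logging_steps": 500
    save_steps=100,              # "save_steps": 100
    num_train_epochs=10,         # "num_train_epochs": 10
    max_steps=2000,              # "max_steps": 2000 (caps training when set)
)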
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:127d5f0e7e6f41442f9837e46f4f0568cb2b6b2eb071d6ef7a98e02aadc0d78d
+oid sha256:3a9b20d48ed28f5d1fcc2ffbef76d52e9379516adac2d305ede51ee83eb93f99
 size 4027