pefanis27 committed on
Commit
cb82d9a
·
verified ·
1 Parent(s): 2220e12

phi-3.5-new

Browse files
adapter_config.json CHANGED
@@ -23,8 +23,8 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
- "gate_up_proj",
27
  "down_proj",
 
28
  "qkv_proj",
29
  "o_proj"
30
  ],
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
 
26
  "down_proj",
27
+ "gate_up_proj",
28
  "qkv_proj",
29
  "o_proj"
30
  ],
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:263a69bdb9de370405091d8f38a7af188f5c8271576941714e6e32907ca8a968
3
  size 50365768
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4304b28412f7f55d6290ffd8d0f78abe64343826c049368281ccf51c7c170087
3
  size 50365768
all_results.json CHANGED
@@ -1,12 +1,12 @@
1
  {
2
- "epoch": 6.9523809523809526,
3
- "eval_loss": 1.0488358736038208,
4
- "eval_runtime": 3.908,
5
- "eval_samples_per_second": 4.35,
6
- "eval_steps_per_second": 2.303,
7
- "total_flos": 6666636405768192.0,
8
- "train_loss": 1.0272208958455962,
9
- "train_runtime": 456.6289,
10
- "train_samples_per_second": 1.818,
11
- "train_steps_per_second": 0.219
12
  }
 
1
  {
2
+ "epoch": 8.952380952380953,
3
+ "eval_loss": 1.0497090816497803,
4
+ "eval_runtime": 3.9138,
5
+ "eval_samples_per_second": 4.344,
6
+ "eval_steps_per_second": 2.3,
7
+ "total_flos": 8571389664559104.0,
8
+ "train_loss": 0.9873519552514908,
9
+ "train_runtime": 598.1578,
10
+ "train_samples_per_second": 1.388,
11
+ "train_steps_per_second": 0.167
12
  }
eval_results.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "epoch": 6.9523809523809526,
3
- "eval_loss": 1.0488358736038208,
4
- "eval_runtime": 3.908,
5
- "eval_samples_per_second": 4.35,
6
- "eval_steps_per_second": 2.303
7
  }
 
1
  {
2
+ "epoch": 8.952380952380953,
3
+ "eval_loss": 1.0497090816497803,
4
+ "eval_runtime": 3.9138,
5
+ "eval_samples_per_second": 4.344,
6
+ "eval_steps_per_second": 2.3
7
  }
runs/Jan27_12-23-32_dmlab/events.out.tfevents.1737973412.dmlab.10950.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f951710b7e4600dde972ecf56a65162cf3d95d1d42a91f7ba8c6568555f3fd0
3
+ size 12921
runs/Jan27_12-23-32_dmlab/events.out.tfevents.1737974014.dmlab.10950.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:08d71714ffcca237e8a8ef12feeeec01044fb1858c12f6899b27de42418385e8
3
+ size 354
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 6.9523809523809526,
3
- "total_flos": 6666636405768192.0,
4
- "train_loss": 1.0272208958455962,
5
- "train_runtime": 456.6289,
6
- "train_samples_per_second": 1.818,
7
- "train_steps_per_second": 0.219
8
  }
 
1
  {
2
+ "epoch": 8.952380952380953,
3
+ "total_flos": 8571389664559104.0,
4
+ "train_loss": 0.9873519552514908,
5
+ "train_runtime": 598.1578,
6
+ "train_samples_per_second": 1.388,
7
+ "train_steps_per_second": 0.167
8
  }
trainer_state.json CHANGED
@@ -1,126 +1,156 @@
1
  {
2
- "best_metric": 1.0488358736038208,
3
- "best_model_checkpoint": "/home/labuser/Documents/phi-3/phi-3.5-new/checkpoint-42",
4
- "epoch": 6.9523809523809526,
5
  "eval_steps": 500,
6
- "global_step": 73,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.9523809523809523,
13
- "grad_norm": 2.678637981414795,
14
  "learning_rate": 0.0001,
15
- "loss": 1.5168,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.9523809523809523,
20
- "eval_loss": 1.3078712224960327,
21
- "eval_runtime": 3.9056,
22
- "eval_samples_per_second": 4.353,
23
- "eval_steps_per_second": 2.304,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 2.0,
28
- "grad_norm": 2.171729564666748,
29
  "learning_rate": 9.635919272833938e-05,
30
- "loss": 1.0953,
31
  "step": 21
32
  },
33
  {
34
  "epoch": 2.0,
35
- "eval_loss": 1.1388882398605347,
36
- "eval_runtime": 3.9103,
37
- "eval_samples_per_second": 4.348,
38
- "eval_steps_per_second": 2.302,
39
  "step": 21
40
  },
41
  {
42
  "epoch": 2.9523809523809526,
43
- "grad_norm": 2.044914960861206,
44
  "learning_rate": 8.715724127386972e-05,
45
- "loss": 1.0287,
46
  "step": 31
47
  },
48
  {
49
  "epoch": 2.9523809523809526,
50
- "eval_loss": 1.0680148601531982,
51
- "eval_runtime": 3.9097,
52
- "eval_samples_per_second": 4.348,
53
- "eval_steps_per_second": 2.302,
54
  "step": 31
55
  },
56
  {
57
  "epoch": 4.0,
58
- "grad_norm": 3.0813491344451904,
59
  "learning_rate": 7.191855733945387e-05,
60
- "loss": 0.8858,
61
  "step": 42
62
  },
63
  {
64
  "epoch": 4.0,
65
- "eval_loss": 1.0488358736038208,
66
- "eval_runtime": 3.9065,
67
- "eval_samples_per_second": 4.352,
68
- "eval_steps_per_second": 2.304,
69
  "step": 42
70
  },
71
  {
72
  "epoch": 4.9523809523809526,
73
- "grad_norm": 2.399099349975586,
74
  "learning_rate": 5.522642316338268e-05,
75
- "loss": 0.9442,
76
  "step": 52
77
  },
78
  {
79
  "epoch": 4.9523809523809526,
80
- "eval_loss": 1.0551538467407227,
81
- "eval_runtime": 3.9027,
82
- "eval_samples_per_second": 4.356,
83
- "eval_steps_per_second": 2.306,
84
  "step": 52
85
  },
86
  {
87
  "epoch": 6.0,
88
- "grad_norm": 2.558990240097046,
89
  "learning_rate": 3.6218132209150045e-05,
90
- "loss": 0.8393,
91
  "step": 63
92
  },
93
  {
94
  "epoch": 6.0,
95
- "eval_loss": 1.0510764122009277,
96
- "eval_runtime": 3.9324,
97
- "eval_samples_per_second": 4.323,
98
- "eval_steps_per_second": 2.289,
99
  "step": 63
100
  },
101
  {
102
  "epoch": 6.9523809523809526,
103
- "grad_norm": 2.0949923992156982,
104
  "learning_rate": 2.061073738537635e-05,
105
- "loss": 0.9065,
106
  "step": 73
107
  },
108
  {
109
  "epoch": 6.9523809523809526,
110
- "eval_loss": 1.0621706247329712,
111
- "eval_runtime": 3.9116,
112
- "eval_samples_per_second": 4.346,
113
- "eval_steps_per_second": 2.301,
114
  "step": 73
115
  },
116
  {
117
- "epoch": 6.9523809523809526,
118
- "step": 73,
119
- "total_flos": 6666636405768192.0,
120
- "train_loss": 1.0272208958455962,
121
- "train_runtime": 456.6289,
122
- "train_samples_per_second": 1.818,
123
- "train_steps_per_second": 0.219
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
124
  }
125
  ],
126
  "logging_steps": 500,
@@ -149,7 +179,7 @@
149
  "attributes": {}
150
  }
151
  },
152
- "total_flos": 6666636405768192.0,
153
  "train_batch_size": 2,
154
  "trial_name": null,
155
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.0497090816497803,
3
+ "best_model_checkpoint": "/home/labuser/Documents/phi-3/phi-3.5-new/checkpoint-63",
4
+ "epoch": 8.952380952380953,
5
  "eval_steps": 500,
6
+ "global_step": 94,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.9523809523809523,
13
+ "grad_norm": 2.605168342590332,
14
  "learning_rate": 0.0001,
15
+ "loss": 1.515,
16
  "step": 10
17
  },
18
  {
19
  "epoch": 0.9523809523809523,
20
+ "eval_loss": 1.310670256614685,
21
+ "eval_runtime": 3.9865,
22
+ "eval_samples_per_second": 4.264,
23
+ "eval_steps_per_second": 2.258,
24
  "step": 10
25
  },
26
  {
27
  "epoch": 2.0,
28
+ "grad_norm": 2.2956364154815674,
29
  "learning_rate": 9.635919272833938e-05,
30
+ "loss": 1.0908,
31
  "step": 21
32
  },
33
  {
34
  "epoch": 2.0,
35
+ "eval_loss": 1.1376752853393555,
36
+ "eval_runtime": 3.9866,
37
+ "eval_samples_per_second": 4.264,
38
+ "eval_steps_per_second": 2.258,
39
  "step": 21
40
  },
41
  {
42
  "epoch": 2.9523809523809526,
43
+ "grad_norm": 1.952481746673584,
44
  "learning_rate": 8.715724127386972e-05,
45
+ "loss": 1.0257,
46
  "step": 31
47
  },
48
  {
49
  "epoch": 2.9523809523809526,
50
+ "eval_loss": 1.0673834085464478,
51
+ "eval_runtime": 4.0023,
52
+ "eval_samples_per_second": 4.248,
53
+ "eval_steps_per_second": 2.249,
54
  "step": 31
55
  },
56
  {
57
  "epoch": 4.0,
58
+ "grad_norm": 2.819559097290039,
59
  "learning_rate": 7.191855733945387e-05,
60
+ "loss": 0.886,
61
  "step": 42
62
  },
63
  {
64
  "epoch": 4.0,
65
+ "eval_loss": 1.050466537475586,
66
+ "eval_runtime": 4.0072,
67
+ "eval_samples_per_second": 4.242,
68
+ "eval_steps_per_second": 2.246,
69
  "step": 42
70
  },
71
  {
72
  "epoch": 4.9523809523809526,
73
+ "grad_norm": 2.7341201305389404,
74
  "learning_rate": 5.522642316338268e-05,
75
+ "loss": 0.9453,
76
  "step": 52
77
  },
78
  {
79
  "epoch": 4.9523809523809526,
80
+ "eval_loss": 1.0541181564331055,
81
+ "eval_runtime": 4.0101,
82
+ "eval_samples_per_second": 4.239,
83
+ "eval_steps_per_second": 2.244,
84
  "step": 52
85
  },
86
  {
87
  "epoch": 6.0,
88
+ "grad_norm": 2.5818099975585938,
89
  "learning_rate": 3.6218132209150045e-05,
90
+ "loss": 0.8409,
91
  "step": 63
92
  },
93
  {
94
  "epoch": 6.0,
95
+ "eval_loss": 1.0497090816497803,
96
+ "eval_runtime": 3.9956,
97
+ "eval_samples_per_second": 4.255,
98
+ "eval_steps_per_second": 2.252,
99
  "step": 63
100
  },
101
  {
102
  "epoch": 6.9523809523809526,
103
+ "grad_norm": 2.11718487739563,
104
  "learning_rate": 2.061073738537635e-05,
105
+ "loss": 0.9078,
106
  "step": 73
107
  },
108
  {
109
  "epoch": 6.9523809523809526,
110
+ "eval_loss": 1.0589897632598877,
111
+ "eval_runtime": 4.0003,
112
+ "eval_samples_per_second": 4.25,
113
+ "eval_steps_per_second": 2.25,
114
  "step": 73
115
  },
116
  {
117
+ "epoch": 8.0,
118
+ "grad_norm": 2.463954210281372,
119
+ "learning_rate": 7.597595192178702e-06,
120
+ "loss": 0.8154,
121
+ "step": 84
122
+ },
123
+ {
124
+ "epoch": 8.0,
125
+ "eval_loss": 1.0593781471252441,
126
+ "eval_runtime": 3.9931,
127
+ "eval_samples_per_second": 4.257,
128
+ "eval_steps_per_second": 2.254,
129
+ "step": 84
130
+ },
131
+ {
132
+ "epoch": 8.952380952380953,
133
+ "grad_norm": 2.035900831222534,
134
+ "learning_rate": 1.0926199633097157e-06,
135
+ "loss": 0.8909,
136
+ "step": 94
137
+ },
138
+ {
139
+ "epoch": 8.952380952380953,
140
+ "eval_loss": 1.057528018951416,
141
+ "eval_runtime": 3.9065,
142
+ "eval_samples_per_second": 4.352,
143
+ "eval_steps_per_second": 2.304,
144
+ "step": 94
145
+ },
146
+ {
147
+ "epoch": 8.952380952380953,
148
+ "step": 94,
149
+ "total_flos": 8571389664559104.0,
150
+ "train_loss": 0.9873519552514908,
151
+ "train_runtime": 598.1578,
152
+ "train_samples_per_second": 1.388,
153
+ "train_steps_per_second": 0.167
154
  }
155
  ],
156
  "logging_steps": 500,
 
179
  "attributes": {}
180
  }
181
  },
182
+ "total_flos": 8571389664559104.0,
183
  "train_batch_size": 2,
184
  "trial_name": null,
185
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:906c3e67879a4a768ad056051772ba574f2f336ffda087c2d37e302a39428848
3
  size 5624
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3cc87110c49fdc13ae4a9c637c417bf45b1df7830aa3e7ba4f2e0ca0aadba6ac
3
  size 5624