satyanshu404 committed
Commit 30477d9
1 Parent(s): 207e0e5

End of training

README.md CHANGED
@@ -16,8 +16,6 @@ should probably proofread and complete it, then remove this comment. -->
 # Phi-3-mini-4k-instruct-finetuned
 
 This model was trained from scratch on the None dataset.
-It achieves the following results on the evaluation set:
-- Loss: 12.3823
 
 ## Model description
 
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dc9abd601ea3eed093e3d6e130e4949fdcd10a5b2d3d7c67bd46540dbc988775
+oid sha256:ce0e58ae3d344005aa6b6ae4344ecc7d3f4c1359c6bce7db18bcdf22b8837db1
 size 25183064
all_results.json CHANGED
@@ -5,9 +5,9 @@
   "eval_samples": 145,
   "eval_samples_per_second": 39.318,
   "eval_steps_per_second": 19.795,
-  "total_flos": 1968746702340096.0,
-  "train_loss": 12.81857436607624,
-  "train_runtime": 41.5298,
-  "train_samples_per_second": 13.942,
-  "train_steps_per_second": 6.983
+  "total_flos": 8592765192732672.0,
+  "train_loss": 10.40311779153758,
+  "train_runtime": 167.4524,
+  "train_samples_per_second": 3.458,
+  "train_steps_per_second": 1.732
 }
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
   "epoch": 1.0,
-  "total_flos": 1968746702340096.0,
-  "train_loss": 12.81857436607624,
-  "train_runtime": 41.5298,
-  "train_samples_per_second": 13.942,
-  "train_steps_per_second": 6.983
+  "total_flos": 8592765192732672.0,
+  "train_loss": 10.40311779153758,
+  "train_runtime": 167.4524,
+  "train_samples_per_second": 3.458,
+  "train_steps_per_second": 1.732
 }
trainer_state.json CHANGED
@@ -11,95 +11,95 @@
     {
       "epoch": 0.07,
       "learning_rate": 1.724137931034483e-06,
-      "loss": 13.6309,
+      "loss": 13.7082,
       "step": 20
     },
     {
       "epoch": 0.14,
       "learning_rate": 3.448275862068966e-06,
-      "loss": 13.5642,
+      "loss": 13.6049,
       "step": 40
     },
     {
       "epoch": 0.21,
       "learning_rate": 4.999083215558211e-06,
-      "loss": 13.3984,
+      "loss": 13.2358,
       "step": 60
     },
     {
       "epoch": 0.28,
       "learning_rate": 4.88988035667903e-06,
-      "loss": 13.3684,
+      "loss": 12.582,
       "step": 80
     },
     {
       "epoch": 0.34,
       "learning_rate": 4.606455184041623e-06,
-      "loss": 12.9849,
+      "loss": 11.3961,
       "step": 100
     },
     {
       "epoch": 0.41,
       "learning_rate": 4.169469396971739e-06,
-      "loss": 12.9424,
+      "loss": 10.3617,
       "step": 120
     },
     {
       "epoch": 0.48,
       "learning_rate": 3.6107792658847597e-06,
-      "loss": 12.7222,
+      "loss": 9.6083,
       "step": 140
     },
     {
       "epoch": 0.55,
       "learning_rate": 2.971113309695796e-06,
-      "loss": 12.615,
+      "loss": 9.3376,
       "step": 160
     },
     {
       "epoch": 0.62,
       "learning_rate": 2.2971031861814225e-06,
-      "loss": 12.4667,
+      "loss": 8.9473,
       "step": 180
     },
     {
       "epoch": 0.69,
       "learning_rate": 1.6378842434300746e-06,
-      "loss": 12.3938,
+      "loss": 8.776,
       "step": 200
     },
     {
       "epoch": 0.76,
       "learning_rate": 1.041513552231265e-06,
-      "loss": 12.3768,
+      "loss": 8.6731,
       "step": 220
     },
     {
       "epoch": 0.83,
       "learning_rate": 5.51466544896021e-07,
-      "loss": 12.3142,
+      "loss": 8.6288,
       "step": 240
     },
     {
       "epoch": 0.9,
       "learning_rate": 2.0346765559094566e-07,
-      "loss": 12.4309,
+      "loss": 8.7519,
       "step": 260
     },
     {
       "epoch": 0.97,
       "learning_rate": 2.2886008552983064e-08,
-      "loss": 12.4571,
+      "loss": 8.7899,
       "step": 280
     },
     {
       "epoch": 1.0,
       "step": 290,
-      "total_flos": 1968746702340096.0,
-      "train_loss": 12.81857436607624,
-      "train_runtime": 41.5298,
-      "train_samples_per_second": 13.942,
-      "train_steps_per_second": 6.983
+      "total_flos": 8592765192732672.0,
+      "train_loss": 10.40311779153758,
+      "train_runtime": 167.4524,
+      "train_samples_per_second": 3.458,
+      "train_steps_per_second": 1.732
     }
   ],
   "logging_steps": 20,
@@ -107,7 +107,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 100,
-  "total_flos": 1968746702340096.0,
+  "total_flos": 8592765192732672.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null