Krish356 commited on
Commit
39bf4e8
·
verified ·
1 Parent(s): 40d48a2

Training in progress, step 60, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2a731ad90cb1208f24500760746be021e19f1c8a28830d6576caaa8378d6390
3
  size 3380768360
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5b541bb702a004bd26e22a9b25613caa40ba226e1c32db33b163b11a283ee28
3
  size 3380768360
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:57e6fef3b80c3a466d80f26292b1b4075bb303f0144109f1c0bf035a689e9eff
3
- size 1756772889
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:de5abaa4082b22bd4a967f9c79a0482a15f529323f25a7e5d2a3e5d715794ca1
3
+ size 1757617777
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:544245d4a0da8d847252bdfe403e41a1a64fc9c974c22cff9871dfc244f5affc
3
  size 14645
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8568e47b8b6c26e0a7b5540da26be8f44a34e9279c25ee15a34f94896178a6b5
3
  size 14645
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:50733553a6cea7b86768bb775429ef570a01c42581c9a321c702771217ac2eb9
3
  size 1465
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:09d25955d4e2674e5dd5eed0c89d1bc41de0292bbbb39829ac908ea95d7cec10
3
  size 1465
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.313588850174216,
6
  "eval_steps": 30,
7
- "global_step": 30,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -65,6 +65,56 @@
65
  "eval_samples_per_second": 0.292,
66
  "eval_steps_per_second": 0.073,
67
  "step": 30
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
68
  }
69
  ],
70
  "logging_steps": 5,
@@ -84,7 +134,7 @@
84
  "attributes": {}
85
  }
86
  },
87
- "total_flos": 2.0997070578499584e+17,
88
  "train_batch_size": 8,
89
  "trial_name": null,
90
  "trial_params": null
 
2
  "best_global_step": null,
3
  "best_metric": null,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.627177700348432,
6
  "eval_steps": 30,
7
+ "global_step": 60,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
65
  "eval_samples_per_second": 0.292,
66
  "eval_steps_per_second": 0.073,
67
  "step": 30
68
+ },
69
+ {
70
+ "epoch": 0.36585365853658536,
71
+ "grad_norm": 0.08448098599910736,
72
+ "learning_rate": 9.451192254041758e-05,
73
+ "loss": 0.285,
74
+ "step": 35
75
+ },
76
+ {
77
+ "epoch": 0.4181184668989547,
78
+ "grad_norm": 0.0991150364279747,
79
+ "learning_rate": 9.243221287473756e-05,
80
+ "loss": 0.3197,
81
+ "step": 40
82
+ },
83
+ {
84
+ "epoch": 0.47038327526132406,
85
+ "grad_norm": 0.17393162846565247,
86
+ "learning_rate": 9.005005472346924e-05,
87
+ "loss": 0.3749,
88
+ "step": 45
89
+ },
90
+ {
91
+ "epoch": 0.5226480836236934,
92
+ "grad_norm": 0.0953838899731636,
93
+ "learning_rate": 8.738242764239046e-05,
94
+ "loss": 0.2699,
95
+ "step": 50
96
+ },
97
+ {
98
+ "epoch": 0.5749128919860628,
99
+ "grad_norm": 0.07958400994539261,
100
+ "learning_rate": 8.444834595378434e-05,
101
+ "loss": 0.2421,
102
+ "step": 55
103
+ },
104
+ {
105
+ "epoch": 0.627177700348432,
106
+ "grad_norm": 0.08708363026380539,
107
+ "learning_rate": 8.126872321608184e-05,
108
+ "loss": 0.2645,
109
+ "step": 60
110
+ },
111
+ {
112
+ "epoch": 0.627177700348432,
113
+ "eval_loss": 0.26565828919410706,
114
+ "eval_runtime": 1749.0443,
115
+ "eval_samples_per_second": 0.292,
116
+ "eval_steps_per_second": 0.073,
117
+ "step": 60
118
  }
119
  ],
120
  "logging_steps": 5,
 
134
  "attributes": {}
135
  }
136
  },
137
+ "total_flos": 4.066496537929974e+17,
138
  "train_batch_size": 8,
139
  "trial_name": null,
140
  "trial_params": null