cocolalala
commited on
Commit
•
d5b9649
1
Parent(s):
c1a75b8
Model save
Browse files
all_results.json
CHANGED
@@ -1,9 +1,14 @@
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
|
|
|
|
|
|
|
|
|
|
3 |
"total_flos": 1.5751056572484157e+19,
|
4 |
-
"train_loss": 0.
|
5 |
-
"train_runtime":
|
6 |
"train_samples": 1055292,
|
7 |
-
"train_samples_per_second":
|
8 |
-
"train_steps_per_second": 0.
|
9 |
}
|
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
+
"eval_loss": 0.7983009815216064,
|
4 |
+
"eval_runtime": 7985.9663,
|
5 |
+
"eval_samples": 263823,
|
6 |
+
"eval_samples_per_second": 10.19,
|
7 |
+
"eval_steps_per_second": 0.159,
|
8 |
"total_flos": 1.5751056572484157e+19,
|
9 |
+
"train_loss": 0.013955340304839536,
|
10 |
+
"train_runtime": 11108.4839,
|
11 |
"train_samples": 1055292,
|
12 |
+
"train_samples_per_second": 29.312,
|
13 |
+
"train_steps_per_second": 0.458
|
14 |
}
|
eval_results.json
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 1.0,
|
3 |
+
"eval_loss": 0.7983009815216064,
|
4 |
+
"eval_runtime": 7985.9663,
|
5 |
+
"eval_samples": 263823,
|
6 |
+
"eval_samples_per_second": 10.19,
|
7 |
+
"eval_steps_per_second": 0.159
|
8 |
+
}
|
runs/May25_13-55-16_br1t43-s3-25/events.out.tfevents.1716673878.br1t43-s3-25.187086.1
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9aaa7f8fbb9f75e566fcde30f482fcb7a5578a9d37b3382a6d356ad6e0743613
|
3 |
+
size 359
|
runs/May26_01-54-52_br1t43-s3-25/events.out.tfevents.1716688506.br1t43-s3-25.190932.0
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:729d2f0c91d57354efb497f12dfb73a29e4347765a3bbc83beab8a36bdf1c070
|
3 |
+
size 9665
|
train_results.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
"total_flos": 1.5751056572484157e+19,
|
4 |
-
"train_loss": 0.
|
5 |
-
"train_runtime":
|
6 |
"train_samples": 1055292,
|
7 |
-
"train_samples_per_second":
|
8 |
-
"train_steps_per_second": 0.
|
9 |
}
|
|
|
1 |
{
|
2 |
"epoch": 1.0,
|
3 |
"total_flos": 1.5751056572484157e+19,
|
4 |
+
"train_loss": 0.013955340304839536,
|
5 |
+
"train_runtime": 11108.4839,
|
6 |
"train_samples": 1055292,
|
7 |
+
"train_samples_per_second": 29.312,
|
8 |
+
"train_steps_per_second": 0.458
|
9 |
}
|
trainer_state.json
CHANGED
@@ -7031,9 +7031,9 @@
|
|
7031 |
},
|
7032 |
{
|
7033 |
"epoch": 0.9856525157232704,
|
7034 |
-
"grad_norm": 0.
|
7035 |
"learning_rate": 1.2539591749821666e-07,
|
7036 |
-
"loss": 0.
|
7037 |
"step": 5015
|
7038 |
},
|
7039 |
{
|
@@ -7047,14 +7047,14 @@
|
|
7047 |
"epoch": 0.9876179245283019,
|
7048 |
"grad_norm": 0.31640625,
|
7049 |
"learning_rate": 9.339893961548551e-08,
|
7050 |
-
"loss": 0.
|
7051 |
"step": 5025
|
7052 |
},
|
7053 |
{
|
7054 |
"epoch": 0.9886006289308176,
|
7055 |
"grad_norm": 0.310546875,
|
7056 |
"learning_rate": 7.916389256541479e-08,
|
7057 |
-
"loss": 0.
|
7058 |
"step": 5030
|
7059 |
},
|
7060 |
{
|
@@ -7082,21 +7082,21 @@
|
|
7082 |
"epoch": 0.9925314465408805,
|
7083 |
"grad_norm": 0.310546875,
|
7084 |
"learning_rate": 3.3983730900377655e-08,
|
7085 |
-
"loss": 0.
|
7086 |
"step": 5050
|
7087 |
},
|
7088 |
{
|
7089 |
"epoch": 0.9935141509433962,
|
7090 |
-
"grad_norm": 0.
|
7091 |
"learning_rate": 2.5629361711809742e-08,
|
7092 |
-
"loss": 0.
|
7093 |
"step": 5055
|
7094 |
},
|
7095 |
{
|
7096 |
"epoch": 0.9944968553459119,
|
7097 |
"grad_norm": 0.30078125,
|
7098 |
"learning_rate": 1.8451478405223653e-08,
|
7099 |
-
"loss": 0.
|
7100 |
"step": 5060
|
7101 |
},
|
7102 |
{
|
@@ -7108,48 +7108,48 @@
|
|
7108 |
},
|
7109 |
{
|
7110 |
"epoch": 0.9964622641509434,
|
7111 |
-
"grad_norm": 0.
|
7112 |
"learning_rate": 7.62549346601249e-09,
|
7113 |
-
"loss": 0.
|
7114 |
"step": 5070
|
7115 |
},
|
7116 |
{
|
7117 |
"epoch": 0.9974449685534591,
|
7118 |
"grad_norm": 0.302734375,
|
7119 |
"learning_rate": 3.977519232223337e-09,
|
7120 |
-
"loss": 0.
|
7121 |
"step": 5075
|
7122 |
},
|
7123 |
{
|
7124 |
"epoch": 0.9984276729559748,
|
7125 |
"grad_norm": 0.302734375,
|
7126 |
"learning_rate": 1.5062856765779565e-09,
|
7127 |
-
"loss": 0.
|
7128 |
"step": 5080
|
7129 |
},
|
7130 |
{
|
7131 |
"epoch": 0.9994103773584906,
|
7132 |
-
"grad_norm": 0.
|
7133 |
"learning_rate": 2.118218802582561e-10,
|
7134 |
"loss": 0.8288,
|
7135 |
"step": 5085
|
7136 |
},
|
7137 |
{
|
7138 |
"epoch": 1.0,
|
7139 |
-
"eval_loss": 0.
|
7140 |
-
"eval_runtime":
|
7141 |
-
"eval_samples_per_second":
|
7142 |
-
"eval_steps_per_second": 0.
|
7143 |
"step": 5088
|
7144 |
},
|
7145 |
{
|
7146 |
"epoch": 1.0,
|
7147 |
"step": 5088,
|
7148 |
"total_flos": 1.5751056572484157e+19,
|
7149 |
-
"train_loss": 0.
|
7150 |
-
"train_runtime":
|
7151 |
-
"train_samples_per_second":
|
7152 |
-
"train_steps_per_second": 0.
|
7153 |
}
|
7154 |
],
|
7155 |
"logging_steps": 5,
|
|
|
7031 |
},
|
7032 |
{
|
7033 |
"epoch": 0.9856525157232704,
|
7034 |
+
"grad_norm": 0.318359375,
|
7035 |
"learning_rate": 1.2539591749821666e-07,
|
7036 |
+
"loss": 0.7974,
|
7037 |
"step": 5015
|
7038 |
},
|
7039 |
{
|
|
|
7047 |
"epoch": 0.9876179245283019,
|
7048 |
"grad_norm": 0.31640625,
|
7049 |
"learning_rate": 9.339893961548551e-08,
|
7050 |
+
"loss": 0.8153,
|
7051 |
"step": 5025
|
7052 |
},
|
7053 |
{
|
7054 |
"epoch": 0.9886006289308176,
|
7055 |
"grad_norm": 0.310546875,
|
7056 |
"learning_rate": 7.916389256541479e-08,
|
7057 |
+
"loss": 0.8146,
|
7058 |
"step": 5030
|
7059 |
},
|
7060 |
{
|
|
|
7082 |
"epoch": 0.9925314465408805,
|
7083 |
"grad_norm": 0.310546875,
|
7084 |
"learning_rate": 3.3983730900377655e-08,
|
7085 |
+
"loss": 0.8008,
|
7086 |
"step": 5050
|
7087 |
},
|
7088 |
{
|
7089 |
"epoch": 0.9935141509433962,
|
7090 |
+
"grad_norm": 0.302734375,
|
7091 |
"learning_rate": 2.5629361711809742e-08,
|
7092 |
+
"loss": 0.8024,
|
7093 |
"step": 5055
|
7094 |
},
|
7095 |
{
|
7096 |
"epoch": 0.9944968553459119,
|
7097 |
"grad_norm": 0.30078125,
|
7098 |
"learning_rate": 1.8451478405223653e-08,
|
7099 |
+
"loss": 0.7952,
|
7100 |
"step": 5060
|
7101 |
},
|
7102 |
{
|
|
|
7108 |
},
|
7109 |
{
|
7110 |
"epoch": 0.9964622641509434,
|
7111 |
+
"grad_norm": 0.306640625,
|
7112 |
"learning_rate": 7.62549346601249e-09,
|
7113 |
+
"loss": 0.8112,
|
7114 |
"step": 5070
|
7115 |
},
|
7116 |
{
|
7117 |
"epoch": 0.9974449685534591,
|
7118 |
"grad_norm": 0.302734375,
|
7119 |
"learning_rate": 3.977519232223337e-09,
|
7120 |
+
"loss": 0.8175,
|
7121 |
"step": 5075
|
7122 |
},
|
7123 |
{
|
7124 |
"epoch": 0.9984276729559748,
|
7125 |
"grad_norm": 0.302734375,
|
7126 |
"learning_rate": 1.5062856765779565e-09,
|
7127 |
+
"loss": 0.8088,
|
7128 |
"step": 5080
|
7129 |
},
|
7130 |
{
|
7131 |
"epoch": 0.9994103773584906,
|
7132 |
+
"grad_norm": 0.310546875,
|
7133 |
"learning_rate": 2.118218802582561e-10,
|
7134 |
"loss": 0.8288,
|
7135 |
"step": 5085
|
7136 |
},
|
7137 |
{
|
7138 |
"epoch": 1.0,
|
7139 |
+
"eval_loss": 0.7983007431030273,
|
7140 |
+
"eval_runtime": 9224.1097,
|
7141 |
+
"eval_samples_per_second": 8.823,
|
7142 |
+
"eval_steps_per_second": 0.138,
|
7143 |
"step": 5088
|
7144 |
},
|
7145 |
{
|
7146 |
"epoch": 1.0,
|
7147 |
"step": 5088,
|
7148 |
"total_flos": 1.5751056572484157e+19,
|
7149 |
+
"train_loss": 0.013955340304839536,
|
7150 |
+
"train_runtime": 11108.4839,
|
7151 |
+
"train_samples_per_second": 29.312,
|
7152 |
+
"train_steps_per_second": 0.458
|
7153 |
}
|
7154 |
],
|
7155 |
"logging_steps": 5,
|