bogdancazan
committed on
Commit
•
1a4efb3
1
Parent(s):
1445bdd
Update README.md
Browse files
README.md
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
training_args = TrainingArguments(
|
2 |
-
output_dir='t5-base-newsela-
|
3 |
num_train_epochs=20,
|
4 |
warmup_steps=250,
|
5 |
per_device_train_batch_size=BATCH_SIZE,
|
@@ -10,13 +10,13 @@ training_args = TrainingArguments(
|
|
10 |
)
|
11 |
|
12 |
Step Training Loss
|
13 |
-
500
|
14 |
-
1000
|
15 |
-
1500
|
16 |
-
2000
|
17 |
-
2500
|
18 |
-
3000
|
19 |
-
3500
|
20 |
-
4000
|
21 |
-
4500
|
22 |
-
TrainOutput(global_step=4640, training_loss=
|
|
|
1 |
training_args = TrainingArguments(
|
2 |
+
output_dir='t5-base-wikilarge-newsela-with-domain-adaptation',
|
3 |
num_train_epochs=20,
|
4 |
warmup_steps=250,
|
5 |
per_device_train_batch_size=BATCH_SIZE,
|
|
|
10 |
)
|
11 |
|
12 |
Step Training Loss
|
13 |
+
500 4.184500
|
14 |
+
1000 2.470900
|
15 |
+
1500 2.128900
|
16 |
+
2000 1.951600
|
17 |
+
2500 1.834400
|
18 |
+
3000 1.755800
|
19 |
+
3500 1.701800
|
20 |
+
4000 1.656300
|
21 |
+
4500 1.628800
|
22 |
+
TrainOutput(global_step=4640, training_loss=2.1286644540984057, metrics={'train_runtime': 4090.6694, 'train_samples_per_second': 72.526, 'train_steps_per_second': 1.134, 'total_flos': 0.0, 'train_loss': 2.1286644540984057, 'epoch': 20.0})
|