bogdancazan
committed on
Commit
•
c78f6ba
1
Parent(s):
af89aae
Create README.md
Browse files
README.md
ADDED
@@ -0,0 +1,22 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
training_args = TrainingArguments(
|
2 |
+
output_dir='t5-base-newsela-biendata-with-domain-adaptation',
|
3 |
+
num_train_epochs=20,
|
4 |
+
warmup_steps=250,
|
5 |
+
per_device_train_batch_size=BATCH_SIZE,
|
6 |
+
weight_decay=0.01,
|
7 |
+
learning_rate=2e-4,
|
8 |
+
# fp16=True,
|
9 |
+
optim="adafactor",
|
10 |
+
)
|
11 |
+
|
12 |
+
Step Training Loss
|
13 |
+
500 31.865400
|
14 |
+
1000 37.290400
|
15 |
+
1500 18.697100
|
16 |
+
2000 4.517400
|
17 |
+
2500 2.976500
|
18 |
+
3000 2.558100
|
19 |
+
3500 2.305200
|
20 |
+
4000 2.187100
|
21 |
+
4500 2.125000
|
22 |
+
TrainOutput(global_step=4640, training_loss=11.326794861102925, metrics={'train_runtime': 5055.1704, 'train_samples_per_second': 58.688, 'train_steps_per_second': 0.918, 'total_flos': 0.0, 'train_loss': 11.326794861102925, 'epoch': 20.0})
|