```python
from transformers import TrainingArguments

# Fine-tuning hyperparameters; BATCH_SIZE is defined earlier in the training script.
training_args = TrainingArguments(
    output_dir='pegasus-base-wikilarge-newsela-with-domain-adaptation',
    num_train_epochs=20,
    warmup_steps=250,
    per_device_train_batch_size=BATCH_SIZE,
    weight_decay=0.01,
    learning_rate=2e-4,
    # fp16=True,
    optim="adafactor",
)
```
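For context, a minimal sketch of how these arguments would typically be wired into a `Trainer` run; the `model`, `tokenized_train`, and `data_collator` names below are illustrative assumptions, not taken from this repository:

```python
from transformers import Trainer

# Hypothetical wiring of the arguments above into a Trainer run.
# `model`, `tokenized_train`, and `data_collator` are assumed to be
# defined earlier in the training script.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_train,
    data_collator=data_collator,
)

trainer.train()  # produces the step/loss log below
```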
| Step  | Training Loss |
|------:|--------------:|
| 500   | 4.391800 |
| 1000  | 3.994400 |
| 1500  | 3.009300 |
| 2000  | 2.596300 |
| 2500  | 2.389600 |
| 3000  | 2.328100 |
| 3500  | 2.272900 |
| 4000  | 2.125300 |
| 4500  | 2.019500 |
| 5000  | 2.005500 |
| 5500  | 1.994400 |
| 6000  | 1.853300 |
| 6500  | 1.838700 |
| 7000  | 1.840200 |
| 7500  | 1.822700 |
| 8000  | 1.716600 |
| 8500  | 1.734100 |
| 9000  | 1.739500 |
| 9500  | 1.696000 |
| 10000 | 1.661700 |
| 10500 | 1.672500 |
| 11000 | 1.666800 |
| 11500 | 1.617700 |
| 12000 | 1.611400 |
| 12500 | 1.616300 |
| 13000 | 1.625800 |
| 13500 | 1.567700 |
| 14000 | 1.584600 |
| 14500 | 1.589800 |
| 15000 | 1.574600 |
| 15500 | 1.548300 |
| 16000 | 1.559800 |
| 16500 | 1.562100 |
| 17000 | 1.541600 |
| 17500 | 1.533500 |
| 18000 | 1.538400 |
| 18500 | 1.545700 |
| 19000 | 1.510100 |
| 19500 | 1.522600 |
| 20000 | 1.529100 |
| 20500 | 1.520900 |
| 21000 | 1.501100 |
| 21500 | 1.508400 |
| 22000 | 1.515400 |
| 22500 | 1.500100 |
| 23000 | 1.496700 |
| 23500 | 1.495900 |
| 24000 | 1.505200 |
| 24500 | 1.484400 |
| 25000 | 1.483000 |
| 25500 | 1.491200 |
| 26000 | 1.491900 |
| 26500 | 1.468600 |
| 27000 | 1.479800 |
| 27500 | 1.479600 |
| 28000 | 1.474400 |
| 28500 | 1.466000 |
| 29000 | 1.471700 |
| 29500 | 1.473100 |
| 30000 | 1.463900 |
| 30500 | 1.459300 |
| 31000 | 1.463200 |
| 31500 | 1.466800 |
| 32000 | 1.455300 |
| 32500 | 1.458600 |
| 33000 | 1.455200 |
| 33500 | 1.455500 |
| 34000 | 1.449100 |
| 34500 | 1.451400 |
| 35000 | 1.451200 |
| 35500 | 1.449000 |
| 36000 | 1.443700 |
| 36500 | 1.447600 |
| 37000 | 1.447300 |
```
TrainOutput(global_step=37100, training_loss=1.7110925547967382, metrics={'train_runtime': 33376.534, 'train_samples_per_second': 8.889, 'train_steps_per_second': 1.112, 'total_flos': 0.0, 'train_loss': 1.7110925547967382, 'epoch': 20.0})
```
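After training, the checkpoint can be loaded back from the output directory. A minimal sketch, assuming the model and tokenizer were both saved to the `output_dir` named above (the example sentence is illustrative only):

```python
from transformers import PegasusForConditionalGeneration, PegasusTokenizer

# Assumes the fine-tuned weights and tokenizer were saved to the output_dir above.
model_dir = "pegasus-base-wikilarge-newsela-with-domain-adaptation"
tokenizer = PegasusTokenizer.from_pretrained(model_dir)
model = PegasusForConditionalGeneration.from_pretrained(model_dir)

text = "The quick brown fox jumps over the lazy dog."
inputs = tokenizer(text, return_tensors="pt", truncation=True)
outputs = model.generate(**inputs, max_length=64, num_beams=4)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```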