bogdancazan committed on
Commit
c09fca4
1 Parent(s): 6700421

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +87 -0
README.md ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ training_args = TrainingArguments(
2
+ output_dir='pegasus-base-wikilarge-newsela-with-domain-adaptation',
3
+ num_train_epochs=20,
4
+ warmup_steps=250,
5
+ per_device_train_batch_size=BATCH_SIZE,
6
+ weight_decay=0.01,
7
+ learning_rate=2e-4,
8
+ # fp16=True,
9
+ optim="adafactor",
10
+ )
11
+
12
+ Step Training Loss
13
+ 500 4.391800
14
+ 1000 3.994400
15
+ 1500 3.009300
16
+ 2000 2.596300
17
+ 2500 2.389600
18
+ 3000 2.328100
19
+ 3500 2.272900
20
+ 4000 2.125300
21
+ 4500 2.019500
22
+ 5000 2.005500
23
+ 5500 1.994400
24
+ 6000 1.853300
25
+ 6500 1.838700
26
+ 7000 1.840200
27
+ 7500 1.822700
28
+ 8000 1.716600
29
+ 8500 1.734100
30
+ 9000 1.739500
31
+ 9500 1.696000
32
+ 10000 1.661700
33
+ 10500 1.672500
34
+ 11000 1.666800
35
+ 11500 1.617700
36
+ 12000 1.611400
37
+ 12500 1.616300
38
+ 13000 1.625800
39
+ 13500 1.567700
40
+ 14000 1.584600
41
+ 14500 1.589800
42
+ 15000 1.574600
43
+ 15500 1.548300
44
+ 16000 1.559800
45
+ 16500 1.562100
46
+ 17000 1.541600
47
+ 17500 1.533500
48
+ 18000 1.538400
49
+ 18500 1.545700
50
+ 19000 1.510100
51
+ 19500 1.522600
52
+ 20000 1.529100
53
+ 20500 1.520900
54
+ 21000 1.501100
55
+ 21500 1.508400
56
+ 22000 1.515400
57
+ 22500 1.500100
58
+ 23000 1.496700
59
+ 23500 1.495900
60
+ 24000 1.505200
61
+ 24500 1.484400
62
+ 25000 1.483000
63
+ 25500 1.491200
64
+ 26000 1.491900
65
+ 26500 1.468600
66
+ 27000 1.479800
67
+ 27500 1.479600
68
+ 28000 1.474400
69
+ 28500 1.466000
70
+ 29000 1.471700
71
+ 29500 1.473100
72
+ 30000 1.463900
73
+ 30500 1.459300
74
+ 31000 1.463200
75
+ 31500 1.466800
76
+ 32000 1.455300
77
+ 32500 1.458600
78
+ 33000 1.455200
79
+ 33500 1.455500
80
+ 34000 1.449100
81
+ 34500 1.451400
82
+ 35000 1.451200
83
+ 35500 1.449000
84
+ 36000 1.443700
85
+ 36500 1.447600
86
+ 37000 1.447300
87
+ TrainOutput(global_step=37100, training_loss=1.7110925547967382, metrics={'train_runtime': 33376.534, 'train_samples_per_second': 8.889, 'train_steps_per_second': 1.112, 'total_flos': 0.0, 'train_loss': 1.7110925547967382, 'epoch': 20.0})