JessicaOjo commited on
Commit
2e40951
·
verified ·
1 Parent(s): 6b64352

Upload trainer_state.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. trainer_state.json +146 -0
trainer_state.json ADDED
@@ -0,0 +1,146 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_global_step": 1632,
3
+ "best_metric": 1.5434752702713013,
4
+ "best_model_checkpoint": "./mt5_base/afri_loss/hau/checkpoint-1632",
5
+ "epoch": 6.0,
6
+ "eval_steps": 500,
7
+ "global_step": 2448,
8
+ "is_hyper_param_search": false,
9
+ "is_local_process_zero": true,
10
+ "is_world_process_zero": true,
11
+ "log_history": [
12
+ {
13
+ "epoch": 1.0,
14
+ "eval_bleu": 11.0428,
15
+ "eval_gen_len": 19.9161,
16
+ "eval_loss": 1.777604103088379,
17
+ "eval_runtime": 30.3524,
18
+ "eval_samples_per_second": 20.427,
19
+ "eval_steps_per_second": 2.57,
20
+ "eval_wer": 0.6495,
21
+ "step": 408
22
+ },
23
+ {
24
+ "epoch": 1.2254901960784315,
25
+ "grad_norm": 3.150831460952759,
26
+ "learning_rate": 4.694240196078432e-05,
27
+ "loss": 4.2043,
28
+ "step": 500
29
+ },
30
+ {
31
+ "epoch": 2.0,
32
+ "eval_bleu": 12.0593,
33
+ "eval_gen_len": 19.9355,
34
+ "eval_loss": 1.6548242568969727,
35
+ "eval_runtime": 27.9266,
36
+ "eval_samples_per_second": 22.201,
37
+ "eval_steps_per_second": 2.793,
38
+ "eval_wer": 0.6258,
39
+ "step": 816
40
+ },
41
+ {
42
+ "epoch": 2.450980392156863,
43
+ "grad_norm": 1.5954734086990356,
44
+ "learning_rate": 4.3878676470588234e-05,
45
+ "loss": 0.6735,
46
+ "step": 1000
47
+ },
48
+ {
49
+ "epoch": 3.0,
50
+ "eval_bleu": 12.2009,
51
+ "eval_gen_len": 19.9355,
52
+ "eval_loss": 1.661535382270813,
53
+ "eval_runtime": 27.9118,
54
+ "eval_samples_per_second": 22.213,
55
+ "eval_steps_per_second": 2.795,
56
+ "eval_wer": 0.628,
57
+ "step": 1224
58
+ },
59
+ {
60
+ "epoch": 3.6764705882352944,
61
+ "grad_norm": 1.46583092212677,
62
+ "learning_rate": 4.081495098039216e-05,
63
+ "loss": 0.5464,
64
+ "step": 1500
65
+ },
66
+ {
67
+ "epoch": 4.0,
68
+ "eval_bleu": 12.3959,
69
+ "eval_gen_len": 19.9339,
70
+ "eval_loss": 1.5434752702713013,
71
+ "eval_runtime": 28.2519,
72
+ "eval_samples_per_second": 21.945,
73
+ "eval_steps_per_second": 2.761,
74
+ "eval_wer": 0.6297,
75
+ "step": 1632
76
+ },
77
+ {
78
+ "epoch": 4.901960784313726,
79
+ "grad_norm": 1.8919347524642944,
80
+ "learning_rate": 3.775122549019608e-05,
81
+ "loss": 0.4793,
82
+ "step": 2000
83
+ },
84
+ {
85
+ "epoch": 5.0,
86
+ "eval_bleu": 12.5111,
87
+ "eval_gen_len": 19.9323,
88
+ "eval_loss": 1.552480697631836,
89
+ "eval_runtime": 27.9429,
90
+ "eval_samples_per_second": 22.188,
91
+ "eval_steps_per_second": 2.791,
92
+ "eval_wer": 0.6303,
93
+ "step": 2040
94
+ },
95
+ {
96
+ "epoch": 6.0,
97
+ "eval_bleu": 12.4327,
98
+ "eval_gen_len": 19.9323,
99
+ "eval_loss": 1.5919784307479858,
100
+ "eval_runtime": 29.0796,
101
+ "eval_samples_per_second": 21.321,
102
+ "eval_steps_per_second": 2.682,
103
+ "eval_wer": 0.6308,
104
+ "step": 2448
105
+ },
106
+ {
107
+ "epoch": 6.0,
108
+ "step": 2448,
109
+ "total_flos": 3428147830947840.0,
110
+ "train_loss": 1.285437066570606,
111
+ "train_runtime": 691.8942,
112
+ "train_samples_per_second": 94.205,
113
+ "train_steps_per_second": 11.794
114
+ }
115
+ ],
116
+ "logging_steps": 500,
117
+ "max_steps": 8160,
118
+ "num_input_tokens_seen": 0,
119
+ "num_train_epochs": 20,
120
+ "save_steps": 50000.0,
121
+ "stateful_callbacks": {
122
+ "EarlyStoppingCallback": {
123
+ "args": {
124
+ "early_stopping_patience": 2,
125
+ "early_stopping_threshold": 0.0
126
+ },
127
+ "attributes": {
128
+ "early_stopping_patience_counter": 2
129
+ }
130
+ },
131
+ "TrainerControl": {
132
+ "args": {
133
+ "should_epoch_stop": false,
134
+ "should_evaluate": false,
135
+ "should_log": false,
136
+ "should_save": true,
137
+ "should_training_stop": true
138
+ },
139
+ "attributes": {}
140
+ }
141
+ },
142
+ "total_flos": 3428147830947840.0,
143
+ "train_batch_size": 8,
144
+ "trial_name": null,
145
+ "trial_params": null
146
+ }