sourgum committed on
Commit
4563969
1 Parent(s): a724836

Upload trainer_state.json with huggingface_hub

Browse files
Files changed (1) hide show
  1. trainer_state.json +111 -0
trainer_state.json ADDED
@@ -0,0 +1,111 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 1.0291192531585693,
3
+ "best_model_checkpoint": "savedEpoch_nccl/checkpoint-91656",
4
+ "epoch": 6.0,
5
+ "eval_steps": 500,
6
+ "global_step": 91656,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 1.0,
13
+ "grad_norm": 0.6372342109680176,
14
+ "learning_rate": 9.882461854198269e-05,
15
+ "loss": 1.1441,
16
+ "step": 15276
17
+ },
18
+ {
19
+ "epoch": 1.0,
20
+ "eval_loss": 1.0554957389831543,
21
+ "eval_runtime": 57.5344,
22
+ "eval_samples_per_second": 12743.952,
23
+ "eval_steps_per_second": 33.198,
24
+ "step": 15276
25
+ },
26
+ {
27
+ "epoch": 2.0,
28
+ "grad_norm": 0.6399237513542175,
29
+ "learning_rate": 9.541848844472206e-05,
30
+ "loss": 1.0486,
31
+ "step": 30552
32
+ },
33
+ {
34
+ "epoch": 2.0,
35
+ "eval_loss": 1.0447489023208618,
36
+ "eval_runtime": 240.72,
37
+ "eval_samples_per_second": 3045.924,
38
+ "eval_steps_per_second": 7.935,
39
+ "step": 30552
40
+ },
41
+ {
42
+ "epoch": 3.0,
43
+ "grad_norm": 0.7907009124755859,
44
+ "learning_rate": 9.201235834746142e-05,
45
+ "loss": 1.0407,
46
+ "step": 45828
47
+ },
48
+ {
49
+ "epoch": 3.0,
50
+ "eval_loss": 1.0373780727386475,
51
+ "eval_runtime": 56.906,
52
+ "eval_samples_per_second": 12884.672,
53
+ "eval_steps_per_second": 33.564,
54
+ "step": 45828
55
+ },
56
+ {
57
+ "epoch": 4.0,
58
+ "grad_norm": 0.5956621766090393,
59
+ "learning_rate": 8.860622825020078e-05,
60
+ "loss": 1.0376,
61
+ "step": 61104
62
+ },
63
+ {
64
+ "epoch": 4.0,
65
+ "eval_loss": 1.0327318906784058,
66
+ "eval_runtime": 57.0015,
67
+ "eval_samples_per_second": 12863.086,
68
+ "eval_steps_per_second": 33.508,
69
+ "step": 61104
70
+ },
71
+ {
72
+ "epoch": 5.0,
73
+ "grad_norm": 0.6499078273773193,
74
+ "learning_rate": 8.519965200321228e-05,
75
+ "loss": 1.037,
76
+ "step": 76380
77
+ },
78
+ {
79
+ "epoch": 5.0,
80
+ "eval_loss": 1.0331288576126099,
81
+ "eval_runtime": 57.0146,
82
+ "eval_samples_per_second": 12860.131,
83
+ "eval_steps_per_second": 33.5,
84
+ "step": 76380
85
+ },
86
+ {
87
+ "epoch": 6.0,
88
+ "grad_norm": 0.5220733284950256,
89
+ "learning_rate": 8.179329883108772e-05,
90
+ "loss": 1.0344,
91
+ "step": 91656
92
+ },
93
+ {
94
+ "epoch": 6.0,
95
+ "eval_loss": 1.0291192531585693,
96
+ "eval_runtime": 56.9287,
97
+ "eval_samples_per_second": 12879.541,
98
+ "eval_steps_per_second": 33.551,
99
+ "step": 91656
100
+ }
101
+ ],
102
+ "logging_steps": 500,
103
+ "max_steps": 458280,
104
+ "num_input_tokens_seen": 0,
105
+ "num_train_epochs": 30,
106
+ "save_steps": 500,
107
+ "total_flos": 8.139053392031908e+17,
108
+ "train_batch_size": 96,
109
+ "trial_name": null,
110
+ "trial_params": null
111
+ }