horsbug98 commited on
Commit
e8b02f7
1 Parent(s): 074529d

Upload trainer_state.json

Browse files
Files changed (1) hide show
  1. trainer_state.json +57 -15
trainer_state.json CHANGED
@@ -2,36 +2,78 @@
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 1.0,
5
- "global_step": 1242,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
- "epoch": 0.4,
12
- "learning_rate": 1.7922705314009663e-05,
13
- "loss": 1.5077,
14
  "step": 500
15
  },
16
  {
17
- "epoch": 0.81,
18
- "learning_rate": 5.845410628019324e-06,
19
- "loss": 1.2752,
20
  "step": 1000
21
  },
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
  {
23
  "epoch": 1.0,
24
- "step": 1242,
25
- "total_flos": 2920187229092352.0,
26
- "train_loss": 1.3494737904620824,
27
- "train_runtime": 616.7037,
28
- "train_samples_per_second": 24.162,
29
- "train_steps_per_second": 2.014
30
  }
31
  ],
32
- "max_steps": 1242,
33
  "num_train_epochs": 1,
34
- "total_flos": 2920187229092352.0,
35
  "trial_name": null,
36
  "trial_params": null
37
  }
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 1.0,
5
+ "global_step": 4685,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
  {
11
+ "epoch": 0.11,
12
+ "learning_rate": 2.67982924226254e-05,
13
+ "loss": 2.2761,
14
  "step": 500
15
  },
16
  {
17
+ "epoch": 0.21,
18
+ "learning_rate": 2.35965848452508e-05,
19
+ "loss": 1.6473,
20
  "step": 1000
21
  },
22
+ {
23
+ "epoch": 0.32,
24
+ "learning_rate": 2.0394877267876203e-05,
25
+ "loss": 1.5402,
26
+ "step": 1500
27
+ },
28
+ {
29
+ "epoch": 0.43,
30
+ "learning_rate": 1.7193169690501603e-05,
31
+ "loss": 1.4675,
32
+ "step": 2000
33
+ },
34
+ {
35
+ "epoch": 0.53,
36
+ "learning_rate": 1.3991462113127e-05,
37
+ "loss": 1.4086,
38
+ "step": 2500
39
+ },
40
+ {
41
+ "epoch": 0.64,
42
+ "learning_rate": 1.0789754535752402e-05,
43
+ "loss": 1.3708,
44
+ "step": 3000
45
+ },
46
+ {
47
+ "epoch": 0.75,
48
+ "learning_rate": 7.588046958377802e-06,
49
+ "loss": 1.3538,
50
+ "step": 3500
51
+ },
52
+ {
53
+ "epoch": 0.85,
54
+ "learning_rate": 4.386339381003202e-06,
55
+ "loss": 1.2966,
56
+ "step": 4000
57
+ },
58
+ {
59
+ "epoch": 0.96,
60
+ "learning_rate": 1.1846318036286021e-06,
61
+ "loss": 1.2822,
62
+ "step": 4500
63
+ },
64
  {
65
  "epoch": 1.0,
66
+ "step": 4685,
67
+ "total_flos": 1.1016793857503232e+16,
68
+ "train_loss": 1.5054649426308615,
69
+ "train_runtime": 2424.9282,
70
+ "train_samples_per_second": 23.183,
71
+ "train_steps_per_second": 1.932
72
  }
73
  ],
74
+ "max_steps": 4685,
75
  "num_train_epochs": 1,
76
+ "total_flos": 1.1016793857503232e+16,
77
  "trial_name": null,
78
  "trial_params": null
79
  }