File size: 2,324 Bytes
9e5bc07
 
 
8f1c71f
9e5bc07
a718b54
9e5bc07
 
 
 
8f1c71f
a718b54
 
 
8f1c71f
 
9e5bc07
a718b54
 
 
9e5bc07
 
 
 
a718b54
 
 
 
 
 
8c429e8
 
a718b54
 
 
 
8c429e8
 
a718b54
 
 
 
8c429e8
 
a718b54
 
 
 
8c429e8
 
8f1c71f
a718b54
 
 
 
 
 
8c429e8
 
a718b54
 
 
 
8c429e8
 
8f1c71f
a718b54
 
 
84c2159
 
a718b54
 
 
 
f18fc82
 
8f1c71f
a718b54
 
 
 
 
 
f18fc82
 
8f1c71f
a718b54
 
 
 
 
 
9e5bc07
 
8f1c71f
a718b54
9e5bc07
8f1c71f
9e5bc07
a718b54
8f1c71f
9e5bc07
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 84,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.36,
      "learning_rate": 7.996491320395433e-06,
      "loss": 459.6986,
      "step": 10
    },
    {
      "epoch": 0.71,
      "learning_rate": 7.582847040957651e-06,
      "loss": 429.225,
      "step": 20
    },
    {
      "epoch": 1.0,
      "eval_f1": 0.0,
      "eval_loss": 329.4008483886719,
      "eval_runtime": 2.9528,
      "eval_samples_per_second": 59.604,
      "eval_steps_per_second": 1.016,
      "step": 28
    },
    {
      "epoch": 1.07,
      "learning_rate": 6.549695958994759e-06,
      "loss": 361.3969,
      "step": 30
    },
    {
      "epoch": 1.43,
      "learning_rate": 5.0756792824610625e-06,
      "loss": 347.9836,
      "step": 40
    },
    {
      "epoch": 1.79,
      "learning_rate": 3.415667885750354e-06,
      "loss": 312.0525,
      "step": 50
    },
    {
      "epoch": 2.0,
      "eval_f1": 0.0,
      "eval_loss": 266.168701171875,
      "eval_runtime": 2.5812,
      "eval_samples_per_second": 68.187,
      "eval_steps_per_second": 1.162,
      "step": 56
    },
    {
      "epoch": 2.14,
      "learning_rate": 1.8566928200840125e-06,
      "loss": 324.8967,
      "step": 60
    },
    {
      "epoch": 2.5,
      "learning_rate": 6.683150371596022e-07,
      "loss": 286.3452,
      "step": 70
    },
    {
      "epoch": 2.86,
      "learning_rate": 5.601585171798051e-08,
      "loss": 280.638,
      "step": 80
    },
    {
      "epoch": 3.0,
      "eval_f1": 0.0,
      "eval_loss": 253.1309051513672,
      "eval_runtime": 2.6388,
      "eval_samples_per_second": 66.696,
      "eval_steps_per_second": 1.137,
      "step": 84
    },
    {
      "epoch": 3.0,
      "step": 84,
      "total_flos": 2697390194688.0,
      "train_loss": 348.7352382114955,
      "train_runtime": 184.625,
      "train_samples_per_second": 14.251,
      "train_steps_per_second": 0.455
    }
  ],
  "logging_steps": 10,
  "max_steps": 84,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 2697390194688.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}