File size: 2,596 Bytes
ce30918
 
 
0e2e36c
ce30918
0e2e36c
ce30918
 
 
 
 
0e2e36c
 
927a8c7
ce30918
 
 
0e2e36c
 
927a8c7
ce30918
 
 
0e2e36c
 
927a8c7
ce30918
 
 
0e2e36c
927a8c7
 
 
 
ce30918
 
 
0e2e36c
 
927a8c7
ce30918
 
 
0e2e36c
 
927a8c7
ce30918
 
 
0e2e36c
927a8c7
 
 
 
ce30918
 
 
0e2e36c
 
927a8c7
ce30918
 
 
0e2e36c
 
927a8c7
0e2e36c
 
 
 
927a8c7
 
 
 
0e2e36c
 
 
 
 
927a8c7
0e2e36c
 
 
 
 
927a8c7
0e2e36c
 
 
 
927a8c7
 
 
 
0e2e36c
 
 
 
 
 
927a8c7
 
 
0e2e36c
ce30918
 
 
0e2e36c
ce30918
 
 
0e2e36c
 
ce30918
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9905660377358491,
  "eval_steps": 10,
  "global_step": 42,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 4.9930094929529506e-05,
      "loss": 1.1016,
      "step": 1
    },
    {
      "epoch": 0.12,
      "learning_rate": 4.827184371610511e-05,
      "loss": 1.0942,
      "step": 5
    },
    {
      "epoch": 0.24,
      "learning_rate": 4.332629679574566e-05,
      "loss": 1.1071,
      "step": 10
    },
    {
      "epoch": 0.24,
      "eval_loss": 1.0860216617584229,
      "eval_runtime": 24.5643,
      "eval_samples_per_second": 5.252,
      "eval_steps_per_second": 1.751,
      "step": 10
    },
    {
      "epoch": 0.35,
      "learning_rate": 3.5847093477938956e-05,
      "loss": 1.0982,
      "step": 15
    },
    {
      "epoch": 0.47,
      "learning_rate": 2.686825233966061e-05,
      "loss": 1.0907,
      "step": 20
    },
    {
      "epoch": 0.47,
      "eval_loss": 1.0737882852554321,
      "eval_runtime": 24.5637,
      "eval_samples_per_second": 5.252,
      "eval_steps_per_second": 1.751,
      "step": 20
    },
    {
      "epoch": 0.59,
      "learning_rate": 1.7631120639727393e-05,
      "loss": 1.0756,
      "step": 25
    },
    {
      "epoch": 0.71,
      "learning_rate": 9.412754953531663e-06,
      "loss": 1.0866,
      "step": 30
    },
    {
      "epoch": 0.71,
      "eval_loss": 1.0668362379074097,
      "eval_runtime": 24.554,
      "eval_samples_per_second": 5.254,
      "eval_steps_per_second": 1.751,
      "step": 30
    },
    {
      "epoch": 0.83,
      "learning_rate": 3.3493649053890326e-06,
      "loss": 1.0749,
      "step": 35
    },
    {
      "epoch": 0.94,
      "learning_rate": 2.7922934437178695e-07,
      "loss": 1.0689,
      "step": 40
    },
    {
      "epoch": 0.94,
      "eval_loss": 1.0651096105575562,
      "eval_runtime": 24.5601,
      "eval_samples_per_second": 5.252,
      "eval_steps_per_second": 1.751,
      "step": 40
    },
    {
      "epoch": 0.99,
      "step": 42,
      "total_flos": 6.447644673468006e+16,
      "train_loss": 1.086761324178605,
      "train_runtime": 3234.0471,
      "train_samples_per_second": 1.572,
      "train_steps_per_second": 0.013
    }
  ],
  "logging_steps": 5,
  "max_steps": 42,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 20,
  "total_flos": 6.447644673468006e+16,
  "train_batch_size": 6,
  "trial_name": null,
  "trial_params": null
}