{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 14.4,
  "eval_steps": 500,
  "global_step": 180,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.2,
      "grad_norm": 0.8679821491241455,
      "learning_rate": 0.00019868265225415265,
      "loss": 4.9095,
      "step": 15
    },
    {
      "epoch": 2.4,
      "grad_norm": 1.064207911491394,
      "learning_rate": 0.00019075754196709572,
      "loss": 3.2019,
      "step": 30
    },
    {
      "epoch": 3.6,
      "grad_norm": 0.6014457941055298,
      "learning_rate": 0.00017621620551276366,
      "loss": 1.7706,
      "step": 45
    },
    {
      "epoch": 4.8,
      "grad_norm": 0.4954649806022644,
      "learning_rate": 0.00015611870653623825,
      "loss": 1.2936,
      "step": 60
    },
    {
      "epoch": 6.0,
      "grad_norm": 0.418344110250473,
      "learning_rate": 0.000131930153013598,
      "loss": 1.0465,
      "step": 75
    },
    {
      "epoch": 7.2,
      "grad_norm": 0.40778854489326477,
      "learning_rate": 0.00010541389085854176,
      "loss": 0.9186,
      "step": 90
    },
    {
      "epoch": 8.4,
      "grad_norm": 0.3741260766983032,
      "learning_rate": 7.85029559788976e-05,
      "loss": 0.8364,
      "step": 105
    },
    {
      "epoch": 9.6,
      "grad_norm": 0.4041515290737152,
      "learning_rate": 5.3159155930021e-05,
      "loss": 0.7861,
      "step": 120
    },
    {
      "epoch": 10.8,
      "grad_norm": 0.3697131872177124,
      "learning_rate": 3.123005411465766e-05,
      "loss": 0.7488,
      "step": 135
    },
    {
      "epoch": 12.0,
      "grad_norm": 0.4183999001979828,
      "learning_rate": 1.4314282383241096e-05,
      "loss": 0.7274,
      "step": 150
    },
    {
      "epoch": 13.2,
      "grad_norm": 0.4263518154621124,
      "learning_rate": 3.6450007480777093e-06,
      "loss": 0.7245,
      "step": 165
    },
    {
      "epoch": 14.4,
      "grad_norm": 0.3251570165157318,
      "learning_rate": 0.0,
      "loss": 0.7288,
      "step": 180
    },
    {
      "epoch": 14.4,
      "step": 180,
      "total_flos": 116292530995200.0,
      "train_loss": 1.4744000752766928,
      "train_runtime": 81.674,
      "train_samples_per_second": 18.366,
      "train_steps_per_second": 2.204
    }
  ],
  "logging_steps": 15,
  "max_steps": 180,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 15,
  "save_steps": 500,
  "total_flos": 116292530995200.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}