File size: 1,560 Bytes
5250b95
 
 
bf97c39
5250b95
bf97c39
5250b95
 
 
 
240c2ee
a4ce4c7
f7507b0
 
f2a9eda
240c2ee
 
a4ce4c7
f7507b0
 
f2a9eda
053f8fb
 
a4ce4c7
f7507b0
 
f2a9eda
053f8fb
 
a4ce4c7
f7507b0
 
f2a9eda
053f8fb
 
bf97c39
 
f7507b0
bf97c39
 
 
 
 
f7507b0
bf97c39
 
 
 
 
f7507b0
bf97c39
 
 
 
 
f7507b0
bf97c39
 
 
 
 
5250b95
f7507b0
 
 
 
5250b95
 
240c2ee
bf97c39
5250b95
bf97c39
5250b95
 
a4ce4c7
5250b95
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 20.0,
  "eval_steps": 1000,
  "global_step": 200,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 2.5,
      "learning_rate": 4.425e-07,
      "loss": 9.0808,
      "step": 25
    },
    {
      "epoch": 5.0,
      "learning_rate": 3.7999999999999996e-07,
      "loss": 8.0507,
      "step": 50
    },
    {
      "epoch": 7.5,
      "learning_rate": 3.175e-07,
      "loss": 7.5948,
      "step": 75
    },
    {
      "epoch": 10.0,
      "learning_rate": 2.55e-07,
      "loss": 7.2855,
      "step": 100
    },
    {
      "epoch": 12.5,
      "learning_rate": 1.9249999999999998e-07,
      "loss": 7.1565,
      "step": 125
    },
    {
      "epoch": 15.0,
      "learning_rate": 1.3e-07,
      "loss": 7.0459,
      "step": 150
    },
    {
      "epoch": 17.5,
      "learning_rate": 6.75e-08,
      "loss": 7.0015,
      "step": 175
    },
    {
      "epoch": 20.0,
      "learning_rate": 5e-09,
      "loss": 6.9769,
      "step": 200
    },
    {
      "epoch": 20.0,
      "step": 200,
      "total_flos": 0.0,
      "train_loss": 7.524070739746094,
      "train_runtime": 153.2004,
      "train_samples_per_second": 1.305,
      "train_steps_per_second": 1.305
    }
  ],
  "logging_steps": 25,
  "max_steps": 200,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 20,
  "save_steps": 500,
  "total_flos": 0.0,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}