File size: 1,722 Bytes
edbc511
 
 
6f3590e
edbc511
6f3590e
edbc511
 
 
 
 
6f3590e
 
 
1b05c1d
 
 
6f3590e
 
 
1b05c1d
 
 
6f3590e
 
 
1b05c1d
 
 
6f3590e
 
 
1b05c1d
 
 
6f3590e
 
 
ae8157c
edbc511
 
6f3590e
 
 
1b05c1d
 
 
6f3590e
 
 
1b05c1d
 
 
6f3590e
 
 
1b05c1d
 
 
6f3590e
 
 
1b05c1d
 
 
6f3590e
 
 
 
 
 
 
edbc511
 
1b05c1d
beaf5b1
 
edbc511
6f3590e
edbc511
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 14.922630560928432,
  "eval_steps": 500,
  "global_step": 960,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.55,
      "learning_rate": 4.21875e-05,
      "loss": 4.1586,
      "step": 100
    },
    {
      "epoch": 3.09,
      "learning_rate": 3.4375e-05,
      "loss": 2.0648,
      "step": 200
    },
    {
      "epoch": 4.64,
      "learning_rate": 2.6562500000000002e-05,
      "loss": 1.7655,
      "step": 300
    },
    {
      "epoch": 6.19,
      "learning_rate": 1.8750000000000002e-05,
      "loss": 1.6324,
      "step": 400
    },
    {
      "epoch": 7.74,
      "learning_rate": 1.09375e-05,
      "loss": 1.5631,
      "step": 500
    },
    {
      "epoch": 9.35,
      "learning_rate": 1.8750000000000002e-05,
      "loss": 1.5197,
      "step": 600
    },
    {
      "epoch": 10.9,
      "learning_rate": 1.3541666666666666e-05,
      "loss": 1.4763,
      "step": 700
    },
    {
      "epoch": 12.45,
      "learning_rate": 8.333333333333334e-06,
      "loss": 1.4491,
      "step": 800
    },
    {
      "epoch": 13.99,
      "learning_rate": 3.125e-06,
      "loss": 1.4228,
      "step": 900
    },
    {
      "epoch": 14.92,
      "step": 960,
      "total_flos": 4.8490452612096e+16,
      "train_loss": 0.7002776622772217,
      "train_runtime": 2199.0947,
      "train_samples_per_second": 84.601,
      "train_steps_per_second": 0.437
    }
  ],
  "logging_steps": 100,
  "max_steps": 960,
  "num_train_epochs": 15,
  "save_steps": 500,
  "total_flos": 4.8490452612096e+16,
  "trial_name": null,
  "trial_params": null
}