File size: 1,547 Bytes
9754a88
 
 
d0b8347
9754a88
96523bd
9754a88
 
 
 
 
d0b8347
96523bd
d0b8347
9754a88
 
 
d0b8347
96523bd
d0b8347
9754a88
 
 
d0b8347
 
 
 
 
 
 
 
 
96523bd
d0b8347
9754a88
 
 
d0b8347
96523bd
d0b8347
9754a88
 
 
d0b8347
 
 
 
 
9754a88
 
 
d0b8347
96523bd
d0b8347
 
 
 
 
9754a88
 
 
96523bd
9754a88
d0b8347
9754a88
d0b8347
9754a88
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.92,
  "eval_steps": 500,
  "global_step": 18,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.11,
      "learning_rate": 0.0001,
      "loss": 2.1556,
      "step": 1
    },
    {
      "epoch": 0.53,
      "learning_rate": 0.00018314696123025454,
      "loss": 1.6966,
      "step": 5
    },
    {
      "epoch": 0.96,
      "eval_loss": 0.4002748727798462,
      "eval_runtime": 5.8667,
      "eval_samples_per_second": 17.045,
      "eval_steps_per_second": 2.216,
      "step": 9
    },
    {
      "epoch": 1.07,
      "learning_rate": 0.0001,
      "loss": 0.5695,
      "step": 10
    },
    {
      "epoch": 1.6,
      "learning_rate": 1.6853038769745467e-05,
      "loss": 0.3419,
      "step": 15
    },
    {
      "epoch": 1.92,
      "eval_loss": 0.3410404324531555,
      "eval_runtime": 7.4319,
      "eval_samples_per_second": 13.456,
      "eval_steps_per_second": 1.749,
      "step": 18
    },
    {
      "epoch": 1.92,
      "step": 18,
      "total_flos": 27578625622016.0,
      "train_loss": 0.8035296764638689,
      "train_runtime": 289.9665,
      "train_samples_per_second": 2.069,
      "train_steps_per_second": 0.062
    }
  ],
  "logging_steps": 5,
  "max_steps": 18,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 500,
  "total_flos": 27578625622016.0,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}