File size: 1,999 Bytes
dda2c5c
 
 
 
 
 
 
 
 
 
 
 
5210ab6
dda2c5c
5210ab6
dda2c5c
 
 
 
5210ab6
dda2c5c
5210ab6
dda2c5c
 
 
 
5210ab6
dda2c5c
5210ab6
dda2c5c
 
 
 
5210ab6
dda2c5c
5210ab6
dda2c5c
 
 
 
5210ab6
dda2c5c
5210ab6
dda2c5c
 
 
 
5210ab6
dda2c5c
5210ab6
dda2c5c
 
 
 
 
5210ab6
 
 
 
dda2c5c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5210ab6
dda2c5c
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.5555555555555554,
  "eval_steps": 500,
  "global_step": 12,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.5925925925925926,
      "grad_norm": 1.1158123016357422,
      "learning_rate": 0.0001666666666666667,
      "loss": 9.4829,
      "step": 2
    },
    {
      "epoch": 1.1851851851851851,
      "grad_norm": 1.368739366531372,
      "learning_rate": 0.00013333333333333334,
      "loss": 9.144,
      "step": 4
    },
    {
      "epoch": 1.7777777777777777,
      "grad_norm": 2.1182327270507812,
      "learning_rate": 0.0001,
      "loss": 8.7124,
      "step": 6
    },
    {
      "epoch": 2.3703703703703702,
      "grad_norm": 2.3530497550964355,
      "learning_rate": 6.666666666666667e-05,
      "loss": 8.2535,
      "step": 8
    },
    {
      "epoch": 2.962962962962963,
      "grad_norm": 1.8051578998565674,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 7.8896,
      "step": 10
    },
    {
      "epoch": 3.5555555555555554,
      "grad_norm": 1.4535489082336426,
      "learning_rate": 0.0,
      "loss": 7.7924,
      "step": 12
    },
    {
      "epoch": 3.5555555555555554,
      "step": 12,
      "total_flos": 57778513872936.0,
      "train_loss": 8.54578431447347,
      "train_runtime": 54.5704,
      "train_samples_per_second": 3.958,
      "train_steps_per_second": 0.22
    }
  ],
  "logging_steps": 2,
  "max_steps": 12,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": false,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 57778513872936.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}