File size: 2,001 Bytes
9b3480b
 
 
 
 
 
 
 
 
 
 
 
c3a108b
9b3480b
c3a108b
9b3480b
 
 
 
c3a108b
9b3480b
c3a108b
9b3480b
 
 
 
c3a108b
9b3480b
c3a108b
9b3480b
 
 
 
c3a108b
9b3480b
c3a108b
9b3480b
 
 
 
c3a108b
9b3480b
c3a108b
9b3480b
 
 
 
c3a108b
9b3480b
c3a108b
9b3480b
 
 
 
 
c3a108b
 
 
 
775f6d0
9b3480b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
c3a108b
9b3480b
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.5555555555555554,
  "eval_steps": 500,
  "global_step": 12,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.5925925925925926,
      "grad_norm": 1.0686613321304321,
      "learning_rate": 0.0001666666666666667,
      "loss": 9.4726,
      "step": 2
    },
    {
      "epoch": 1.1851851851851851,
      "grad_norm": 1.2996327877044678,
      "learning_rate": 0.00013333333333333334,
      "loss": 9.1175,
      "step": 4
    },
    {
      "epoch": 1.7777777777777777,
      "grad_norm": 2.3309664726257324,
      "learning_rate": 0.0001,
      "loss": 8.7037,
      "step": 6
    },
    {
      "epoch": 2.3703703703703702,
      "grad_norm": 2.276108503341675,
      "learning_rate": 6.666666666666667e-05,
      "loss": 8.2289,
      "step": 8
    },
    {
      "epoch": 2.962962962962963,
      "grad_norm": 1.6182162761688232,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 7.8725,
      "step": 10
    },
    {
      "epoch": 3.5555555555555554,
      "grad_norm": 1.3706101179122925,
      "learning_rate": 0.0,
      "loss": 7.7669,
      "step": 12
    },
    {
      "epoch": 3.5555555555555554,
      "step": 12,
      "total_flos": 60069698944968.0,
      "train_loss": 8.527011315027872,
      "train_runtime": 334.2918,
      "train_samples_per_second": 0.646,
      "train_steps_per_second": 0.036
    }
  ],
  "logging_steps": 2,
  "max_steps": 12,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 60069698944968.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}