File size: 1,692 Bytes
78222e0
 
 
 
 
804f0fa
78222e0
 
 
 
 
804f0fa
 
78222e0
804f0fa
78222e0
 
 
804f0fa
 
78222e0
804f0fa
78222e0
 
 
804f0fa
 
78222e0
804f0fa
78222e0
 
 
804f0fa
 
78222e0
804f0fa
78222e0
 
 
804f0fa
 
78222e0
804f0fa
78222e0
 
 
804f0fa
 
78222e0
804f0fa
78222e0
 
 
 
804f0fa
78222e0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
804f0fa
78222e0
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 1650,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 0.6740289330482483,
      "learning_rate": 0.0002,
      "loss": 0.8442,
      "step": 250
    },
    {
      "epoch": 1.8181818181818183,
      "grad_norm": 0.722176194190979,
      "learning_rate": 0.0002,
      "loss": 0.421,
      "step": 500
    },
    {
      "epoch": 2.7272727272727275,
      "grad_norm": 0.679882287979126,
      "learning_rate": 0.0002,
      "loss": 0.2707,
      "step": 750
    },
    {
      "epoch": 3.6363636363636362,
      "grad_norm": 0.9423966407775879,
      "learning_rate": 0.0002,
      "loss": 0.2051,
      "step": 1000
    },
    {
      "epoch": 4.545454545454545,
      "grad_norm": 0.7787116765975952,
      "learning_rate": 0.0002,
      "loss": 0.1783,
      "step": 1250
    },
    {
      "epoch": 5.454545454545454,
      "grad_norm": 0.499831885099411,
      "learning_rate": 0.0002,
      "loss": 0.1577,
      "step": 1500
    }
  ],
  "logging_steps": 250,
  "max_steps": 1650,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 250,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.324891664420864e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}