File size: 1,697 Bytes
db7f9c2
 
 
 
 
dacbbef
db7f9c2
 
 
 
 
dacbbef
 
db7f9c2
dacbbef
db7f9c2
 
 
dacbbef
 
db7f9c2
dacbbef
db7f9c2
 
 
dacbbef
 
db7f9c2
dacbbef
db7f9c2
 
 
dacbbef
 
db7f9c2
dacbbef
db7f9c2
 
 
dacbbef
 
db7f9c2
dacbbef
db7f9c2
 
 
dacbbef
 
db7f9c2
dacbbef
db7f9c2
 
 
 
dacbbef
db7f9c2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dacbbef
db7f9c2
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 1650,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.9090909090909091,
      "grad_norm": 0.3847486674785614,
      "learning_rate": 0.0002,
      "loss": 0.774,
      "step": 250
    },
    {
      "epoch": 1.8181818181818183,
      "grad_norm": 0.48815852403640747,
      "learning_rate": 0.0002,
      "loss": 0.3868,
      "step": 500
    },
    {
      "epoch": 2.7272727272727275,
      "grad_norm": 0.574570894241333,
      "learning_rate": 0.0002,
      "loss": 0.2431,
      "step": 750
    },
    {
      "epoch": 3.6363636363636362,
      "grad_norm": 0.3991849720478058,
      "learning_rate": 0.0002,
      "loss": 0.1852,
      "step": 1000
    },
    {
      "epoch": 4.545454545454545,
      "grad_norm": 0.34638234972953796,
      "learning_rate": 0.0002,
      "loss": 0.1582,
      "step": 1250
    },
    {
      "epoch": 5.454545454545454,
      "grad_norm": 0.32800230383872986,
      "learning_rate": 0.0002,
      "loss": 0.1419,
      "step": 1500
    }
  ],
  "logging_steps": 250,
  "max_steps": 1650,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 6,
  "save_steps": 250,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.028449026965504e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}