File size: 1,677 Bytes
af2c5aa
 
 
 
 
 
 
 
 
 
 
 
780a12c
af2c5aa
780a12c
af2c5aa
 
 
 
780a12c
af2c5aa
780a12c
af2c5aa
 
 
 
780a12c
af2c5aa
780a12c
af2c5aa
 
 
 
780a12c
af2c5aa
780a12c
af2c5aa
 
 
 
780a12c
af2c5aa
780a12c
af2c5aa
 
 
 
780a12c
af2c5aa
780a12c
af2c5aa
 
 
 
 
780a12c
 
 
 
 
af2c5aa
 
 
 
 
 
 
780a12c
af2c5aa
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 3300,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.45,
      "grad_norm": 3.782728910446167,
      "learning_rate": 4.242424242424243e-05,
      "loss": 0.532,
      "step": 500
    },
    {
      "epoch": 0.91,
      "grad_norm": 3.925285816192627,
      "learning_rate": 3.484848484848485e-05,
      "loss": 0.4013,
      "step": 1000
    },
    {
      "epoch": 1.36,
      "grad_norm": 7.782624244689941,
      "learning_rate": 2.7272727272727273e-05,
      "loss": 0.281,
      "step": 1500
    },
    {
      "epoch": 1.82,
      "grad_norm": 3.6547534465789795,
      "learning_rate": 1.9696969696969697e-05,
      "loss": 0.2454,
      "step": 2000
    },
    {
      "epoch": 2.27,
      "grad_norm": 0.9583206176757812,
      "learning_rate": 1.2121212121212122e-05,
      "loss": 0.1696,
      "step": 2500
    },
    {
      "epoch": 2.73,
      "grad_norm": 11.225361824035645,
      "learning_rate": 4.5454545454545455e-06,
      "loss": 0.1393,
      "step": 3000
    },
    {
      "epoch": 3.0,
      "step": 3300,
      "total_flos": 2871828253461180.0,
      "train_loss": 0.27864328904585406,
      "train_runtime": 413.6435,
      "train_samples_per_second": 127.632,
      "train_steps_per_second": 7.978
    }
  ],
  "logging_steps": 500,
  "max_steps": 3300,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 2871828253461180.0,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}