File size: 2,518 Bytes
6696c3f
 
 
 
 
 
 
 
 
 
 
 
0243293
6696c3f
0243293
6696c3f
 
 
 
0243293
6696c3f
0243293
6696c3f
 
 
 
0243293
6696c3f
0243293
6696c3f
 
 
 
0243293
6696c3f
0243293
6696c3f
 
 
 
0243293
6696c3f
0243293
6696c3f
 
 
 
0243293
6696c3f
0243293
6696c3f
 
 
 
0243293
6696c3f
0243293
6696c3f
 
 
 
0243293
6696c3f
0243293
6696c3f
 
 
 
0243293
 
 
 
6696c3f
 
 
 
 
 
0243293
 
 
 
6696c3f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 35,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02857142857142857,
      "grad_norm": 1.1929763555526733,
      "learning_rate": 5e-05,
      "loss": 0.842,
      "step": 1
    },
    {
      "epoch": 0.14285714285714285,
      "grad_norm": 0.5904757380485535,
      "learning_rate": 0.00019948693233918952,
      "loss": 0.7602,
      "step": 5
    },
    {
      "epoch": 0.2857142857142857,
      "grad_norm": 0.35242587327957153,
      "learning_rate": 0.00018207634412072764,
      "loss": 0.5166,
      "step": 10
    },
    {
      "epoch": 0.42857142857142855,
      "grad_norm": 0.23204153776168823,
      "learning_rate": 0.00014403941515576344,
      "loss": 0.3699,
      "step": 15
    },
    {
      "epoch": 0.5714285714285714,
      "grad_norm": 0.21575891971588135,
      "learning_rate": 9.493508311612874e-05,
      "loss": 0.2784,
      "step": 20
    },
    {
      "epoch": 0.7142857142857143,
      "grad_norm": 0.27512410283088684,
      "learning_rate": 4.710359896730379e-05,
      "loss": 0.222,
      "step": 25
    },
    {
      "epoch": 0.8571428571428571,
      "grad_norm": 0.3192328214645386,
      "learning_rate": 1.2565338385541792e-05,
      "loss": 0.1813,
      "step": 30
    },
    {
      "epoch": 1.0,
      "grad_norm": 0.11489653587341309,
      "learning_rate": 0.0,
      "loss": 0.1727,
      "step": 35
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.1722353994846344,
      "eval_runtime": 11.0271,
      "eval_samples_per_second": 5.623,
      "eval_steps_per_second": 0.363,
      "step": 35
    },
    {
      "epoch": 1.0,
      "step": 35,
      "total_flos": 5.19321114598441e+16,
      "train_loss": 0.3596490706716265,
      "train_runtime": 336.8523,
      "train_samples_per_second": 1.657,
      "train_steps_per_second": 0.104
    }
  ],
  "logging_steps": 5,
  "max_steps": 35,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 5.19321114598441e+16,
  "train_batch_size": 4,
  "trial_name": null,
  "trial_params": null
}