{
  "best_metric": 7.529487609863281,
  "best_model_checkpoint": "./results/checkpoint-916",
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 916,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.1091703056768559,
      "grad_norm": 25.480337142944336,
      "learning_rate": 2.959061135371179e-05,
      "loss": 56.923,
      "step": 100
    },
    {
      "epoch": 0.2183406113537118,
      "grad_norm": 39.87223815917969,
      "learning_rate": 2.918122270742358e-05,
      "loss": 46.6475,
      "step": 200
    },
    {
      "epoch": 0.32751091703056767,
      "grad_norm": 48.05048751831055,
      "learning_rate": 2.877183406113537e-05,
      "loss": 33.6867,
      "step": 300
    },
    {
      "epoch": 0.4366812227074236,
      "grad_norm": 31.941883087158203,
      "learning_rate": 2.8362445414847164e-05,
      "loss": 21.1084,
      "step": 400
    },
    {
      "epoch": 0.5458515283842795,
      "grad_norm": 55.025856018066406,
      "learning_rate": 2.7953056768558954e-05,
      "loss": 12.9495,
      "step": 500
    },
    {
      "epoch": 0.6550218340611353,
      "grad_norm": 34.957523345947266,
      "learning_rate": 2.7543668122270742e-05,
      "loss": 10.0745,
      "step": 600
    },
    {
      "epoch": 0.7641921397379913,
      "grad_norm": 24.020906448364258,
      "learning_rate": 2.7134279475982533e-05,
      "loss": 8.3541,
      "step": 700
    },
    {
      "epoch": 0.8733624454148472,
      "grad_norm": 32.709571838378906,
      "learning_rate": 2.6724890829694323e-05,
      "loss": 7.5128,
      "step": 800
    },
    {
      "epoch": 0.982532751091703,
      "grad_norm": 38.94672393798828,
      "learning_rate": 2.6315502183406114e-05,
      "loss": 7.2241,
      "step": 900
    },
    {
      "epoch": 1.0,
      "eval_avg_mae": 7.529487609863281,
      "eval_loss": 7.529487609863281,
      "eval_mae_lex": 6.992014408111572,
      "eval_mae_sem": 5.432034492492676,
      "eval_mae_syn": 10.164413452148438,
      "eval_runtime": 27.1764,
      "eval_samples_per_second": 269.609,
      "eval_steps_per_second": 8.426,
      "step": 916
    }
  ],
  "logging_steps": 100,
  "max_steps": 7328,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 8,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 1927766233338624.0,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": null
}