File size: 2,398 Bytes
c71a050
20beda7
c71a050
 
 
 
 
 
 
 
 
 
20beda7
 
 
c71a050
 
 
 
20beda7
 
 
c71a050
 
 
 
20beda7
 
 
 
 
c71a050
 
 
 
20beda7
 
 
c71a050
 
 
 
20beda7
 
 
c71a050
 
 
 
20beda7
 
 
 
 
c71a050
 
 
 
20beda7
 
 
c71a050
 
 
 
20beda7
 
 
c71a050
 
 
 
20beda7
 
 
 
 
c71a050
 
 
 
 
20beda7
c71a050
 
20beda7
 
c71a050
 
20beda7
c71a050
 
20beda7
c71a050
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
{
  "best_metric": 0.4386209168112411,
  "best_model_checkpoint": "distilbert-base-uncased-finetuned-cola/run-0/checkpoint-3207",
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 3207,
  "is_hyper_param_search": true,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.47,
      "grad_norm": 5.35684061050415,
      "learning_rate": 2.775809181053764e-06,
      "loss": 0.5956,
      "step": 500
    },
    {
      "epoch": 0.94,
      "grad_norm": 11.188376426696777,
      "learning_rate": 2.263099690648562e-06,
      "loss": 0.5375,
      "step": 1000
    },
    {
      "epoch": 1.0,
      "eval_loss": 0.5379385948181152,
      "eval_matthews_correlation": 0.29871168199754417,
      "eval_runtime": 0.7649,
      "eval_samples_per_second": 1363.614,
      "eval_steps_per_second": 86.288,
      "step": 1069
    },
    {
      "epoch": 1.4,
      "grad_norm": 10.391807556152344,
      "learning_rate": 1.7503902002433598e-06,
      "loss": 0.4937,
      "step": 1500
    },
    {
      "epoch": 1.87,
      "grad_norm": 19.357559204101562,
      "learning_rate": 1.2376807098381578e-06,
      "loss": 0.4738,
      "step": 2000
    },
    {
      "epoch": 2.0,
      "eval_loss": 0.526983916759491,
      "eval_matthews_correlation": 0.41870797137315424,
      "eval_runtime": 0.7468,
      "eval_samples_per_second": 1396.551,
      "eval_steps_per_second": 88.372,
      "step": 2138
    },
    {
      "epoch": 2.34,
      "grad_norm": 11.473833084106445,
      "learning_rate": 7.249712194329557e-07,
      "loss": 0.4364,
      "step": 2500
    },
    {
      "epoch": 2.81,
      "grad_norm": 12.574313163757324,
      "learning_rate": 2.1226172902775366e-07,
      "loss": 0.4349,
      "step": 3000
    },
    {
      "epoch": 3.0,
      "eval_loss": 0.5435938239097595,
      "eval_matthews_correlation": 0.4386209168112411,
      "eval_runtime": 0.7548,
      "eval_samples_per_second": 1381.78,
      "eval_steps_per_second": 87.438,
      "step": 3207
    }
  ],
  "logging_steps": 500,
  "max_steps": 3207,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 113055491519748.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": {
    "learning_rate": 3.288518671458966e-06,
    "num_train_epochs": 3,
    "per_device_train_batch_size": 8,
    "seed": 24
  }
}