File size: 4,045 Bytes
7133580
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
{
  "best_metric": 0.9028693291973633,
  "best_model_checkpoint": "tiny-bert-sst2-distilled/run-27/checkpoint-3870",
  "epoch": 6.0,
  "eval_steps": 500,
  "global_step": 3870,
  "is_hyper_param_search": true,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "grad_norm": 2.374743938446045,
      "learning_rate": 8.115439310915276e-05,
      "loss": 0.333,
      "step": 645
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.8912369135323769,
      "eval_f1": 0.16888888888888887,
      "eval_loss": 0.30435287952423096,
      "eval_mcc": 0.19762599967287012,
      "eval_precision": 0.5377358490566038,
      "eval_recall": 0.10017574692442882,
      "eval_runtime": 9.9164,
      "eval_samples_per_second": 520.146,
      "eval_steps_per_second": 16.337,
      "step": 645
    },
    {
      "epoch": 2.0,
      "grad_norm": 0.9690736532211304,
      "learning_rate": 6.95609083792738e-05,
      "loss": 0.2975,
      "step": 1290
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.8953082590151221,
      "eval_f1": 0.19161676646706588,
      "eval_loss": 0.2960352897644043,
      "eval_mcc": 0.2394077507232092,
      "eval_precision": 0.6464646464646465,
      "eval_recall": 0.11247803163444639,
      "eval_runtime": 9.9096,
      "eval_samples_per_second": 520.505,
      "eval_steps_per_second": 16.348,
      "step": 1290
    },
    {
      "epoch": 3.0,
      "grad_norm": 2.378129005432129,
      "learning_rate": 5.796742364939483e-05,
      "loss": 0.2881,
      "step": 1935
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.8968592477704537,
      "eval_f1": 0.25280898876404495,
      "eval_loss": 0.28235557675361633,
      "eval_mcc": 0.27977789937011266,
      "eval_precision": 0.6293706293706294,
      "eval_recall": 0.15817223198594024,
      "eval_runtime": 9.9079,
      "eval_samples_per_second": 520.597,
      "eval_steps_per_second": 16.351,
      "step": 1935
    },
    {
      "epoch": 4.0,
      "grad_norm": 2.622187852859497,
      "learning_rate": 4.6373938919515864e-05,
      "loss": 0.2834,
      "step": 2580
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.9003489724699496,
      "eval_f1": 0.3622828784119107,
      "eval_loss": 0.2773243486881256,
      "eval_mcc": 0.354261144004657,
      "eval_precision": 0.6160337552742616,
      "eval_recall": 0.2565905096660808,
      "eval_runtime": 9.9154,
      "eval_samples_per_second": 520.202,
      "eval_steps_per_second": 16.338,
      "step": 2580
    },
    {
      "epoch": 5.0,
      "grad_norm": 2.0789802074432373,
      "learning_rate": 3.47804541896369e-05,
      "loss": 0.2776,
      "step": 3225
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.8984102365257852,
      "eval_f1": 0.3498759305210918,
      "eval_loss": 0.2715882658958435,
      "eval_mcc": 0.33948247492694117,
      "eval_precision": 0.5949367088607594,
      "eval_recall": 0.2478031634446397,
      "eval_runtime": 9.9239,
      "eval_samples_per_second": 519.755,
      "eval_steps_per_second": 16.324,
      "step": 3225
    },
    {
      "epoch": 6.0,
      "grad_norm": 1.8614028692245483,
      "learning_rate": 2.3186969459757932e-05,
      "loss": 0.2734,
      "step": 3870
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.9028693291973633,
      "eval_f1": 0.33817701453104354,
      "eval_loss": 0.26717719435691833,
      "eval_mcc": 0.35420175730444564,
      "eval_precision": 0.6808510638297872,
      "eval_recall": 0.22495606326889278,
      "eval_runtime": 9.9134,
      "eval_samples_per_second": 520.306,
      "eval_steps_per_second": 16.342,
      "step": 3870
    }
  ],
  "logging_steps": 500,
  "max_steps": 5160,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 8,
  "save_steps": 500,
  "total_flos": 9521655195600.0,
  "train_batch_size": 32,
  "trial_name": null,
  "trial_params": {
    "alpha": 0.9885260340274179,
    "learning_rate": 9.274787783903173e-05,
    "num_train_epochs": 8,
    "temperature": 24
  }
}