araelectra-base-artydiqa / trainer_state.json
1
{
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
  "epoch": 3.0,
5
  "global_step": 5763,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
9
  "log_history": [
10
    {
11
      "epoch": 0.26,
12
      "learning_rate": 3e-05,
13
      "loss": 3.21,
14
      "step": 500
15
    },
16
    {
17
      "epoch": 0.52,
18
      "learning_rate": 2.7912026726057907e-05,
19
      "loss": 1.4143,
20
      "step": 1000
21
    },
22
    {
23
      "epoch": 0.78,
24
      "learning_rate": 2.5824053452115813e-05,
25
      "loss": 1.2072,
26
      "step": 1500
27
    },
28
    {
29
      "epoch": 1.0,
30
      "exact_match": 72.9641693811075,
31
      "f1": 84.24962401921071,
32
      "step": 1921
33
    },
34
    {
35
      "epoch": 1.04,
36
      "learning_rate": 2.3736080178173723e-05,
37
      "loss": 1.1298,
38
      "step": 2000
39
    },
40
    {
41
      "epoch": 1.3,
42
      "learning_rate": 2.1648106904231625e-05,
43
      "loss": 0.7843,
44
      "step": 2500
45
    },
46
    {
47
      "epoch": 1.56,
48
      "learning_rate": 1.956013363028953e-05,
49
      "loss": 0.8011,
50
      "step": 3000
51
    },
52
    {
53
      "epoch": 1.82,
54
      "learning_rate": 1.7472160356347438e-05,
55
      "loss": 0.7699,
56
      "step": 3500
57
    },
58
    {
59
      "epoch": 2.0,
60
      "exact_match": 73.0727470141151,
61
      "f1": 85.57152971417061,
62
      "step": 3842
63
    },
64
    {
65
      "epoch": 2.08,
66
      "learning_rate": 1.5384187082405344e-05,
67
      "loss": 0.6582,
68
      "step": 4000
69
    },
70
    {
71
      "epoch": 2.34,
72
      "learning_rate": 1.3296213808463252e-05,
73
      "loss": 0.4987,
74
      "step": 4500
75
    },
76
    {
77
      "epoch": 2.6,
78
      "learning_rate": 1.120824053452116e-05,
79
      "loss": 0.4904,
80
      "step": 5000
81
    },
82
    {
83
      "epoch": 2.86,
84
      "learning_rate": 9.120267260579064e-06,
85
      "loss": 0.4922,
86
      "step": 5500
87
    }
88
  ],
89
  "max_steps": 7684,
90
  "num_train_epochs": 4,
91
  "total_flos": 1.429443287350272e+16,
92
  "trial_name": null,
93
  "trial_params": null
94
}
95