File size: 2,438 Bytes
017ab6b
0df2eb9
017ab6b
0df2eb9
017ab6b
 
 
 
 
 
 
0df2eb9
017ab6b
0df2eb9
017ab6b
 
 
0df2eb9
017ab6b
0df2eb9
017ab6b
 
 
0df2eb9
 
 
 
 
 
017ab6b
 
 
0df2eb9
017ab6b
0df2eb9
017ab6b
 
 
0df2eb9
 
 
 
 
 
017ab6b
 
 
0df2eb9
017ab6b
0df2eb9
017ab6b
 
 
0df2eb9
 
 
 
 
 
017ab6b
 
 
0df2eb9
017ab6b
0df2eb9
017ab6b
 
 
0df2eb9
 
 
 
 
 
017ab6b
 
 
0df2eb9
017ab6b
0df2eb9
017ab6b
 
 
0df2eb9
 
 
 
 
 
017ab6b
 
 
 
 
0df2eb9
017ab6b
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
{
  "best_metric": 1.1008135080337524,
  "best_model_checkpoint": "/kaggle/output/checkpoint-5000",
  "epoch": 0.20371577574967406,
  "eval_steps": 1000,
  "global_step": 5000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 2.7777777777777777e-11,
      "loss": 1.1383,
      "step": 1
    },
    {
      "epoch": 0.04,
      "learning_rate": 2.7750000000000004e-08,
      "loss": 1.1424,
      "step": 1000
    },
    {
      "epoch": 0.04,
      "eval_accuracy": 0.32375249500998005,
      "eval_loss": 1.1077626943588257,
      "eval_runtime": 54.8633,
      "eval_samples_per_second": 91.318,
      "eval_steps_per_second": 11.428,
      "step": 1000
    },
    {
      "epoch": 0.08,
      "learning_rate": 5.5527777777777784e-08,
      "loss": 1.1244,
      "step": 2000
    },
    {
      "epoch": 0.08,
      "eval_accuracy": 0.33652694610778444,
      "eval_loss": 1.1080161333084106,
      "eval_runtime": 54.7384,
      "eval_samples_per_second": 91.526,
      "eval_steps_per_second": 11.454,
      "step": 2000
    },
    {
      "epoch": 0.12,
      "learning_rate": 8.327777777777778e-08,
      "loss": 1.1228,
      "step": 3000
    },
    {
      "epoch": 0.12,
      "eval_accuracy": 0.34331337325349304,
      "eval_loss": 1.1084064245224,
      "eval_runtime": 54.7948,
      "eval_samples_per_second": 91.432,
      "eval_steps_per_second": 11.443,
      "step": 3000
    },
    {
      "epoch": 0.16,
      "learning_rate": 1.1105555555555557e-07,
      "loss": 1.1216,
      "step": 4000
    },
    {
      "epoch": 0.16,
      "eval_accuracy": 0.3385229540918164,
      "eval_loss": 1.1014840602874756,
      "eval_runtime": 54.8508,
      "eval_samples_per_second": 91.339,
      "eval_steps_per_second": 11.431,
      "step": 4000
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.3880555555555558e-07,
      "loss": 1.1181,
      "step": 5000
    },
    {
      "epoch": 0.2,
      "eval_accuracy": 0.33073852295409184,
      "eval_loss": 1.1008135080337524,
      "eval_runtime": 54.8304,
      "eval_samples_per_second": 91.373,
      "eval_steps_per_second": 11.435,
      "step": 5000
    }
  ],
  "logging_steps": 1000,
  "max_steps": 10000000,
  "num_train_epochs": 408,
  "save_steps": 1000,
  "total_flos": 1.045177565184e+16,
  "trial_name": null,
  "trial_params": null
}