File size: 3,450 Bytes
89ccd33
 
 
c67323c
89ccd33
c67323c
89ccd33
 
 
 
 
120bedf
c67323c
120bedf
 
 
 
 
8ebd777
 
 
 
89ccd33
 
0d8d7c6
120bedf
c67323c
120bedf
 
 
 
 
 
 
 
 
c67323c
0d8d7c6
f20d900
120bedf
c67323c
120bedf
 
 
 
 
 
 
 
 
c67323c
b30172b
 
120bedf
c67323c
120bedf
 
 
 
 
 
 
 
 
c67323c
b30172b
 
120bedf
c67323c
120bedf
 
 
 
 
 
 
 
 
c67323c
b30172b
 
120bedf
c67323c
120bedf
 
 
 
 
 
 
 
 
c67323c
b30172b
89ccd33
e022723
c67323c
89ccd33
120bedf
 
 
 
89ccd33
 
 
c67323c
89ccd33
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.0,
  "eval_steps": 100,
  "global_step": 53,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02,
      "learning_rate": 8.333333333333333e-08,
      "logits/chosen": -2.8462421894073486,
      "logits/rejected": -2.8283610343933105,
      "logps/chosen": -274.7393798828125,
      "logps/rejected": -204.42575073242188,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.911172937635942e-07,
      "logits/chosen": -2.8527991771698,
      "logits/rejected": -2.8377315998077393,
      "logps/chosen": -305.9073181152344,
      "logps/rejected": -295.8478698730469,
      "loss": 0.6914,
      "rewards/accuracies": 0.4513888955116272,
      "rewards/chosen": 0.0023197412956506014,
      "rewards/margins": 0.0025084479711949825,
      "rewards/rejected": -0.00018870655912905931,
      "step": 10
    },
    {
      "epoch": 0.38,
      "learning_rate": 3.982949361823388e-07,
      "logits/chosen": -2.859750270843506,
      "logits/rejected": -2.880180835723877,
      "logps/chosen": -295.7957458496094,
      "logps/rejected": -332.6015930175781,
      "loss": 0.6653,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": 0.02228003740310669,
      "rewards/margins": 0.059415679425001144,
      "rewards/rejected": -0.037135638296604156,
      "step": 20
    },
    {
      "epoch": 0.57,
      "learning_rate": 2.416462557480814e-07,
      "logits/chosen": -2.843632936477661,
      "logits/rejected": -2.8286781311035156,
      "logps/chosen": -310.9751892089844,
      "logps/rejected": -322.77532958984375,
      "loss": 0.6283,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": 0.027804672718048096,
      "rewards/margins": 0.1940310001373291,
      "rewards/rejected": -0.1662263423204422,
      "step": 30
    },
    {
      "epoch": 0.75,
      "learning_rate": 8.859303711029939e-08,
      "logits/chosen": -2.8175368309020996,
      "logits/rejected": -2.821326494216919,
      "logps/chosen": -274.8536682128906,
      "logps/rejected": -349.11505126953125,
      "loss": 0.6099,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": 0.002673505572602153,
      "rewards/margins": 0.2130366563796997,
      "rewards/rejected": -0.21036314964294434,
      "step": 40
    },
    {
      "epoch": 0.94,
      "learning_rate": 5.009573740853313e-09,
      "logits/chosen": -2.8560073375701904,
      "logits/rejected": -2.867896556854248,
      "logps/chosen": -307.2721862792969,
      "logps/rejected": -350.257568359375,
      "loss": 0.5984,
      "rewards/accuracies": 0.7250000238418579,
      "rewards/chosen": 0.0023462946992367506,
      "rewards/margins": 0.33910489082336426,
      "rewards/rejected": -0.336758553981781,
      "step": 50
    },
    {
      "epoch": 1.0,
      "step": 53,
      "total_flos": 0.0,
      "train_loss": 0.6385756753525644,
      "train_runtime": 422.6241,
      "train_samples_per_second": 15.972,
      "train_steps_per_second": 0.125
    }
  ],
  "logging_steps": 10,
  "max_steps": 53,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "trial_name": null,
  "trial_params": null
}