seanmor5 committed on
Commit
71d8542
1 Parent(s): 11c4417

Second checkpoint

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dbd034a81014911eba6cc587d844217e43283fae64565fc82a62c92272b6d71e
3
  size 609389712
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddf4492601f1de7b3bbcd918e30af5b791b82a6881c4651142c1a89beb90880a
3
  size 609389712
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:afa1f305ca4d23ee4ce56f9cf8a80dde62dc3256b0ceb662670c68df3c10a999
3
  size 43127132
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ddcff1d5bdbd8789dba1706a78bd01ddb67631093c37b89688236f340f238a00
3
  size 43127132
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d354efc7818d158473921845d6165d1f2bddfdc176ab543d9dd5af8aa56f8c75
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46a442c3ca436aa6a10e4093e0aaf8d54298771a4565ad23c34571d316886c86
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:22f19229a01f85f1c53c439ac373964673e8031baaac40ca774f85995ec5dc07
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29ff92e07eb84e01189de138508ab139b01aad8541db4bc42c48d76d71ae0b56
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.1261166579085654,
5
  "eval_steps": 500,
6
- "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -79,6 +79,30 @@
79
  "learning_rate": 2.186973905723906e-05,
80
  "loss": 3.6035,
81
  "step": 600
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  }
83
  ],
84
  "logging_steps": 50,
@@ -86,7 +110,7 @@
86
  "num_input_tokens_seen": 0,
87
  "num_train_epochs": 1,
88
  "save_steps": 100,
89
- "total_flos": 1.4726209442584658e+18,
90
  "train_batch_size": 8,
91
  "trial_name": null,
92
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.16815554387808723,
5
  "eval_steps": 500,
6
+ "global_step": 800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
79
  "learning_rate": 2.186973905723906e-05,
80
  "loss": 3.6035,
81
  "step": 600
82
+ },
83
+ {
84
+ "epoch": 0.14,
85
+ "learning_rate": 2.160669191919192e-05,
86
+ "loss": 3.5998,
87
+ "step": 650
88
+ },
89
+ {
90
+ "epoch": 0.15,
91
+ "learning_rate": 2.1343644781144782e-05,
92
+ "loss": 3.5809,
93
+ "step": 700
94
+ },
95
+ {
96
+ "epoch": 0.16,
97
+ "learning_rate": 2.1080597643097644e-05,
98
+ "loss": 3.5888,
99
+ "step": 750
100
+ },
101
+ {
102
+ "epoch": 0.17,
103
+ "learning_rate": 2.0817550505050505e-05,
104
+ "loss": 3.6076,
105
+ "step": 800
106
  }
107
  ],
108
  "logging_steps": 50,
 
110
  "num_input_tokens_seen": 0,
111
  "num_train_epochs": 1,
112
  "save_steps": 100,
113
+ "total_flos": 1.9644023431710966e+18,
114
  "train_batch_size": 8,
115
  "trial_name": null,
116
  "trial_params": null