Fanucci committed on
Commit
a4b358f
·
verified ·
1 Parent(s): 4928ca8

Training in progress, step 100, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a9170cbed119344f7357c62df8457537fe0a7644849cb4823e083c8e4f578e6
3
  size 4995335576
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b9295ebc05734a85b5f43993ee4c64b21a5c9bb19c7285c3147224043523c975
3
  size 4995335576
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a6deda6389e85cf2881897ff87348bebcf52fb243305d421f776a79bc34832d
3
  size 1857639032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ffe78de4e4b316205c289b24c407040958c5034fa1409b39c7d297f171b1f7ca
3
  size 1857639032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3cfdf98d6264dd523f4ce392882686c61263bd8c3bcefcb2739940446834d46d
3
  size 13706103974
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a00bef7af234902ab8346a2c39aaeeccbe3fbcabcb95b900800eda4225fe706
3
  size 13706103974
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3cf9097d4513154245c48236b6ec5137b7ee2a21c9f58f2cba798ea275c6026f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f37c40ce327861a7ca13b719d3aa37510a143368b6e74358bdb14becb3899e1e
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:03bfcb5cd3899a02f7a89e2033f35f63eb1a6773ac4ce6695121020cac9264f0
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01bc98a8690d286a0c5c6c74f6f325ac33ceb1fd4ad50ba634b85c5c1612f447
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -2,9 +2,9 @@
2
  "best_global_step": 1,
3
  "best_metric": 1.4945952892303467,
4
  "best_model_checkpoint": null,
5
- "epoch": 0.003123048094940662,
6
  "eval_steps": 50,
7
- "global_step": 50,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
@@ -59,6 +59,49 @@
59
  "eval_samples_per_second": 19.391,
60
  "eval_steps_per_second": 19.391,
61
  "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
62
  }
63
  ],
64
  "logging_steps": 10,
@@ -73,7 +116,7 @@
73
  "early_stopping_threshold": 0.0
74
  },
75
  "attributes": {
76
- "early_stopping_patience_counter": 1
77
  }
78
  },
79
  "TrainerControl": {
@@ -87,7 +130,7 @@
87
  "attributes": {}
88
  }
89
  },
90
- "total_flos": 2042310819840000.0,
91
  "train_batch_size": 1,
92
  "trial_name": null,
93
  "trial_params": null
 
2
  "best_global_step": 1,
3
  "best_metric": 1.4945952892303467,
4
  "best_model_checkpoint": null,
5
+ "epoch": 0.006246096189881324,
6
  "eval_steps": 50,
7
+ "global_step": 100,
8
  "is_hyper_param_search": false,
9
  "is_local_process_zero": true,
10
  "is_world_process_zero": true,
 
59
  "eval_samples_per_second": 19.391,
60
  "eval_steps_per_second": 19.391,
61
  "step": 50
62
+ },
63
+ {
64
+ "epoch": 0.0037476577139287947,
65
+ "grad_norm": 46.25,
66
+ "learning_rate": 0.049698382650241506,
67
+ "loss": 227.9017,
68
+ "step": 60
69
+ },
70
+ {
71
+ "epoch": 0.004372267332916927,
72
+ "grad_norm": 129.0,
73
+ "learning_rate": 0.04956310770317444,
74
+ "loss": 91.6515,
75
+ "step": 70
76
+ },
77
+ {
78
+ "epoch": 0.004996876951905059,
79
+ "grad_norm": 25.375,
80
+ "learning_rate": 0.04940309978877575,
81
+ "loss": 43.7426,
82
+ "step": 80
83
+ },
84
+ {
85
+ "epoch": 0.005621486570893191,
86
+ "grad_norm": 26.125,
87
+ "learning_rate": 0.04921852002145197,
88
+ "loss": 35.9591,
89
+ "step": 90
90
+ },
91
+ {
92
+ "epoch": 0.006246096189881324,
93
+ "grad_norm": 71.5,
94
+ "learning_rate": 0.04900955425738262,
95
+ "loss": 25.3901,
96
+ "step": 100
97
+ },
98
+ {
99
+ "epoch": 0.006246096189881324,
100
+ "eval_loss": 21.986961364746094,
101
+ "eval_runtime": 53.4891,
102
+ "eval_samples_per_second": 15.76,
103
+ "eval_steps_per_second": 15.76,
104
+ "step": 100
105
  }
106
  ],
107
  "logging_steps": 10,
 
116
  "early_stopping_threshold": 0.0
117
  },
118
  "attributes": {
119
+ "early_stopping_patience_counter": 2
120
  }
121
  },
122
  "TrainerControl": {
 
130
  "attributes": {}
131
  }
132
  },
133
+ "total_flos": 4084621639680000.0,
134
  "train_batch_size": 1,
135
  "trial_name": null,
136
  "trial_params": null