Upload 12 files

Browse files

Files changed (8) hide show

README.md +7 -6
adapter_model.safetensors +3 -0
optimizer.pt +3 -0
rng_state.pth +3 -0
scheduler.pt +3 -0
tokenizer.json +2 -2
trainer_state.json +733 -0
training_args.bin +3 -0

README.md CHANGED Viewed

@@ -1,8 +1,6 @@
 ---
-library_name: transformers
-tags:
-- trl
-- sft
 ---
 # Model Card for Model ID
@@ -17,7 +15,7 @@ tags:
 <!-- Provide a longer summary of what this model is. -->
-This is the model card of a 🤗 transformers model that has been pushed on the Hub. This model card has been automatically generated.
 - **Developed by:** [More Information Needed]
 - **Funded by [optional]:** [More Information Needed]
@@ -198,4 +196,7 @@ Carbon emissions can be estimated using the [Machine Learning Impact calculator]
 ## Model Card Contact
-[More Information Needed]

 ---
+base_model: google/gemma-2-9b
+library_name: peft
 ---
 # Model Card for Model ID
 <!-- Provide a longer summary of what this model is. -->
 - **Developed by:** [More Information Needed]
 - **Funded by [optional]:** [More Information Needed]
 ## Model Card Contact
+[More Information Needed]
+### Framework versions
+- PEFT 0.12.0

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f5913b7385b8562b01f63fb1cd788498e216d7a9862f01731a9efe25d9b0d2b1
+size 108113968

optimizer.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9cf95fcff472efc69a227ea88ca5cb80a367502f03e269c9d4d4d18e2b58a2bb
+size 54771268

rng_state.pth ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:52cca5856c568bc52c683b690919168fa27bfbdfefc6e0a62355afa6011157c3
+size 14244

scheduler.pt ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3a60c7d771c1fd156acee762fba03c724cb41829a3f71df370ecd1d20b134982
+size 1064

tokenizer.json CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f289bc05132635a8bc7aca7aa21255efd5e18f3710f43e3cdb96bcd41be4922
-size 17525357

 version https://git-lfs.github.com/spec/v1
+oid sha256:1d12cb475f583be7a862eb1e11c3e558581c2f9a65be8e8a8ea86381ed22f301
+size 17525456

trainer_state.json ADDED Viewed

	@@ -0,0 +1,733 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 16.0,
+  "eval_steps": 500,
+  "global_step": 100,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.16,
+      "grad_norm": 1.0079981088638306,
+      "learning_rate": 4e-05,
+      "loss": 2.3624,
+      "step": 1
+    },
+    {
+      "epoch": 0.32,
+      "grad_norm": 1.0123796463012695,
+      "learning_rate": 8e-05,
+      "loss": 2.4117,
+      "step": 2
+    },
+    {
+      "epoch": 0.48,
+      "grad_norm": 1.0385504961013794,
+      "learning_rate": 0.00012,
+      "loss": 2.4351,
+      "step": 3
+    },
+    {
+      "epoch": 0.64,
+      "grad_norm": 0.7601240277290344,
+      "learning_rate": 0.00016,
+      "loss": 1.9867,
+      "step": 4
+    },
+    {
+      "epoch": 0.8,
+      "grad_norm": 0.9805667400360107,
+      "learning_rate": 0.0002,
+      "loss": 2.0655,
+      "step": 5
+    },
+    {
+      "epoch": 0.96,
+      "grad_norm": 1.6322834491729736,
+      "learning_rate": 0.00019789473684210526,
+      "loss": 1.8377,
+      "step": 6
+    },
+    {
+      "epoch": 1.12,
+      "grad_norm": 1.1208696365356445,
+      "learning_rate": 0.00019578947368421054,
+      "loss": 1.5558,
+      "step": 7
+    },
+    {
+      "epoch": 1.28,
+      "grad_norm": 1.3962080478668213,
+      "learning_rate": 0.0001936842105263158,
+      "loss": 1.4199,
+      "step": 8
+    },
+    {
+      "epoch": 1.44,
+      "grad_norm": 1.4532853364944458,
+      "learning_rate": 0.00019157894736842104,
+      "loss": 1.2996,
+      "step": 9
+    },
+    {
+      "epoch": 1.6,
+      "grad_norm": 2.3988616466522217,
+      "learning_rate": 0.00018947368421052632,
+      "loss": 1.2371,
+      "step": 10
+    },
+    {
+      "epoch": 1.76,
+      "grad_norm": 1.3582508563995361,
+      "learning_rate": 0.0001873684210526316,
+      "loss": 1.1839,
+      "step": 11
+    },
+    {
+      "epoch": 1.92,
+      "grad_norm": 1.2997236251831055,
+      "learning_rate": 0.00018526315789473685,
+      "loss": 0.983,
+      "step": 12
+    },
+    {
+      "epoch": 2.08,
+      "grad_norm": 1.1868802309036255,
+      "learning_rate": 0.0001831578947368421,
+      "loss": 0.7447,
+      "step": 13
+    },
+    {
+      "epoch": 2.24,
+      "grad_norm": 1.0286939144134521,
+      "learning_rate": 0.00018105263157894739,
+      "loss": 0.8524,
+      "step": 14
+    },
+    {
+      "epoch": 2.4,
+      "grad_norm": 1.00070321559906,
+      "learning_rate": 0.00017894736842105264,
+      "loss": 0.8649,
+      "step": 15
+    },
+    {
+      "epoch": 2.56,
+      "grad_norm": 1.4189987182617188,
+      "learning_rate": 0.0001768421052631579,
+      "loss": 0.8116,
+      "step": 16
+    },
+    {
+      "epoch": 2.7199999999999998,
+      "grad_norm": 1.2303727865219116,
+      "learning_rate": 0.00017473684210526317,
+      "loss": 0.8071,
+      "step": 17
+    },
+    {
+      "epoch": 2.88,
+      "grad_norm": 0.9925879240036011,
+      "learning_rate": 0.00017263157894736842,
+      "loss": 0.7081,
+      "step": 18
+    },
+    {
+      "epoch": 3.04,
+      "grad_norm": 1.0683646202087402,
+      "learning_rate": 0.0001705263157894737,
+      "loss": 0.5269,
+      "step": 19
+    },
+    {
+      "epoch": 3.2,
+      "grad_norm": 1.0474812984466553,
+      "learning_rate": 0.00016842105263157895,
+      "loss": 0.6947,
+      "step": 20
+    },
+    {
+      "epoch": 3.36,
+      "grad_norm": 1.0291672945022583,
+      "learning_rate": 0.00016631578947368423,
+      "loss": 0.5014,
+      "step": 21
+    },
+    {
+      "epoch": 3.52,
+      "grad_norm": 1.1327933073043823,
+      "learning_rate": 0.00016421052631578948,
+      "loss": 0.481,
+      "step": 22
+    },
+    {
+      "epoch": 3.68,
+      "grad_norm": 1.4890342950820923,
+      "learning_rate": 0.00016210526315789473,
+      "loss": 0.5253,
+      "step": 23
+    },
+    {
+      "epoch": 3.84,
+      "grad_norm": 1.532833456993103,
+      "learning_rate": 0.00016,
+      "loss": 0.4937,
+      "step": 24
+    },
+    {
+      "epoch": 4.0,
+      "grad_norm": 1.7453362941741943,
+      "learning_rate": 0.00015789473684210527,
+      "loss": 0.5188,
+      "step": 25
+    },
+    {
+      "epoch": 4.16,
+      "grad_norm": 1.2242546081542969,
+      "learning_rate": 0.00015578947368421052,
+      "loss": 0.1893,
+      "step": 26
+    },
+    {
+      "epoch": 4.32,
+      "grad_norm": 1.7437238693237305,
+      "learning_rate": 0.0001536842105263158,
+      "loss": 0.433,
+      "step": 27
+    },
+    {
+      "epoch": 4.48,
+      "grad_norm": 1.4618209600448608,
+      "learning_rate": 0.00015157894736842108,
+      "loss": 0.3996,
+      "step": 28
+    },
+    {
+      "epoch": 4.64,
+      "grad_norm": 1.3685592412948608,
+      "learning_rate": 0.00014947368421052633,
+      "loss": 0.2189,
+      "step": 29
+    },
+    {
+      "epoch": 4.8,
+      "grad_norm": 1.741402268409729,
+      "learning_rate": 0.00014736842105263158,
+      "loss": 0.2934,
+      "step": 30
+    },
+    {
+      "epoch": 4.96,
+      "grad_norm": 1.5545222759246826,
+      "learning_rate": 0.00014526315789473686,
+      "loss": 0.2099,
+      "step": 31
+    },
+    {
+      "epoch": 5.12,
+      "grad_norm": 1.2092806100845337,
+      "learning_rate": 0.0001431578947368421,
+      "loss": 0.1916,
+      "step": 32
+    },
+    {
+      "epoch": 5.28,
+      "grad_norm": 1.7175395488739014,
+      "learning_rate": 0.00014105263157894736,
+      "loss": 0.2527,
+      "step": 33
+    },
+    {
+      "epoch": 5.44,
+      "grad_norm": 1.368059754371643,
+      "learning_rate": 0.00013894736842105264,
+      "loss": 0.114,
+      "step": 34
+    },
+    {
+      "epoch": 5.6,
+      "grad_norm": 1.6632587909698486,
+      "learning_rate": 0.0001368421052631579,
+      "loss": 0.1549,
+      "step": 35
+    },
+    {
+      "epoch": 5.76,
+      "grad_norm": 1.6607255935668945,
+      "learning_rate": 0.00013473684210526317,
+      "loss": 0.1171,
+      "step": 36
+    },
+    {
+      "epoch": 5.92,
+      "grad_norm": 2.4954917430877686,
+      "learning_rate": 0.00013263157894736842,
+      "loss": 0.1614,
+      "step": 37
+    },
+    {
+      "epoch": 6.08,
+      "grad_norm": 1.7216722965240479,
+      "learning_rate": 0.0001305263157894737,
+      "loss": 0.1459,
+      "step": 38
+    },
+    {
+      "epoch": 6.24,
+      "grad_norm": 0.9449135065078735,
+      "learning_rate": 0.00012842105263157895,
+      "loss": 0.1001,
+      "step": 39
+    },
+    {
+      "epoch": 6.4,
+      "grad_norm": 1.4137742519378662,
+      "learning_rate": 0.0001263157894736842,
+      "loss": 0.0859,
+      "step": 40
+    },
+    {
+      "epoch": 6.5600000000000005,
+      "grad_norm": 1.8110110759735107,
+      "learning_rate": 0.00012421052631578949,
+      "loss": 0.1404,
+      "step": 41
+    },
+    {
+      "epoch": 6.72,
+      "grad_norm": 1.1322952508926392,
+      "learning_rate": 0.00012210526315789474,
+      "loss": 0.0687,
+      "step": 42
+    },
+    {
+      "epoch": 6.88,
+      "grad_norm": 2.2961461544036865,
+      "learning_rate": 0.00012,
+      "loss": 0.1203,
+      "step": 43
+    },
+    {
+      "epoch": 7.04,
+      "grad_norm": 1.5652666091918945,
+      "learning_rate": 0.00011789473684210525,
+      "loss": 0.1299,
+      "step": 44
+    },
+    {
+      "epoch": 7.2,
+      "grad_norm": 0.7390972375869751,
+      "learning_rate": 0.00011578947368421053,
+      "loss": 0.0595,
+      "step": 45
+    },
+    {
+      "epoch": 7.36,
+      "grad_norm": 1.0376925468444824,
+      "learning_rate": 0.0001136842105263158,
+      "loss": 0.0578,
+      "step": 46
+    },
+    {
+      "epoch": 7.52,
+      "grad_norm": 0.9976247549057007,
+      "learning_rate": 0.00011157894736842105,
+      "loss": 0.0695,
+      "step": 47
+    },
+    {
+      "epoch": 7.68,
+      "grad_norm": 1.0853309631347656,
+      "learning_rate": 0.00010947368421052633,
+      "loss": 0.0985,
+      "step": 48
+    },
+    {
+      "epoch": 7.84,
+      "grad_norm": 1.3621833324432373,
+      "learning_rate": 0.00010736842105263158,
+      "loss": 0.1269,
+      "step": 49
+    },
+    {
+      "epoch": 8.0,
+      "grad_norm": 0.8868013024330139,
+      "learning_rate": 0.00010526315789473685,
+      "loss": 0.0641,
+      "step": 50
+    },
+    {
+      "epoch": 8.16,
+      "grad_norm": 0.6473409533500671,
+      "learning_rate": 0.00010315789473684211,
+      "loss": 0.0474,
+      "step": 51
+    },
+    {
+      "epoch": 8.32,
+      "grad_norm": 1.6032112836837769,
+      "learning_rate": 0.00010105263157894738,
+      "loss": 0.0597,
+      "step": 52
+    },
+    {
+      "epoch": 8.48,
+      "grad_norm": 1.120687484741211,
+      "learning_rate": 9.894736842105263e-05,
+      "loss": 0.0582,
+      "step": 53
+    },
+    {
+      "epoch": 8.64,
+      "grad_norm": 0.7064136862754822,
+      "learning_rate": 9.68421052631579e-05,
+      "loss": 0.0557,
+      "step": 54
+    },
+    {
+      "epoch": 8.8,
+      "grad_norm": 0.5838208794593811,
+      "learning_rate": 9.473684210526316e-05,
+      "loss": 0.0436,
+      "step": 55
+    },
+    {
+      "epoch": 8.96,
+      "grad_norm": 1.2315547466278076,
+      "learning_rate": 9.263157894736843e-05,
+      "loss": 0.063,
+      "step": 56
+    },
+    {
+      "epoch": 9.12,
+      "grad_norm": 0.3518936336040497,
+      "learning_rate": 9.052631578947369e-05,
+      "loss": 0.0311,
+      "step": 57
+    },
+    {
+      "epoch": 9.28,
+      "grad_norm": 0.6926944851875305,
+      "learning_rate": 8.842105263157894e-05,
+      "loss": 0.039,
+      "step": 58
+    },
+    {
+      "epoch": 9.44,
+      "grad_norm": 0.26300671696662903,
+      "learning_rate": 8.631578947368421e-05,
+      "loss": 0.0252,
+      "step": 59
+    },
+    {
+      "epoch": 9.6,
+      "grad_norm": 0.7903566360473633,
+      "learning_rate": 8.421052631578948e-05,
+      "loss": 0.0415,
+      "step": 60
+    },
+    {
+      "epoch": 9.76,
+      "grad_norm": 0.5427919626235962,
+      "learning_rate": 8.210526315789474e-05,
+      "loss": 0.0453,
+      "step": 61
+    },
+    {
+      "epoch": 9.92,
+      "grad_norm": 0.5827217698097229,
+      "learning_rate": 8e-05,
+      "loss": 0.0368,
+      "step": 62
+    },
+    {
+      "epoch": 10.08,
+      "grad_norm": 1.45575749874115,
+      "learning_rate": 7.789473684210526e-05,
+      "loss": 0.0736,
+      "step": 63
+    },
+    {
+      "epoch": 10.24,
+      "grad_norm": 0.32767948508262634,
+      "learning_rate": 7.578947368421054e-05,
+      "loss": 0.0316,
+      "step": 64
+    },
+    {
+      "epoch": 10.4,
+      "grad_norm": 0.30059218406677246,
+      "learning_rate": 7.368421052631579e-05,
+      "loss": 0.0277,
+      "step": 65
+    },
+    {
+      "epoch": 10.56,
+      "grad_norm": 0.4859299659729004,
+      "learning_rate": 7.157894736842105e-05,
+      "loss": 0.0313,
+      "step": 66
+    },
+    {
+      "epoch": 10.72,
+      "grad_norm": 0.4874284267425537,
+      "learning_rate": 6.947368421052632e-05,
+      "loss": 0.0322,
+      "step": 67
+    },
+    {
+      "epoch": 10.88,
+      "grad_norm": 0.41711848974227905,
+      "learning_rate": 6.736842105263159e-05,
+      "loss": 0.0389,
+      "step": 68
+    },
+    {
+      "epoch": 11.04,
+      "grad_norm": 0.8408872485160828,
+      "learning_rate": 6.526315789473685e-05,
+      "loss": 0.0312,
+      "step": 69
+    },
+    {
+      "epoch": 11.2,
+      "grad_norm": 0.32355204224586487,
+      "learning_rate": 6.31578947368421e-05,
+      "loss": 0.0328,
+      "step": 70
+    },
+    {
+      "epoch": 11.36,
+      "grad_norm": 0.42406928539276123,
+      "learning_rate": 6.105263157894737e-05,
+      "loss": 0.0277,
+      "step": 71
+    },
+    {
+      "epoch": 11.52,
+      "grad_norm": 0.7678600549697876,
+      "learning_rate": 5.894736842105263e-05,
+      "loss": 0.0329,
+      "step": 72
+    },
+    {
+      "epoch": 11.68,
+      "grad_norm": 0.29065871238708496,
+      "learning_rate": 5.68421052631579e-05,
+      "loss": 0.0297,
+      "step": 73
+    },
+    {
+      "epoch": 11.84,
+      "grad_norm": 0.5853772163391113,
+      "learning_rate": 5.4736842105263165e-05,
+      "loss": 0.0393,
+      "step": 74
+    },
+    {
+      "epoch": 12.0,
+      "grad_norm": 0.7088480591773987,
+      "learning_rate": 5.2631578947368424e-05,
+      "loss": 0.0344,
+      "step": 75
+    },
+    {
+      "epoch": 12.16,
+      "grad_norm": 0.19609542191028595,
+      "learning_rate": 5.052631578947369e-05,
+      "loss": 0.0232,
+      "step": 76
+    },
+    {
+      "epoch": 12.32,
+      "grad_norm": 0.31028512120246887,
+      "learning_rate": 4.842105263157895e-05,
+      "loss": 0.0273,
+      "step": 77
+    },
+    {
+      "epoch": 12.48,
+      "grad_norm": 0.4248906672000885,
+      "learning_rate": 4.6315789473684214e-05,
+      "loss": 0.0315,
+      "step": 78
+    },
+    {
+      "epoch": 12.64,
+      "grad_norm": 0.4214076101779938,
+      "learning_rate": 4.421052631578947e-05,
+      "loss": 0.0309,
+      "step": 79
+    },
+    {
+      "epoch": 12.8,
+      "grad_norm": 0.4250756502151489,
+      "learning_rate": 4.210526315789474e-05,
+      "loss": 0.0285,
+      "step": 80
+    },
+    {
+      "epoch": 12.96,
+      "grad_norm": 0.2500416934490204,
+      "learning_rate": 4e-05,
+      "loss": 0.0256,
+      "step": 81
+    },
+    {
+      "epoch": 13.12,
+      "grad_norm": 0.2516506314277649,
+      "learning_rate": 3.789473684210527e-05,
+      "loss": 0.0244,
+      "step": 82
+    },
+    {
+      "epoch": 13.28,
+      "grad_norm": 0.217052161693573,
+      "learning_rate": 3.578947368421053e-05,
+      "loss": 0.0241,
+      "step": 83
+    },
+    {
+      "epoch": 13.44,
+      "grad_norm": 0.4375220835208893,
+      "learning_rate": 3.368421052631579e-05,
+      "loss": 0.0308,
+      "step": 84
+    },
+    {
+      "epoch": 13.6,
+      "grad_norm": 0.23626229166984558,
+      "learning_rate": 3.157894736842105e-05,
+      "loss": 0.029,
+      "step": 85
+    },
+    {
+      "epoch": 13.76,
+      "grad_norm": 0.3816908001899719,
+      "learning_rate": 2.9473684210526314e-05,
+      "loss": 0.0251,
+      "step": 86
+    },
+    {
+      "epoch": 13.92,
+      "grad_norm": 0.17371943593025208,
+      "learning_rate": 2.7368421052631583e-05,
+      "loss": 0.0203,
+      "step": 87
+    },
+    {
+      "epoch": 14.08,
+      "grad_norm": 0.21958455443382263,
+      "learning_rate": 2.5263157894736845e-05,
+      "loss": 0.0265,
+      "step": 88
+    },
+    {
+      "epoch": 14.24,
+      "grad_norm": 0.2628728151321411,
+      "learning_rate": 2.3157894736842107e-05,
+      "loss": 0.0242,
+      "step": 89
+    },
+    {
+      "epoch": 14.4,
+      "grad_norm": 0.2763591408729553,
+      "learning_rate": 2.105263157894737e-05,
+      "loss": 0.0299,
+      "step": 90
+    },
+    {
+      "epoch": 14.56,
+      "grad_norm": 0.2944229245185852,
+      "learning_rate": 1.8947368421052634e-05,
+      "loss": 0.0244,
+      "step": 91
+    },
+    {
+      "epoch": 14.72,
+      "grad_norm": 0.28353527188301086,
+      "learning_rate": 1.6842105263157896e-05,
+      "loss": 0.0241,
+      "step": 92
+    },
+    {
+      "epoch": 14.88,
+      "grad_norm": 0.2161315530538559,
+      "learning_rate": 1.4736842105263157e-05,
+      "loss": 0.024,
+      "step": 93
+    },
+    {
+      "epoch": 15.04,
+      "grad_norm": 0.2228800654411316,
+      "learning_rate": 1.2631578947368422e-05,
+      "loss": 0.0263,
+      "step": 94
+    },
+    {
+      "epoch": 15.2,
+      "grad_norm": 0.17299261689186096,
+      "learning_rate": 1.0526315789473684e-05,
+      "loss": 0.0227,
+      "step": 95
+    },
+    {
+      "epoch": 15.36,
+      "grad_norm": 0.21846872568130493,
+      "learning_rate": 8.421052631578948e-06,
+      "loss": 0.0223,
+      "step": 96
+    },
+    {
+      "epoch": 15.52,
+      "grad_norm": 0.23234839737415314,
+      "learning_rate": 6.315789473684211e-06,
+      "loss": 0.0269,
+      "step": 97
+    },
+    {
+      "epoch": 15.68,
+      "grad_norm": 0.217283234000206,
+      "learning_rate": 4.210526315789474e-06,
+      "loss": 0.0259,
+      "step": 98
+    },
+    {
+      "epoch": 15.84,
+      "grad_norm": 0.2666471600532532,
+      "learning_rate": 2.105263157894737e-06,
+      "loss": 0.027,
+      "step": 99
+    },
+    {
+      "epoch": 16.0,
+      "grad_norm": 0.2889624536037445,
+      "learning_rate": 0.0,
+      "loss": 0.0248,
+      "step": 100
+    }
+  ],
+  "logging_steps": 1,
+  "max_steps": 100,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 17,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 2992005070258176.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6426d052434092d500983e1c973e7606b3ad985a6331282f103ce53d005aae7b
+size 5432