Model save

Browse files

Files changed (4) hide show

README.md +69 -0
all_results.json +9 -0
train_results.json +9 -0
trainer_state.json +413 -0

README.md ADDED Viewed

	@@ -0,0 +1,69 @@

+---
+license: other
+library_name: peft
+tags:
+- trl
+- sft
+- generated_from_trainer
+base_model: meta-llama/Meta-Llama-3-8B-Instruct
+model-index:
+- name: rinko_300_labeling
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# rinko_300_labeling
+This model is a fine-tuned version of [meta-llama/Meta-Llama-3-8B-Instruct](https://huggingface.co/meta-llama/Meta-Llama-3-8B-Instruct) on an unspecified dataset (385 training samples).
+It achieves the following results on the evaluation set (final evaluation, at step 240 / epoch 4.9485):
+- Loss: 2.0068
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 2e-06
+- train_batch_size: 4
+- eval_batch_size: 8
+- seed: 42
+- distributed_type: multi-GPU
+- gradient_accumulation_steps: 2
+- total_train_batch_size: 8
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_ratio: 0.1
+- num_epochs: 5
+### Training results
+| Training Loss | Epoch  | Step | Validation Loss |
+|:-------------:|:------:|:----:|:---------------:|
+| 2.3912        | 0.9897 | 48   | 2.2464          |
+| 2.2442        | 2.0    | 97   | 2.1167          |
+| 2.1047        | 2.9897 | 145  | 2.0317          |
+| 2.05          | 4.0    | 194  | 2.0067          |
+| 2.0626        | 4.9485 | 240  | 2.0068          |
+### Framework versions
+- PEFT 0.7.1
+- Transformers 4.40.1
+- Pytorch 2.2.1+cu121
+- Datasets 2.19.0
+- Tokenizers 0.19.1

all_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 4.948453608247423,
+    "total_flos": 1.0715672433026662e+17,
+    "train_loss": 2.205865615606308,
+    "train_runtime": 7479.3037,
+    "train_samples": 385,
+    "train_samples_per_second": 0.257,
+    "train_steps_per_second": 0.032
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,9 @@

+{
+    "epoch": 4.948453608247423,
+    "total_flos": 1.0715672433026662e+17,
+    "train_loss": 2.205865615606308,
+    "train_runtime": 7479.3037,
+    "train_samples": 385,
+    "train_samples_per_second": 0.257,
+    "train_steps_per_second": 0.032
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,413 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 4.948453608247423,
+  "eval_steps": 500,
+  "global_step": 240,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.020618556701030927,
+      "grad_norm": 1.546875,
+      "learning_rate": 8.333333333333333e-08,
+      "loss": 2.5196,
+      "step": 1
+    },
+    {
+      "epoch": 0.10309278350515463,
+      "grad_norm": 1.5390625,
+      "learning_rate": 4.1666666666666667e-07,
+      "loss": 2.458,
+      "step": 5
+    },
+    {
+      "epoch": 0.20618556701030927,
+      "grad_norm": 1.4375,
+      "learning_rate": 8.333333333333333e-07,
+      "loss": 2.4466,
+      "step": 10
+    },
+    {
+      "epoch": 0.30927835051546393,
+      "grad_norm": 1.515625,
+      "learning_rate": 1.2499999999999999e-06,
+      "loss": 2.4248,
+      "step": 15
+    },
+    {
+      "epoch": 0.41237113402061853,
+      "grad_norm": 1.6171875,
+      "learning_rate": 1.6666666666666667e-06,
+      "loss": 2.4622,
+      "step": 20
+    },
+    {
+      "epoch": 0.5154639175257731,
+      "grad_norm": 1.4921875,
+      "learning_rate": 1.9998942319271077e-06,
+      "loss": 2.4147,
+      "step": 25
+    },
+    {
+      "epoch": 0.6185567010309279,
+      "grad_norm": 1.46875,
+      "learning_rate": 1.9961946980917456e-06,
+      "loss": 2.3747,
+      "step": 30
+    },
+    {
+      "epoch": 0.7216494845360825,
+      "grad_norm": 1.453125,
+      "learning_rate": 1.987229113117374e-06,
+      "loss": 2.4189,
+      "step": 35
+    },
+    {
+      "epoch": 0.8247422680412371,
+      "grad_norm": 1.578125,
+      "learning_rate": 1.9730448705798236e-06,
+      "loss": 2.4126,
+      "step": 40
+    },
+    {
+      "epoch": 0.9278350515463918,
+      "grad_norm": 1.4765625,
+      "learning_rate": 1.953716950748227e-06,
+      "loss": 2.3912,
+      "step": 45
+    },
+    {
+      "epoch": 0.9896907216494846,
+      "eval_loss": 2.246396064758301,
+      "eval_runtime": 1335.3788,
+      "eval_samples_per_second": 4.423,
+      "eval_steps_per_second": 0.553,
+      "step": 48
+    },
+    {
+      "epoch": 1.0309278350515463,
+      "grad_norm": 1.4453125,
+      "learning_rate": 1.929347524226822e-06,
+      "loss": 2.3653,
+      "step": 50
+    },
+    {
+      "epoch": 1.134020618556701,
+      "grad_norm": 1.546875,
+      "learning_rate": 1.900065411864121e-06,
+      "loss": 2.3567,
+      "step": 55
+    },
+    {
+      "epoch": 1.2371134020618557,
+      "grad_norm": 1.5,
+      "learning_rate": 1.8660254037844386e-06,
+      "loss": 2.3555,
+      "step": 60
+    },
+    {
+      "epoch": 1.3402061855670104,
+      "grad_norm": 1.6015625,
+      "learning_rate": 1.8274074411415103e-06,
+      "loss": 2.2988,
+      "step": 65
+    },
+    {
+      "epoch": 1.443298969072165,
+      "grad_norm": 1.53125,
+      "learning_rate": 1.7844156649195757e-06,
+      "loss": 2.288,
+      "step": 70
+    },
+    {
+      "epoch": 1.5463917525773194,
+      "grad_norm": 1.3671875,
+      "learning_rate": 1.737277336810124e-06,
+      "loss": 2.2871,
+      "step": 75
+    },
+    {
+      "epoch": 1.6494845360824741,
+      "grad_norm": 1.59375,
+      "learning_rate": 1.6862416378687337e-06,
+      "loss": 2.2903,
+      "step": 80
+    },
+    {
+      "epoch": 1.7525773195876289,
+      "grad_norm": 1.484375,
+      "learning_rate": 1.6315783513024974e-06,
+      "loss": 2.2465,
+      "step": 85
+    },
+    {
+      "epoch": 1.8556701030927836,
+      "grad_norm": 1.484375,
+      "learning_rate": 1.573576436351046e-06,
+      "loss": 2.2434,
+      "step": 90
+    },
+    {
+      "epoch": 1.9587628865979383,
+      "grad_norm": 1.421875,
+      "learning_rate": 1.5125425007998652e-06,
+      "loss": 2.2442,
+      "step": 95
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 2.1167430877685547,
+      "eval_runtime": 1327.822,
+      "eval_samples_per_second": 4.449,
+      "eval_steps_per_second": 0.557,
+      "step": 97
+    },
+    {
+      "epoch": 2.0618556701030926,
+      "grad_norm": 1.4765625,
+      "learning_rate": 1.4487991802004622e-06,
+      "loss": 2.2931,
+      "step": 100
+    },
+    {
+      "epoch": 2.1649484536082473,
+      "grad_norm": 1.390625,
+      "learning_rate": 1.3826834323650898e-06,
+      "loss": 2.2062,
+      "step": 105
+    },
+    {
+      "epoch": 2.268041237113402,
+      "grad_norm": 1.4921875,
+      "learning_rate": 1.3145447561516136e-06,
+      "loss": 2.1501,
+      "step": 110
+    },
+    {
+      "epoch": 2.3711340206185567,
+      "grad_norm": 1.1875,
+      "learning_rate": 1.2447433439543238e-06,
+      "loss": 2.1248,
+      "step": 115
+    },
+    {
+      "epoch": 2.4742268041237114,
+      "grad_norm": 1.3984375,
+      "learning_rate": 1.1736481776669305e-06,
+      "loss": 2.1624,
+      "step": 120
+    },
+    {
+      "epoch": 2.5773195876288657,
+      "grad_norm": 1.2265625,
+      "learning_rate": 1.101635078182802e-06,
+      "loss": 2.1479,
+      "step": 125
+    },
+    {
+      "epoch": 2.680412371134021,
+      "grad_norm": 1.3359375,
+      "learning_rate": 1.0290847187431114e-06,
+      "loss": 2.1655,
+      "step": 130
+    },
+    {
+      "epoch": 2.783505154639175,
+      "grad_norm": 1.3203125,
+      "learning_rate": 9.56380612634664e-07,
+      "loss": 2.1369,
+      "step": 135
+    },
+    {
+      "epoch": 2.88659793814433,
+      "grad_norm": 1.28125,
+      "learning_rate": 8.839070858747696e-07,
+      "loss": 2.1003,
+      "step": 140
+    },
+    {
+      "epoch": 2.9896907216494846,
+      "grad_norm": 1.2109375,
+      "learning_rate": 8.120472455998881e-07,
+      "loss": 2.1047,
+      "step": 145
+    },
+    {
+      "epoch": 2.9896907216494846,
+      "eval_loss": 2.0316832065582275,
+      "eval_runtime": 1327.2975,
+      "eval_samples_per_second": 4.45,
+      "eval_steps_per_second": 0.557,
+      "step": 145
+    },
+    {
+      "epoch": 3.0927835051546393,
+      "grad_norm": 1.25,
+      "learning_rate": 7.411809548974791e-07,
+      "loss": 2.0675,
+      "step": 150
+    },
+    {
+      "epoch": 3.195876288659794,
+      "grad_norm": 1.3125,
+      "learning_rate": 6.71682824786439e-07,
+      "loss": 2.0934,
+      "step": 155
+    },
+    {
+      "epoch": 3.2989690721649483,
+      "grad_norm": 1.0234375,
+      "learning_rate": 6.039202339608431e-07,
+      "loss": 2.081,
+      "step": 160
+    },
+    {
+      "epoch": 3.402061855670103,
+      "grad_norm": 1.0625,
+      "learning_rate": 5.382513867649663e-07,
+      "loss": 2.0219,
+      "step": 165
+    },
+    {
+      "epoch": 3.5051546391752577,
+      "grad_norm": 1.3203125,
+      "learning_rate": 4.750234196654399e-07,
+      "loss": 2.0877,
+      "step": 170
+    },
+    {
+      "epoch": 3.6082474226804124,
+      "grad_norm": 1.2890625,
+      "learning_rate": 4.1457056623005947e-07,
+      "loss": 2.1019,
+      "step": 175
+    },
+    {
+      "epoch": 3.711340206185567,
+      "grad_norm": 1.1328125,
+      "learning_rate": 3.5721239031346063e-07,
+      "loss": 2.0828,
+      "step": 180
+    },
+    {
+      "epoch": 3.8144329896907214,
+      "grad_norm": 1.2421875,
+      "learning_rate": 3.032520967893453e-07,
+      "loss": 2.09,
+      "step": 185
+    },
+    {
+      "epoch": 3.917525773195876,
+      "grad_norm": 1.3125,
+      "learning_rate": 2.5297492875900415e-07,
+      "loss": 2.05,
+      "step": 190
+    },
+    {
+      "epoch": 4.0,
+      "eval_loss": 2.0067081451416016,
+      "eval_runtime": 1327.277,
+      "eval_samples_per_second": 4.45,
+      "eval_steps_per_second": 0.557,
+      "step": 194
+    },
+    {
+      "epoch": 4.020618556701031,
+      "grad_norm": 1.1796875,
+      "learning_rate": 2.0664665970876495e-07,
+      "loss": 2.1246,
+      "step": 195
+    },
+    {
+      "epoch": 4.123711340206185,
+      "grad_norm": 1.1796875,
+      "learning_rate": 1.6451218858706372e-07,
+      "loss": 2.0933,
+      "step": 200
+    },
+    {
+      "epoch": 4.22680412371134,
+      "grad_norm": 1.1328125,
+      "learning_rate": 1.2679424522780425e-07,
+      "loss": 2.0561,
+      "step": 205
+    },
+    {
+      "epoch": 4.329896907216495,
+      "grad_norm": 1.1640625,
+      "learning_rate": 9.369221296335006e-08,
+      "loss": 2.0946,
+      "step": 210
+    },
+    {
+      "epoch": 4.43298969072165,
+      "grad_norm": 1.125,
+      "learning_rate": 6.538107465101162e-08,
+      "loss": 2.0797,
+      "step": 215
+    },
+    {
+      "epoch": 4.536082474226804,
+      "grad_norm": 1.125,
+      "learning_rate": 4.20104876845111e-08,
+      "loss": 2.0907,
+      "step": 220
+    },
+    {
+      "epoch": 4.639175257731958,
+      "grad_norm": 1.171875,
+      "learning_rate": 2.3703992880066636e-08,
+      "loss": 2.1295,
+      "step": 225
+    },
+    {
+      "epoch": 4.742268041237113,
+      "grad_norm": 1.1640625,
+      "learning_rate": 1.0558361419055529e-08,
+      "loss": 2.0247,
+      "step": 230
+    },
+    {
+      "epoch": 4.845360824742268,
+      "grad_norm": 1.046875,
+      "learning_rate": 2.643083299427751e-09,
+      "loss": 2.0658,
+      "step": 235
+    },
+    {
+      "epoch": 4.948453608247423,
+      "grad_norm": 1.046875,
+      "learning_rate": 0.0,
+      "loss": 2.0626,
+      "step": 240
+    },
+    {
+      "epoch": 4.948453608247423,
+      "eval_loss": 2.0068044662475586,
+      "eval_runtime": 1327.4294,
+      "eval_samples_per_second": 4.45,
+      "eval_steps_per_second": 0.557,
+      "step": 240
+    },
+    {
+      "epoch": 4.948453608247423,
+      "step": 240,
+      "total_flos": 1.0715672433026662e+17,
+      "train_loss": 2.205865615606308,
+      "train_runtime": 7479.3037,
+      "train_samples_per_second": 0.257,
+      "train_steps_per_second": 0.032
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 240,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 5,
+  "save_steps": 1,
+  "total_flos": 1.0715672433026662e+17,
+  "train_batch_size": 4,
+  "trial_name": null,
+  "trial_params": null
+}