megha-shroff committed on
Commit 2de8e0c
1 Parent(s): b6f2352

End of training
README.md ADDED
@@ -0,0 +1,198 @@
+ ---
+ license: apache-2.0
+ library_name: peft
+ tags:
+ - trl
+ - sft
+ - generated_from_trainer
+ base_model: TheBloke/Mistral-7B-v0.1-GPTQ
+ model-index:
+ - name: mistral-augmentation-digikey-rand
+   results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # mistral-augmentation-digikey-rand
+
+ This model is a fine-tuned version of [TheBloke/Mistral-7B-v0.1-GPTQ](https://huggingface.co/TheBloke/Mistral-7B-v0.1-GPTQ) on an unspecified dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 0.4955
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training (see the sketch below the list):
+ - learning_rate: 2e-05
+ - train_batch_size: 4
+ - eval_batch_size: 8
+ - seed: 42
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: cosine
+ - num_epochs: 15
+ - mixed_precision_training: Native AMP
+
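+ As a minimal sketch (an assumed reconstruction; the actual training script is not part of this commit), these settings map onto `transformers.TrainingArguments` roughly as follows:
+
+ ```python
+ # Sketch of the TrainingArguments implied by the hyperparameter list above.
+ # The output_dir is hypothetical; dataset and trainer wiring are omitted.
+ from transformers import TrainingArguments
+
+ training_args = TrainingArguments(
+     output_dir="mistral-augmentation-digikey-rand",  # hypothetical path
+     learning_rate=2e-5,
+     per_device_train_batch_size=4,
+     per_device_eval_batch_size=8,
+     seed=42,
+     optim="adamw_torch",         # Adam, betas=(0.9, 0.999), epsilon=1e-08
+     lr_scheduler_type="cosine",
+     num_train_epochs=15,
+     fp16=True,                   # "Native AMP" mixed-precision training
+ )
+ ```
+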
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss |
+ |:-------------:|:-----:|:----:|:---------------:|
+ | 2.0496 | 0.01 | 50 | 1.1570 |
+ | 0.9361 | 0.03 | 100 | 0.8592 |
+ | 0.7691 | 0.04 | 150 | 0.7989 |
+ | 0.7555 | 0.06 | 200 | 0.7768 |
+ | 0.7213 | 0.07 | 250 | 0.7575 |
+ | 0.6993 | 0.09 | 300 | 0.7440 |
+ | 0.6905 | 0.1 | 350 | 0.7291 |
+ | 0.6855 | 0.12 | 400 | 0.7210 |
+ | 0.6732 | 0.13 | 450 | 0.7076 |
+ | 0.6516 | 0.15 | 500 | 0.7005 |
+ | 0.639 | 0.16 | 550 | 0.6920 |
+ | 0.6322 | 0.18 | 600 | 0.6829 |
+ | 0.6164 | 0.19 | 650 | 0.6755 |
+ | 0.6185 | 0.21 | 700 | 0.6704 |
+ | 0.6457 | 0.22 | 750 | 0.6667 |
+ | 0.6238 | 0.24 | 800 | 0.6630 |
+ | 0.6173 | 0.25 | 850 | 0.6570 |
+ | 0.6076 | 0.27 | 900 | 0.6562 |
+ | 0.6097 | 0.28 | 950 | 0.6493 |
+ | 0.5693 | 0.3 | 1000 | 0.6423 |
+ | 0.5887 | 0.31 | 1050 | 0.6404 |
+ | 0.5869 | 0.33 | 1100 | 0.6361 |
+ | 0.5964 | 0.34 | 1150 | 0.6341 |
+ | 0.5373 | 0.36 | 1200 | 0.6281 |
+ | 0.5684 | 0.37 | 1250 | 0.6277 |
+ | 0.5746 | 0.39 | 1300 | 0.6183 |
+ | 0.5703 | 0.4 | 1350 | 0.6221 |
+ | 0.5851 | 0.42 | 1400 | 0.6175 |
+ | 0.5519 | 0.43 | 1450 | 0.6167 |
+ | 0.5716 | 0.45 | 1500 | 0.6115 |
+ | 0.552 | 0.46 | 1550 | 0.6095 |
+ | 0.5885 | 0.47 | 1600 | 0.6100 |
+ | 0.5739 | 0.49 | 1650 | 0.6061 |
+ | 0.5598 | 0.5 | 1700 | 0.6061 |
+ | 0.5729 | 0.52 | 1750 | 0.6011 |
+ | 0.5575 | 0.53 | 1800 | 0.6013 |
+ | 0.5418 | 0.55 | 1850 | 0.6003 |
+ | 0.5365 | 0.56 | 1900 | 0.5940 |
+ | 0.5096 | 0.58 | 1950 | 0.5878 |
+ | 0.5458 | 0.59 | 2000 | 0.5878 |
+ | 0.5603 | 0.61 | 2050 | 0.5863 |
+ | 0.5388 | 0.62 | 2100 | 0.5854 |
+ | 0.5187 | 0.64 | 2150 | 0.5789 |
+ | 0.5402 | 0.65 | 2200 | 0.5809 |
+ | 0.5398 | 0.67 | 2250 | 0.5761 |
+ | 0.5123 | 0.68 | 2300 | 0.5751 |
+ | 0.4936 | 0.7 | 2350 | 0.5712 |
+ | 0.4899 | 0.71 | 2400 | 0.5672 |
+ | 0.5197 | 0.73 | 2450 | 0.5627 |
+ | 0.509 | 0.74 | 2500 | 0.5574 |
+ | 0.4963 | 0.76 | 2550 | 0.5560 |
+ | 0.4989 | 0.77 | 2600 | 0.5544 |
+ | 0.4809 | 0.79 | 2650 | 0.5526 |
+ | 0.49 | 0.8 | 2700 | 0.5473 |
+ | 0.5151 | 0.82 | 2750 | 0.5485 |
+ | 0.5005 | 0.83 | 2800 | 0.5469 |
+ | 0.5072 | 0.85 | 2850 | 0.5466 |
+ | 0.5008 | 0.86 | 2900 | 0.5464 |
+ | 0.4857 | 0.88 | 2950 | 0.5441 |
+ | 0.4889 | 0.89 | 3000 | 0.5429 |
+ | 0.4714 | 0.91 | 3050 | 0.5441 |
+ | 0.4618 | 0.92 | 3100 | 0.5404 |
+ | 0.4623 | 0.93 | 3150 | 0.5418 |
+ | 0.4771 | 0.95 | 3200 | 0.5396 |
+ | 0.4592 | 0.96 | 3250 | 0.5409 |
+ | 0.4783 | 0.98 | 3300 | 0.5373 |
+ | 0.5021 | 0.99 | 3350 | 0.5343 |
+ | 0.4753 | 1.01 | 3400 | 0.5350 |
+ | 0.4369 | 1.02 | 3450 | 0.5338 |
+ | 0.4651 | 1.04 | 3500 | 0.5318 |
+ | 0.4395 | 1.05 | 3550 | 0.5320 |
+ | 0.4771 | 1.07 | 3600 | 0.5311 |
+ | 0.4659 | 1.08 | 3650 | 0.5337 |
+ | 0.4699 | 1.1 | 3700 | 0.5309 |
+ | 0.4717 | 1.11 | 3750 | 0.5301 |
+ | 0.4445 | 1.13 | 3800 | 0.5282 |
+ | 0.4342 | 1.14 | 3850 | 0.5303 |
+ | 0.4599 | 1.16 | 3900 | 0.5266 |
+ | 0.4442 | 1.17 | 3950 | 0.5275 |
+ | 0.4628 | 1.19 | 4000 | 0.5260 |
+ | 0.4339 | 1.2 | 4050 | 0.5243 |
+ | 0.4577 | 1.22 | 4100 | 0.5283 |
+ | 0.463 | 1.23 | 4150 | 0.5253 |
+ | 0.4602 | 1.25 | 4200 | 0.5243 |
+ | 0.4411 | 1.26 | 4250 | 0.5255 |
+ | 0.4542 | 1.28 | 4300 | 0.5263 |
+ | 0.4379 | 1.29 | 4350 | 0.5213 |
+ | 0.4471 | 1.31 | 4400 | 0.5189 |
+ | 0.4372 | 1.32 | 4450 | 0.5236 |
+ | 0.4526 | 1.34 | 4500 | 0.5203 |
+ | 0.4504 | 1.35 | 4550 | 0.5198 |
+ | 0.4708 | 1.36 | 4600 | 0.5171 |
+ | 0.4748 | 1.38 | 4650 | 0.5177 |
+ | 0.4511 | 1.39 | 4700 | 0.5152 |
+ | 0.4758 | 1.41 | 4750 | 0.5179 |
+ | 0.4543 | 1.42 | 4800 | 0.5165 |
+ | 0.4506 | 1.44 | 4850 | 0.5167 |
+ | 0.44 | 1.45 | 4900 | 0.5152 |
+ | 0.4443 | 1.47 | 4950 | 0.5135 |
+ | 0.4538 | 1.48 | 5000 | 0.5140 |
+ | 0.435 | 1.5 | 5050 | 0.5142 |
+ | 0.439 | 1.51 | 5100 | 0.5135 |
+ | 0.4408 | 1.53 | 5150 | 0.5121 |
+ | 0.4532 | 1.54 | 5200 | 0.5137 |
+ | 0.4177 | 1.56 | 5250 | 0.5143 |
+ | 0.4434 | 1.57 | 5300 | 0.5139 |
+ | 0.4395 | 1.59 | 5350 | 0.5117 |
+ | 0.4327 | 1.6 | 5400 | 0.5124 |
+ | 0.4257 | 1.62 | 5450 | 0.5128 |
+ | 0.4225 | 1.63 | 5500 | 0.5106 |
+ | 0.4517 | 1.65 | 5550 | 0.5119 |
+ | 0.4632 | 1.66 | 5600 | 0.5076 |
+ | 0.4371 | 1.68 | 5650 | 0.5110 |
+ | 0.4209 | 1.69 | 5700 | 0.5082 |
+ | 0.4336 | 1.71 | 5750 | 0.5072 |
+ | 0.4269 | 1.72 | 5800 | 0.5125 |
+ | 0.4208 | 1.74 | 5850 | 0.5105 |
+ | 0.4334 | 1.75 | 5900 | 0.5074 |
+ | 0.4306 | 1.77 | 5950 | 0.5052 |
+ | 0.4454 | 1.78 | 6000 | 0.5073 |
+ | 0.4227 | 1.8 | 6050 | 0.5068 |
+ | 0.4467 | 1.81 | 6100 | 0.5041 |
+ | 0.4279 | 1.82 | 6150 | 0.5034 |
+ | 0.4368 | 1.84 | 6200 | 0.5021 |
+ | 0.4205 | 1.85 | 6250 | 0.5025 |
+ | 0.415 | 1.87 | 6300 | 0.5029 |
+ | 0.4213 | 1.88 | 6350 | 0.5019 |
+ | 0.4316 | 1.9 | 6400 | 0.5053 |
+ | 0.4065 | 1.91 | 6450 | 0.5004 |
+ | 0.4578 | 1.93 | 6500 | 0.5045 |
+ | 0.4479 | 1.94 | 6550 | 0.4998 |
+ | 0.43 | 1.96 | 6600 | 0.4947 |
+ | 0.4192 | 1.97 | 6650 | 0.4967 |
+ | 0.4061 | 1.99 | 6700 | 0.4961 |
+ | 0.4309 | 2.0 | 6750 | 0.4960 |
+ | 0.4118 | 2.02 | 6800 | 0.4979 |
+ | 0.4149 | 2.03 | 6850 | 0.4955 |
+
+
+ ### Framework versions
+
+ - PEFT 0.7.1
+ - Transformers 4.36.2
+ - PyTorch 2.1.2+cu121
+ - Datasets 2.16.1
+ - Tokenizers 0.15.0
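+
+ ## Usage (sketch)
+
+ A minimal loading sketch, assuming the adapter is hosted at `megha-shroff/mistral-augmentation-digikey-rand` (a repo id inferred from the commit author and model name, not confirmed) and that a GPTQ-capable backend (e.g. auto-gptq via optimum) is installed:
+
+ ```python
+ # Sketch: load the GPTQ base model, then attach this LoRA adapter.
+ from peft import PeftModel
+ from transformers import AutoModelForCausalLM, AutoTokenizer
+
+ adapter_id = "megha-shroff/mistral-augmentation-digikey-rand"  # assumed repo id
+ base = AutoModelForCausalLM.from_pretrained(
+     "TheBloke/Mistral-7B-v0.1-GPTQ", device_map="auto"
+ )
+ model = PeftModel.from_pretrained(base, adapter_id)
+ tokenizer = AutoTokenizer.from_pretrained(adapter_id)
+
+ inputs = tokenizer("Example prompt", return_tensors="pt").to(model.device)
+ outputs = model.generate(**inputs, max_new_tokens=64)
+ print(tokenizer.decode(outputs[0], skip_special_tokens=True))
+ ```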
adapter_config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "TheBloke/Mistral-7B-v0.1-GPTQ",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 32,
+   "lora_dropout": 0.05,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 32,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "v_proj",
+     "q_proj"
+   ],
+   "task_type": "CAUSAL_LM"
+ }
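For readability: the JSON above describes a rank-32 LoRA on the attention query/value projections. A sketch of the equivalent `peft.LoraConfig` (a reconstruction for illustration; the JSON file remains the source of truth):

```python
# Sketch: LoraConfig equivalent to the adapter_config.json committed above.
from peft import LoraConfig

lora_config = LoraConfig(
    r=32,                                  # LoRA rank
    lora_alpha=32,                         # scaling factor
    lora_dropout=0.05,
    bias="none",
    target_modules=["v_proj", "q_proj"],   # attention value/query projections
    task_type="CAUSAL_LM",
)
```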
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a7dc22961a5d3a9f709d8b26f991a3eae9f99098dafe1c2e41146c97e174cf96
+ size 54543184
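These three lines are a Git LFS pointer, not the weights themselves; the actual ~54 MB safetensors file lives in LFS storage. A sketch of fetching it directly, again assuming the repo id inferred above:

```python
# Sketch: download the real adapter weights behind the LFS pointer.
from huggingface_hub import hf_hub_download

path = hf_hub_download(
    repo_id="megha-shroff/mistral-augmentation-digikey-rand",  # assumed repo id
    filename="adapter_model.safetensors",
)
print(path)  # local cache path of the ~54 MB file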
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
+ {
+   "bos_token": {
+     "content": "<s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "</s>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": "</s>",
+   "unk_token": {
+     "content": "<unk>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
+ size 493443
tokenizer_config.json ADDED
@@ -0,0 +1,42 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "added_tokens_decoder": {
+     "0": {
+       "content": "<unk>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "1": {
+       "content": "<s>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "2": {
+       "content": "</s>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "additional_special_tokens": [],
+   "bos_token": "<s>",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "</s>",
+   "legacy": true,
+   "model_max_length": 1000000000000000019884624838656,
+   "pad_token": "</s>",
+   "sp_model_kwargs": {},
+   "spaces_between_special_tokens": false,
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": "<unk>",
+   "use_default_system_prompt": true
+ }
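Two settings above matter downstream: `add_eos_token` is false, so any EOS supervision must come from the data pipeline, and `pad_token` is mapped to `</s>` because this Llama-style tokenizer ships no dedicated padding token. A minimal sketch of reproducing that setup (assumed; not taken from the training script):

```python
# Sketch: load the tokenizer and mirror the pad-token setting recorded above.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("TheBloke/Mistral-7B-v0.1-GPTQ")
tokenizer.pad_token = tokenizer.eos_token  # "</s>" doubles as the pad token
```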
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3dc55b8a56e7218960643b8b6ea9be206eec7582f02772f5bf2f83638088886a
+ size 4728