thewimo committed
Commit 76a2195
1 Parent(s): ef96e12

End of training

Files changed (2):
  1. README.md +34 -20
  2. adapter_model.bin +1 -1
README.md CHANGED
@@ -2,10 +2,11 @@
 license: apache-2.0
 library_name: peft
 tags:
+- axolotl
 - generated_from_trainer
 base_model: mistralai/Mistral-7B-v0.3
 model-index:
-- name: outputs/lora-out
+- name: Mistral-7B-v0.3-deide-phi
   results: []
 ---
 
@@ -26,20 +27,21 @@ load_in_4bit: false
 strict: false
 
 datasets:
-  - path: mhenrichsen/alpaca_2k_test
+  - path: thewimo/german-medical-identification-dataset-v0.1
     type: alpaca
 dataset_prepared_path: last_run_prepared
-val_set_size: 0.1
+val_set_size: 0.2
 output_dir: ./outputs/lora-out
+hub_model_id: thewimo/Mistral-7B-v0.3-deide-phi
 
 adapter: lora
 lora_model_dir:
 
-sequence_len: 8192
+sequence_len: 4096
 sample_packing: false
 pad_to_sequence_len: true
 
-lora_r: 32
+lora_r: 8
 lora_alpha: 16
 lora_dropout: 0.05
 lora_target_linear: true
@@ -56,13 +58,13 @@ lora_target_modules:
 wandb_project: axolotl-runs
 wandb_entity: thewind-mom-finetuning
 wandb_watch:
-wandb_name: Mistral-7B-v0.3-alpaca_2k_test
+wandb_name: Mistral-7B-v0.3-deide-phi
 wandb_log_model:
 
 gradient_accumulation_steps: 4
-micro_batch_size: 2
-num_epochs: 1
-optimizer: adamw_bnb_8bit
+micro_batch_size: 4
+num_epochs: 4
+optimizer: paged_adamw_8bit
 lr_scheduler: cosine
 learning_rate: 0.0002
 
@@ -99,11 +101,11 @@ special_tokens:
 
 </details><br>
 
-# outputs/lora-out
+# Mistral-7B-v0.3-deide-phi
 
 This model is a fine-tuned version of [mistralai/Mistral-7B-v0.3](https://huggingface.co/mistralai/Mistral-7B-v0.3) on the None dataset.
 It achieves the following results on the evaluation set:
-- Loss: 0.8745
+- Loss: 0.0364
 
 ## Model description
 
@@ -123,27 +125,39 @@ More information needed
 
 The following hyperparameters were used during training:
 - learning_rate: 0.0002
-- train_batch_size: 2
-- eval_batch_size: 2
+- train_batch_size: 4
+- eval_batch_size: 4
 - seed: 42
 - distributed_type: multi-GPU
 - num_devices: 3
 - gradient_accumulation_steps: 4
-- total_train_batch_size: 24
-- total_eval_batch_size: 6
+- total_train_batch_size: 48
+- total_eval_batch_size: 12
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_steps: 10
-- num_epochs: 1
+- num_epochs: 4
 
 ### Training results
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:------:|:----:|:---------------:|
-| 1.2007 | 0.0133 | 1 | 1.1165 |
-| 0.9054 | 0.2533 | 19 | 0.8901 |
-| 0.8991 | 0.5067 | 38 | 0.8844 |
-| 0.7794 | 0.76 | 57 | 0.8745 |
+| 1.9682 | 0.0506 | 1 | 2.0579 |
+| 1.2784 | 0.2532 | 5 | 0.8308 |
+| 0.187 | 0.5063 | 10 | 0.1732 |
+| 0.1094 | 0.7595 | 15 | 0.0819 |
+| 0.0542 | 1.0127 | 20 | 0.0593 |
+| 0.0354 | 1.2658 | 25 | 0.0521 |
+| 0.0493 | 1.5190 | 30 | 0.0457 |
+| 0.038 | 1.7722 | 35 | 0.0432 |
+| 0.0143 | 2.0253 | 40 | 0.0425 |
+| 0.0269 | 2.2785 | 45 | 0.0423 |
+| 0.0273 | 2.5316 | 50 | 0.0415 |
+| 0.0277 | 2.7848 | 55 | 0.0366 |
+| 0.0288 | 3.0380 | 60 | 0.0356 |
+| 0.0241 | 3.2911 | 65 | 0.0358 |
+| 0.0125 | 3.5443 | 70 | 0.0362 |
+| 0.0164 | 3.7975 | 75 | 0.0364 |
 
 
 ### Framework versions
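Per the updated config, this run now trains a rank-8 LoRA adapter on thewimo/german-medical-identification-dataset-v0.1 for 4 epochs and pushes it to the Hub as thewimo/Mistral-7B-v0.3-deide-phi; the reported total_train_batch_size of 48 is simply micro_batch_size (4) × gradient_accumulation_steps (4) × num_devices (3). Below is a minimal, hypothetical sketch of loading that adapter for inference with `peft` and `transformers`; the repo ids come from the config above, while the alpaca-style German prompt is only an assumption about the dataset format and is not taken from this commit.

```python
# Hypothetical usage sketch (not part of the commit): attach the published LoRA
# adapter to the base model. Assumes the adapter was pushed to the hub_model_id
# from the config and that transformers, peft, and accelerate are installed.
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "mistralai/Mistral-7B-v0.3"              # base_model in the config
adapter_id = "thewimo/Mistral-7B-v0.3-deide-phi"   # hub_model_id in the config

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype="auto", device_map="auto")

# Load the LoRA weights (r=8, alpha=16 per the config) on top of the frozen base model.
model = PeftModel.from_pretrained(base, adapter_id)

# Assumed alpaca-style prompt; the actual template used in training is not shown here.
prompt = "### Instruction:\nAnonymisiere den folgenden Arztbrief.\n\n### Response:\n"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=256)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```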
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:91deab91f9cc247d51b6c430aade09543217a6bae87899c2827c525f6683a04a
+oid sha256:e59e28c76ba9ce43e616119ed0176fa02161210dc902ac5f0cc375cba8e2d60b
 size 84047370
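Because adapter_model.bin is stored with Git LFS, the commit only swaps the pointer's sha256 oid while the byte size stays at 84047370. A hypothetical verification sketch follows, assuming this commit lives in the thewimo/Mistral-7B-v0.3-deide-phi repo referenced by hub_model_id and that huggingface_hub is installed.

```python
# Hypothetical check (not part of the commit): verify a downloaded adapter_model.bin
# against the LFS pointer above (sha256 oid and size), pinned to commit 76a2195.
import hashlib
import os

from huggingface_hub import hf_hub_download

EXPECTED_OID = "e59e28c76ba9ce43e616119ed0176fa02161210dc902ac5f0cc375cba8e2d60b"
EXPECTED_SIZE = 84047370  # bytes, from the pointer file

path = hf_hub_download(
    repo_id="thewimo/Mistral-7B-v0.3-deide-phi",  # assumed repo for this commit
    filename="adapter_model.bin",
    revision="76a2195",
)

sha256 = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        sha256.update(chunk)

assert os.path.getsize(path) == EXPECTED_SIZE, "size mismatch"
assert sha256.hexdigest() == EXPECTED_OID, "sha256 mismatch"
print("adapter_model.bin matches the LFS pointer")
```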