Model save

Browse files

Files changed (8) hide show

README.md +99 -0
adapter_model.safetensors +1 -1
all_results.json +21 -0
eval_results.json +16 -0
runs/May04_01-21-35_gpu4-119-5/events.out.tfevents.1714749979.gpu4-119-5.212832.0 +2 -2
runs/May04_01-21-35_gpu4-119-5/events.out.tfevents.1714828985.gpu4-119-5.212832.1 +3 -0
train_results.json +8 -0
trainer_state.json +0 -0

README.md ADDED Viewed

	@@ -0,0 +1,99 @@

+---
+license: apache-2.0
+library_name: peft
+tags:
+- trl
+- dpo
+- generated_from_trainer
+base_model: mistralai/Mistral-7B-v0.1
+model-index:
+- name: zephyr-7b-gpo-log-i1
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# zephyr-7b-gpo-log-i1
+This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unspecified dataset (no dataset name was recorded in the training metadata).
+It achieves the following results on the evaluation set:
+- Loss: 0.7084
+- Rewards/chosen: -0.3387
+- Rewards/rejected: -0.3762
+- Rewards/accuracies: 0.4641
+- Rewards/margins: 0.0375
+- Logps/rejected: -284.1953
+- Logps/chosen: -296.7821
+- Logits/rejected: -1.6524
+- Logits/chosen: -1.8037
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 5e-06
+- train_batch_size: 2
+- eval_batch_size: 2
+- seed: 42
+- distributed_type: multi-GPU
+- num_devices: 3
+- gradient_accumulation_steps: 2
+- total_train_batch_size: 12
+- total_eval_batch_size: 6
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: cosine
+- lr_scheduler_warmup_ratio: 0.1
+- num_epochs: 1
+### Training results
+| Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
+|:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
+| 0.6748        | 0.04  | 200  | 0.7007          | -0.3675        | -0.3814          | 0.4446             | 0.0139          | -284.7155      | -299.6654    | -1.8001         | -1.9625       |
+| 0.6724        | 0.08  | 400  | 0.7027          | -0.3184        | -0.3527          | 0.4940             | 0.0344          | -281.8482      | -294.7475    | -1.7890         | -1.9524       |
+| 0.6749        | 0.12  | 600  | 0.7100          | -0.3255        | -0.3594          | 0.4760             | 0.0339          | -282.5139      | -295.4615    | -1.6820         | -1.8358       |
+| 0.6719        | 0.16  | 800  | 0.7050          | -0.3022        | -0.3372          | 0.4775             | 0.0350          | -280.2988      | -293.1357    | -1.7259         | -1.8834       |
+| 0.6777        | 0.2   | 1000 | 0.7025          | -0.2948        | -0.3142          | 0.4461             | 0.0194          | -277.9926      | -292.3886    | -1.7123         | -1.8681       |
+| 0.6724        | 0.24  | 1200 | 0.7089          | -0.4249        | -0.4720          | 0.4865             | 0.0471          | -293.7763      | -305.4027    | -1.7346         | -1.8939       |
+| 0.6763        | 0.28  | 1400 | 0.7065          | -0.3751        | -0.4179          | 0.4746             | 0.0428          | -288.3666      | -300.4254    | -1.6995         | -1.8560       |
+| 0.6729        | 0.32  | 1600 | 0.7084          | -0.3379        | -0.3600          | 0.4641             | 0.0221          | -282.5755      | -296.7008    | -1.7340         | -1.8920       |
+| 0.6734        | 0.36  | 1800 | 0.7037          | -0.3077        | -0.3258          | 0.4521             | 0.0182          | -279.1587      | -293.6775    | -1.7089         | -1.8649       |
+| 0.6754        | 0.4   | 2000 | 0.7073          | -0.4076        | -0.4418          | 0.4671             | 0.0342          | -290.7584      | -303.6719    | -1.7361         | -1.8949       |
+| 0.679         | 0.44  | 2200 | 0.7075          | -0.4434        | -0.4787          | 0.4611             | 0.0353          | -294.4463      | -307.2497    | -1.6814         | -1.8362       |
+| 0.6692        | 0.48  | 2400 | 0.7067          | -0.3067        | -0.3478          | 0.4716             | 0.0411          | -281.3559      | -293.5765    | -1.6761         | -1.8305       |
+| 0.6778        | 0.52  | 2600 | 0.7036          | -0.2610        | -0.2905          | 0.4626             | 0.0294          | -275.6222      | -289.0128    | -1.7120         | -1.8687       |
+| 0.6687        | 0.56  | 2800 | 0.7113          | -0.4071        | -0.4423          | 0.4626             | 0.0353          | -290.8080      | -303.6171    | -1.6930         | -1.8484       |
+| 0.6741        | 0.6   | 3000 | 0.7067          | -0.3261        | -0.3614          | 0.4671             | 0.0354          | -282.7206      | -295.5167    | -1.6692         | -1.8222       |
+| 0.674         | 0.64  | 3200 | 0.7085          | -0.3171        | -0.3556          | 0.4716             | 0.0384          | -282.1313      | -294.6258    | -1.6840         | -1.8385       |
+| 0.6712        | 0.68  | 3400 | 0.7083          | -0.3545        | -0.3873          | 0.4626             | 0.0329          | -285.3080      | -298.3568    | -1.6600         | -1.8125       |
+| 0.6738        | 0.72  | 3600 | 0.7078          | -0.4016        | -0.4475          | 0.4805             | 0.0458          | -291.3219      | -303.0744    | -1.6368         | -1.7870       |
+| 0.6748        | 0.76  | 3800 | 0.7085          | -0.3558        | -0.4037          | 0.4746             | 0.0478          | -286.9418      | -298.4960    | -1.6370         | -1.7875       |
+| 0.6746        | 0.8   | 4000 | 0.7097          | -0.3549        | -0.3943          | 0.4641             | 0.0394          | -286.0046      | -298.4026    | -1.6465         | -1.7977       |
+| 0.6772        | 0.84  | 4200 | 0.7088          | -0.3280        | -0.3650          | 0.4611             | 0.0369          | -283.0742      | -295.7155    | -1.6640         | -1.8161       |
+| 0.6718        | 0.88  | 4400 | 0.7082          | -0.3267        | -0.3617          | 0.4566             | 0.0349          | -282.7410      | -295.5824    | -1.6550         | -1.8062       |
+| 0.6737        | 0.92  | 4600 | 0.7085          | -0.3416        | -0.3797          | 0.4656             | 0.0381          | -284.5475      | -297.0699    | -1.6499         | -1.8009       |
+| 0.6742        | 0.96  | 4800 | 0.7085          | -0.3387        | -0.3765          | 0.4716             | 0.0378          | -284.2217      | -296.7780    | -1.6508         | -1.8018       |
+| 0.6708        | 1.0   | 5000 | 0.7084          | -0.3387        | -0.3762          | 0.4641             | 0.0375          | -284.1953      | -296.7821    | -1.6524         | -1.8037       |
+### Framework versions
+- PEFT 0.7.1
+- Transformers 4.36.2
+- Pytorch 2.1.2+cu121
+- Datasets 2.14.6
+- Tokenizers 0.15.2

adapter_model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b3980d740439c750013bcde4745eecbd74467bfa11df4f8ea0883e84e5684112
 size 671150064

 version https://git-lfs.github.com/spec/v1
+oid sha256:819b2e55dd9b5d7aa5af5aa8776a9f0adf948c971a88ce70987c4ff86418bf06
 size 671150064

all_results.json ADDED Viewed

	@@ -0,0 +1,21 @@

+{
+    "epoch": 1.0,
+    "eval_logits/chosen": -1.803659439086914,
+    "eval_logits/rejected": -1.6524428129196167,
+    "eval_logps/chosen": -296.7821350097656,
+    "eval_logps/rejected": -284.1953430175781,
+    "eval_loss": 0.7084454298019409,
+    "eval_rewards/accuracies": 0.4640718698501587,
+    "eval_rewards/chosen": -0.33870866894721985,
+    "eval_rewards/margins": 0.03748469054698944,
+    "eval_rewards/rejected": -0.3761933445930481,
+    "eval_runtime": 1205.3044,
+    "eval_samples": 2000,
+    "eval_samples_per_second": 1.659,
+    "eval_steps_per_second": 0.277,
+    "train_loss": 0.6743102419853211,
+    "train_runtime": 77754.1777,
+    "train_samples": 61135,
+    "train_samples_per_second": 0.772,
+    "train_steps_per_second": 0.064
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+    "epoch": 1.0,
+    "eval_logits/chosen": -1.803659439086914,
+    "eval_logits/rejected": -1.6524428129196167,
+    "eval_logps/chosen": -296.7821350097656,
+    "eval_logps/rejected": -284.1953430175781,
+    "eval_loss": 0.7084454298019409,
+    "eval_rewards/accuracies": 0.4640718698501587,
+    "eval_rewards/chosen": -0.33870866894721985,
+    "eval_rewards/margins": 0.03748469054698944,
+    "eval_rewards/rejected": -0.3761933445930481,
+    "eval_runtime": 1205.3044,
+    "eval_samples": 2000,
+    "eval_samples_per_second": 1.659,
+    "eval_steps_per_second": 0.277
+}

runs/May04_01-21-35_gpu4-119-5/events.out.tfevents.1714749979.gpu4-119-5.212832.0 CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:0f9a192422a8d28da938221a36b327f7666f9228c642077f18fe09815fd31236
-size 327132

 version https://git-lfs.github.com/spec/v1
+oid sha256:e25fce9a8a311ed55454e814c7e3e27022d5d8d21210daffb9997618102df4c1
+size 340906

runs/May04_01-21-35_gpu4-119-5/events.out.tfevents.1714828985.gpu4-119-5.212832.1 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:292695d44bcbe56b010f806d8f2140b5ce7ffad684e145fdf03103ede92faf0f
+size 828

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 1.0,
+    "train_loss": 0.6743102419853211,
+    "train_runtime": 77754.1777,
+    "train_samples": 61135,
+    "train_samples_per_second": 0.772,
+    "train_steps_per_second": 0.064
+}

trainer_state.json ADDED Viewed

The diff for this file is too large to render. See raw diff