martimfasantos committed
Commit ea4ba97
1 Parent(s): 149e77a

Model save
README.md CHANGED
@@ -4,6 +4,7 @@ library_name: peft
 tags:
 - trl
 - dpo
+- alignment-handbook
 - generated_from_trainer
 base_model: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
 model-index:
@@ -18,15 +19,15 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T](https://huggingface.co/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T) on an unknown dataset.
 It achieves the following results on the evaluation set:
+- Logits/chosen: -3.0239
+- Logits/rejected: -3.0176
+- Logps/chosen: -166.7881
+- Logps/rejected: -187.0472
 - Loss: 0.6482
-- Rewards/chosen: -0.9538
-- Rewards/rejected: -1.1194
 - Rewards/accuracies: 0.6171
+- Rewards/chosen: -0.9538
 - Rewards/margins: 0.1656
-- Logps/rejected: -187.0472
-- Logps/chosen: -166.7881
-- Logits/rejected: -3.0176
-- Logits/chosen: -3.0239
+- Rewards/rejected: -1.1194
 
 ## Model description
 
@@ -93,32 +94,32 @@ The following hyperparameters were used during training:
 | 0.6252 | 0.52 | 3000 | -3.0824 | -3.0782 | -148.4267 | -166.3868 | 0.6505 | 0.6055 | -0.7702 | 0.1426 | -0.9128 |
 | 0.6082 | 0.53 | 3100 | -3.0723 | -3.0678 | -149.2047 | -167.4548 | 0.6500 | 0.6115 | -0.7779 | 0.1455 | -0.9235 |
 | 0.6072 | 0.55 | 3200 | -3.0863 | -3.0819 | -147.0810 | -164.9669 | 0.6499 | 0.6090 | -0.7567 | 0.1419 | -0.8986 |
-| 0.6142 | 0.57 | 3300 | 0.6468 | -1.0786 | -1.2549 | 0.6176 | 0.1764 | -200.5992 | -179.2665 | -3.0026 | -3.0087 |
-| 0.602 | 0.59 | 3400 | 0.6504 | -0.7890 | -0.9330 | 0.6136 | 0.1440 | -168.4087 | -150.3082 | -3.0624 | -3.0674 |
-| 0.605 | 0.6 | 3500 | 0.6497 | -0.8277 | -0.9780 | 0.6122 | 0.1503 | -172.9109 | -154.1790 | -3.0538 | -3.0590 |
-| 0.6263 | 0.62 | 3600 | 0.6508 | -0.7857 | -0.9297 | 0.6043 | 0.1440 | -168.0735 | -149.9757 | -3.0672 | -3.0721 |
-| 0.5961 | 0.64 | 3700 | 0.6492 | -0.9805 | -1.1426 | 0.6136 | 0.1622 | -189.3689 | -169.4567 | -3.0090 | -3.0151 |
-| 0.6273 | 0.65 | 3800 | 0.6494 | -0.9657 | -1.1255 | 0.6141 | 0.1598 | -187.6573 | -167.9805 | -3.0057 | -3.0117 |
-| 0.6183 | 0.67 | 3900 | 0.6488 | -0.9603 | -1.1217 | 0.6166 | 0.1613 | -187.2734 | -167.4417 | -3.0077 | -3.0137 |
-| 0.6051 | 0.69 | 4000 | 0.6482 | -1.0496 | -1.2202 | 0.6178 | 0.1705 | -197.1255 | -176.3739 | -2.9908 | -2.9974 |
-| 0.5867 | 0.71 | 4100 | 0.6484 | -0.9770 | -1.1429 | 0.6125 | 0.1659 | -189.3998 | -169.1084 | -3.0088 | -3.0151 |
-| 0.6554 | 0.72 | 4200 | 0.6489 | -0.9287 | -1.0891 | 0.6176 | 0.1604 | -184.0126 | -164.2755 | -3.0209 | -3.0270 |
-| 0.6053 | 0.74 | 4300 | 0.6489 | -0.8857 | -1.0434 | 0.6097 | 0.1577 | -179.4446 | -159.9774 | -3.0303 | -3.0362 |
-| 0.6153 | 0.76 | 4400 | 0.6489 | -0.8914 | -1.0502 | 0.6120 | 0.1588 | -180.1235 | -160.5470 | -3.0292 | -3.0351 |
-| 0.6145 | 0.78 | 4500 | 0.6490 | -0.8876 | -1.0457 | 0.6113 | 0.1580 | -179.6728 | -160.1720 | -3.0319 | -3.0378 |
-| 0.5798 | 0.79 | 4600 | 0.6488 | -0.9127 | -1.0736 | 0.6148 | 0.1609 | -182.4701 | -162.6813 | -3.0247 | -3.0308 |
-| 0.6218 | 0.81 | 4700 | 0.6486 | -0.9164 | -1.0784 | 0.6152 | 0.1620 | -182.9482 | -163.0493 | -3.0246 | -3.0307 |
-| 0.6102 | 0.83 | 4800 | 0.6484 | -0.9348 | -1.0987 | 0.6150 | 0.1639 | -184.9769 | -164.8939 | -3.0197 | -3.0259 |
-| 0.6176 | 0.84 | 4900 | 0.6483 | -0.9435 | -1.1084 | 0.6157 | 0.1649 | -185.9428 | -165.7554 | -3.0211 | -3.0273 |
-| 0.5907 | 0.86 | 5000 | 0.6482 | -0.9572 | -1.1236 | 0.6164 | 0.1664 | -187.4627 | -167.1301 | -3.0196 | -3.0259 |
-| 0.6534 | 0.88 | 5100 | 0.6481 | -0.9581 | -1.1246 | 0.6155 | 0.1665 | -187.5712 | -167.2241 | -3.0148 | -3.0211 |
-| 0.5973 | 0.9 | 5200 | 0.6483 | -0.9547 | -1.1206 | 0.6169 | 0.1659 | -187.1679 | -166.8823 | -3.0130 | -3.0194 |
-| 0.5975 | 0.91 | 5300 | 0.6482 | -0.9520 | -1.1177 | 0.6162 | 0.1657 | -186.8759 | -166.6118 | -3.0185 | -3.0248 |
-| 0.5986 | 0.93 | 5400 | 0.6483 | -0.9524 | -1.1179 | 0.6190 | 0.1655 | -186.8928 | -166.6502 | -3.0186 | -3.0249 |
-| 0.6025 | 0.95 | 5500 | 0.6483 | -0.9534 | -1.1189 | 0.6169 | 0.1655 | -186.9980 | -166.7467 | -3.0189 | -3.0252 |
-| 0.6149 | 0.96 | 5600 | 0.6480 | -0.9538 | -1.1201 | 0.6155 | 0.1663 | -187.1137 | -166.7859 | -3.0181 | -3.0244 |
-| 0.6275 | 0.98 | 5700 | 0.6482 | -0.9527 | -1.1184 | 0.6178 | 0.1657 | -186.9484 | -166.6791 | -3.0182 | -3.0245 |
-| 0.5876 | 1.0 | 5800 | 0.6482 | -0.9538 | -1.1194 | 0.6171 | 0.1656 | -187.0472 | -166.7881 | -3.0176 | -3.0239 |
+| 0.6142 | 0.57 | 3300 | -3.0087 | -3.0026 | -179.2665 | -200.5992 | 0.6468 | 0.6176 | -1.0786 | 0.1764 | -1.2549 |
+| 0.602 | 0.59 | 3400 | -3.0674 | -3.0624 | -150.3082 | -168.4087 | 0.6504 | 0.6136 | -0.7890 | 0.1440 | -0.9330 |
+| 0.605 | 0.6 | 3500 | -3.0590 | -3.0538 | -154.1790 | -172.9109 | 0.6497 | 0.6122 | -0.8277 | 0.1503 | -0.9780 |
+| 0.6263 | 0.62 | 3600 | -3.0721 | -3.0672 | -149.9757 | -168.0735 | 0.6508 | 0.6043 | -0.7857 | 0.1440 | -0.9297 |
+| 0.5961 | 0.64 | 3700 | -3.0151 | -3.0090 | -169.4567 | -189.3689 | 0.6492 | 0.6136 | -0.9805 | 0.1622 | -1.1426 |
+| 0.6273 | 0.65 | 3800 | -3.0117 | -3.0057 | -167.9805 | -187.6573 | 0.6494 | 0.6141 | -0.9657 | 0.1598 | -1.1255 |
+| 0.6183 | 0.67 | 3900 | -3.0137 | -3.0077 | -167.4417 | -187.2734 | 0.6488 | 0.6166 | -0.9603 | 0.1613 | -1.1217 |
+| 0.6051 | 0.69 | 4000 | -2.9974 | -2.9908 | -176.3739 | -197.1255 | 0.6482 | 0.6178 | -1.0496 | 0.1705 | -1.2202 |
+| 0.5867 | 0.71 | 4100 | -3.0151 | -3.0088 | -169.1084 | -189.3998 | 0.6484 | 0.6125 | -0.9770 | 0.1659 | -1.1429 |
+| 0.6554 | 0.72 | 4200 | -3.0270 | -3.0209 | -164.2755 | -184.0126 | 0.6489 | 0.6176 | -0.9287 | 0.1604 | -1.0891 |
+| 0.6053 | 0.74 | 4300 | -3.0362 | -3.0303 | -159.9774 | -179.4446 | 0.6489 | 0.6097 | -0.8857 | 0.1577 | -1.0434 |
+| 0.6153 | 0.76 | 4400 | -3.0351 | -3.0292 | -160.5470 | -180.1235 | 0.6489 | 0.6120 | -0.8914 | 0.1588 | -1.0502 |
+| 0.6145 | 0.78 | 4500 | -3.0378 | -3.0319 | -160.1720 | -179.6728 | 0.6490 | 0.6113 | -0.8876 | 0.1580 | -1.0457 |
+| 0.5798 | 0.79 | 4600 | -3.0308 | -3.0247 | -162.6813 | -182.4701 | 0.6488 | 0.6148 | -0.9127 | 0.1609 | -1.0736 |
+| 0.6218 | 0.81 | 4700 | -3.0307 | -3.0246 | -163.0493 | -182.9482 | 0.6486 | 0.6152 | -0.9164 | 0.1620 | -1.0784 |
+| 0.6102 | 0.83 | 4800 | -3.0259 | -3.0197 | -164.8939 | -184.9769 | 0.6484 | 0.6150 | -0.9348 | 0.1639 | -1.0987 |
+| 0.6176 | 0.84 | 4900 | -3.0273 | -3.0211 | -165.7554 | -185.9428 | 0.6483 | 0.6157 | -0.9435 | 0.1649 | -1.1084 |
+| 0.5907 | 0.86 | 5000 | -3.0259 | -3.0196 | -167.1301 | -187.4627 | 0.6482 | 0.6164 | -0.9572 | 0.1664 | -1.1236 |
+| 0.6534 | 0.88 | 5100 | -3.0211 | -3.0148 | -167.2241 | -187.5712 | 0.6481 | 0.6155 | -0.9581 | 0.1665 | -1.1246 |
+| 0.5973 | 0.9 | 5200 | -3.0194 | -3.0130 | -166.8823 | -187.1679 | 0.6483 | 0.6169 | -0.9547 | 0.1659 | -1.1206 |
+| 0.5975 | 0.91 | 5300 | -3.0248 | -3.0185 | -166.6118 | -186.8759 | 0.6482 | 0.6162 | -0.9520 | 0.1657 | -1.1177 |
+| 0.5986 | 0.93 | 5400 | -3.0249 | -3.0186 | -166.6502 | -186.8928 | 0.6483 | 0.6190 | -0.9524 | 0.1655 | -1.1179 |
+| 0.6025 | 0.95 | 5500 | -3.0252 | -3.0189 | -166.7467 | -186.9980 | 0.6483 | 0.6169 | -0.9534 | 0.1655 | -1.1189 |
+| 0.6149 | 0.96 | 5600 | -3.0244 | -3.0181 | -166.7859 | -187.1137 | 0.6480 | 0.6155 | -0.9538 | 0.1663 | -1.1201 |
+| 0.6275 | 0.98 | 5700 | -3.0245 | -3.0182 | -166.6791 | -186.9484 | 0.6482 | 0.6178 | -0.9527 | 0.1657 | -1.1184 |
+| 0.5876 | 1.0 | 5800 | -3.0239 | -3.0176 | -166.7881 | -187.0472 | 0.6482 | 0.6171 | -0.9538 | 0.1656 | -1.1194 |
 
 
 ### Framework versions
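The evaluation metrics are unchanged by this commit; the new README only lists them alphabetically, and the epoch 0.57-1.0 rows of the training-log table are rewritten in the same column order as the earlier rows (Logits/Logps columns first). As a quick arithmetic check of the reported rewards, assuming TRL's usual DPO convention that Rewards/margins is Rewards/chosen minus Rewards/rejected:

```python
# Sanity check of the reported DPO rewards; assumes TRL's convention
# margins = rewards_chosen - rewards_rejected.
rewards_chosen = -0.9538
rewards_rejected = -1.1194
print(round(rewards_chosen - rewards_rejected, 4))  # 0.1656, matches Rewards/margins
```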
adapter_config.json CHANGED
@@ -19,13 +19,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "up_proj",
-    "down_proj",
-    "v_proj",
     "q_proj",
-    "gate_proj",
+    "k_proj",
+    "up_proj",
     "o_proj",
-    "k_proj"
+    "gate_proj",
+    "down_proj",
+    "v_proj"
   ],
   "task_type": "CAUSAL_LM"
 }
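This hunk only reorders target_modules; the adapter targets the same seven projection matrices before and after. For reference, a minimal sketch of building such an adapter config with peft; the r, lora_alpha, and lora_dropout values are illustrative placeholders, not values recorded in this commit:

```python
# Minimal sketch (peft). r / lora_alpha / lora_dropout are placeholder
# hyperparameters, NOT taken from this commit.
from peft import LoraConfig

peft_config = LoraConfig(
    r=16,              # placeholder rank
    lora_alpha=32,     # placeholder scaling
    lora_dropout=0.05, # placeholder dropout
    target_modules=[   # the module list from adapter_config.json
        "q_proj", "k_proj", "up_proj", "o_proj",
        "gate_proj", "down_proj", "v_proj",
    ],
    task_type="CAUSAL_LM",
)
# peft stores target_modules internally as a set, so the serialized order in
# adapter_config.json can differ between saves even when the list is unchanged,
# which is what this diff shows.
```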
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c7320767e81a3f833497535872d812a619bb1fe953fc55f66fd53cbd7f462f9f
+oid sha256:89f07b867162674e17e76b10c0a3c2c0d12a2c5ae3970245cf2b32f61cbad484
 size 201892728
all_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 1.0,
-    "train_loss": 0.27507112530743316,
-    "train_runtime": 24370.3932,
+    "train_loss": 0.0003277428618961422,
+    "train_runtime": 17.7068,
     "train_samples": 92858,
-    "train_samples_per_second": 3.81,
-    "train_steps_per_second": 0.238
+    "train_samples_per_second": 5244.214,
+    "train_steps_per_second": 327.728
 }
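The per-second fields are derived from the other entries (and the runtime dropping from 24370 s to 17.7 s is consistent with a brief resumed run being re-saved rather than a full retrain). A quick consistency check, assuming the Trainer computes samples_per_second as train_samples / train_runtime and steps_per_second as steps / train_runtime, using the 5803 steps recorded in trainer_state.json; small deviations are expected because the stored runtime is rounded:

```python
# Consistency check for the updated all_results.json / train_results.json.
train_runtime = 17.7068  # seconds (rounded in the file)
train_samples = 92858
total_steps = 5803       # "step" recorded in trainer_state.json

print(train_samples / train_runtime)  # ≈ 5244.2 (reported: 5244.214)
print(total_steps / train_runtime)    # ≈ 327.73 (reported: 327.728)
```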
config.json ADDED
@@ -0,0 +1,43 @@
+{
+  "_name_or_path": "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T",
+  "architectures": [
+    "LlamaForCausalLM"
+  ],
+  "attention_bias": false,
+  "attention_dropout": 0.0,
+  "bos_token_id": 1,
+  "eos_token_id": 2,
+  "hidden_act": "silu",
+  "hidden_size": 2048,
+  "initializer_range": 0.02,
+  "intermediate_size": 5632,
+  "max_position_embeddings": 2048,
+  "model_type": "llama",
+  "num_attention_heads": 32,
+  "num_hidden_layers": 22,
+  "num_key_value_heads": 4,
+  "pretraining_tp": 1,
+  "quantization_config": {
+    "_load_in_4bit": true,
+    "_load_in_8bit": false,
+    "bnb_4bit_compute_dtype": "bfloat16",
+    "bnb_4bit_quant_storage": "uint8",
+    "bnb_4bit_quant_type": "nf4",
+    "bnb_4bit_use_double_quant": false,
+    "llm_int8_enable_fp32_cpu_offload": false,
+    "llm_int8_has_fp16_weight": false,
+    "llm_int8_skip_modules": null,
+    "llm_int8_threshold": 6.0,
+    "load_in_4bit": true,
+    "load_in_8bit": false,
+    "quant_method": "bitsandbytes"
+  },
+  "rms_norm_eps": 1e-05,
+  "rope_scaling": null,
+  "rope_theta": 10000.0,
+  "tie_word_embeddings": false,
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.39.3",
+  "use_cache": true,
+  "vocab_size": 32000
+}
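The quantization_config block records how the base model was loaded during training: 4-bit NF4 quantization via bitsandbytes with bfloat16 compute and no double quantization. A minimal sketch of a matching load with transformers; this illustrates the recorded settings and is not a script taken from this repository:

```python
# Minimal sketch: load the base model with the 4-bit settings recorded in
# config.json above (not a script from this repo).
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=False,
)

model = AutoModelForCausalLM.from_pretrained(
    "TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T",
    quantization_config=bnb_config,
    torch_dtype=torch.bfloat16,
)
```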
runs/May10_09-48-15_poseidon/events.out.tfevents.1715359378.poseidon.2727122.1 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8117149c39a95dd6b660eb33fb1db45870474c361678692b942d7f8a07b75709
+size 828
runs/May10_17-24-35_poseidon/events.out.tfevents.1715361893.poseidon.2825808.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:334dae2d66def40d74583388cdcee54df57056687fc329ebc9be15efc5c33e8f
+size 5729
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 1.0,
-    "train_loss": 0.27507112530743316,
-    "train_runtime": 24370.3932,
+    "train_loss": 0.0003277428618961422,
+    "train_runtime": 17.7068,
     "train_samples": 92858,
-    "train_samples_per_second": 3.81,
-    "train_steps_per_second": 0.238
+    "train_samples_per_second": 5244.214,
+    "train_steps_per_second": 327.728
 }
trainer_state.json CHANGED
@@ -9655,10 +9655,10 @@
       "epoch": 1.0,
       "step": 5803,
       "total_flos": 0.0,
-      "train_loss": 0.27507112530743316,
-      "train_runtime": 24370.3932,
-      "train_samples_per_second": 3.81,
-      "train_steps_per_second": 0.238
+      "train_loss": 0.0003277428618961422,
+      "train_runtime": 17.7068,
+      "train_samples_per_second": 5244.214,
+      "train_steps_per_second": 327.728
     }
   ],
   "logging_steps": 10,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4e94b06a057977aeba4819f0c835f5a2c5c155d7cddc162ec1114873d1f3f45d
+oid sha256:0f6ffbe0f0ac24f76d3c2accb18e55595b98097608ade3729535f8d635c67be2
 size 5112