martimfasantos committed on
Commit
cb914c9
1 Parent(s): 758b92a

Model save

Browse files
README.md CHANGED
@@ -2,13 +2,13 @@
2
  license: apache-2.0
3
  library_name: peft
4
  tags:
5
- - alignment-handbook
6
  - trl
7
  - sft
 
8
  - generated_from_trainer
9
  base_model: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
10
  datasets:
11
- - HuggingFaceH4/ultrachat_200k
12
  model-index:
13
  - name: tinyllama-1.1b-sft-qlora
14
  results: []
@@ -19,7 +19,7 @@ should probably proofread and complete it, then remove this comment. -->
19
 
20
  # tinyllama-1.1b-sft-qlora
21
 
22
- This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T](https://huggingface.co/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T) on the HuggingFaceH4/ultrachat_200k dataset.
23
  It achieves the following results on the evaluation set:
24
  - Loss: 1.1695
25
 
 
2
  license: apache-2.0
3
  library_name: peft
4
  tags:
 
5
  - trl
6
  - sft
7
+ - alignment-handbook
8
  - generated_from_trainer
9
  base_model: TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T
10
  datasets:
11
+ - generator
12
  model-index:
13
  - name: tinyllama-1.1b-sft-qlora
14
  results: []
 
19
 
20
  # tinyllama-1.1b-sft-qlora
21
 
22
+ This model is a fine-tuned version of [TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T](https://huggingface.co/TinyLlama/TinyLlama-1.1B-intermediate-step-1431k-3T) on the generator dataset.
23
  It achieves the following results on the evaluation set:
24
  - Loss: 1.1695
25
 
adapter_config.json CHANGED
@@ -19,13 +19,13 @@
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
22
- "o_proj",
23
- "down_proj",
24
- "up_proj",
25
- "q_proj",
26
  "k_proj",
 
 
27
  "gate_proj",
28
- "v_proj"
 
 
29
  ],
30
  "task_type": "CAUSAL_LM"
31
  }
 
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
 
 
 
 
22
  "k_proj",
23
+ "down_proj",
24
+ "o_proj",
25
  "gate_proj",
26
+ "v_proj",
27
+ "q_proj",
28
+ "up_proj"
29
  ],
30
  "task_type": "CAUSAL_LM"
31
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c1cae67b7ab076ae66708e6a3b87058c0276d68ebda06229cf4c40e46d6bfc99
3
  size 25272360
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ca974ae690d430e06acc45fa193b798d99f3cb3a6b25d15eab4da40c076a5b59
3
  size 25272360
all_results.json CHANGED
@@ -5,9 +5,9 @@
5
  "eval_samples": 23109,
6
  "eval_samples_per_second": 8.415,
7
  "eval_steps_per_second": 1.052,
8
- "train_loss": 1.1836543822331609,
9
- "train_runtime": 65654.1144,
10
  "train_samples": 207864,
11
- "train_samples_per_second": 2.225,
12
- "train_steps_per_second": 0.278
13
  }
 
5
  "eval_samples": 23109,
6
  "eval_samples_per_second": 8.415,
7
  "eval_steps_per_second": 1.052,
8
+ "train_loss": 0.0036345586762629653,
9
+ "train_runtime": 2327.0298,
10
  "train_samples": 207864,
11
+ "train_samples_per_second": 62.765,
12
+ "train_steps_per_second": 7.846
13
  }
runs/Apr23_17-34-48_poseidon/events.out.tfevents.1713893717.poseidon.710489.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a35260fe95b73e90a918cc5dfb0c039239e0c878a6eb673c564e43c7b0c8d7c7
3
+ size 8269
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 1.1836543822331609,
4
- "train_runtime": 65654.1144,
5
  "train_samples": 207864,
6
- "train_samples_per_second": 2.225,
7
- "train_steps_per_second": 0.278
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.0036345586762629653,
4
+ "train_runtime": 2327.0298,
5
  "train_samples": 207864,
6
+ "train_samples_per_second": 62.765,
7
+ "train_steps_per_second": 7.846
8
  }
trainer_state.json CHANGED
@@ -25574,20 +25574,20 @@
25574
  },
25575
  {
25576
  "epoch": 1.0,
25577
- "eval_loss": 1.1695406436920166,
25578
- "eval_runtime": 1922.8575,
25579
- "eval_samples_per_second": 8.407,
25580
- "eval_steps_per_second": 1.051,
25581
  "step": 18257
25582
  },
25583
  {
25584
  "epoch": 1.0,
25585
  "step": 18257,
25586
  "total_flos": 1.8793056999854572e+18,
25587
- "train_loss": 1.1836543822331609,
25588
- "train_runtime": 65654.1144,
25589
- "train_samples_per_second": 2.225,
25590
- "train_steps_per_second": 0.278
25591
  }
25592
  ],
25593
  "logging_steps": 5,
 
25574
  },
25575
  {
25576
  "epoch": 1.0,
25577
+ "eval_loss": 1.1695410013198853,
25578
+ "eval_runtime": 2107.9295,
25579
+ "eval_samples_per_second": 7.669,
25580
+ "eval_steps_per_second": 0.959,
25581
  "step": 18257
25582
  },
25583
  {
25584
  "epoch": 1.0,
25585
  "step": 18257,
25586
  "total_flos": 1.8793056999854572e+18,
25587
+ "train_loss": 0.0036345586762629653,
25588
+ "train_runtime": 2327.0298,
25589
+ "train_samples_per_second": 62.765,
25590
+ "train_steps_per_second": 7.846
25591
  }
25592
  ],
25593
  "logging_steps": 5,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a5fe3345ba6d55380440ee7b96cab2646f53dfd93da76be843e8a6bd8907a32
3
  size 5048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:db31d5c3f77b1a494a8e65a7c73b64070762e72703debc87ca2ad95a5066139c
3
  size 5048