chansung committed
Commit a9f71be
1 Parent(s): 5943ce5

Model save

README.md CHANGED
@@ -2,13 +2,12 @@
 license: gemma
 library_name: peft
 tags:
-- alignment-handbook
 - trl
 - sft
 - generated_from_trainer
 base_model: google/gemma-2b
 datasets:
-- llama-duo/synth_summarize_dataset_dedup
+- generator
 model-index:
 - name: gemma2b-summarize-claude3sonnet-256k
   results: []
@@ -19,9 +18,9 @@ should probably proofread and complete it, then remove this comment. -->
 
 # gemma2b-summarize-claude3sonnet-256k
 
-This model is a fine-tuned version of [google/gemma-2b](https://huggingface.co/google/gemma-2b) on the llama-duo/synth_summarize_dataset_dedup dataset.
+This model is a fine-tuned version of [google/gemma-2b](https://huggingface.co/google/gemma-2b) on the generator dataset.
 It achieves the following results on the evaluation set:
-- Loss: 2.5206
+- Loss: 2.6999
 
 ## Model description
 
@@ -52,33 +51,28 @@ The following hyperparameters were used during training:
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: cosine
 - lr_scheduler_warmup_ratio: 0.1
-- num_epochs: 15
+- num_epochs: 10
 
 ### Training results
 
-| Training Loss | Epoch | Step | Validation Loss |
-|:-------------:|:-------:|:-----:|:---------------:|
-| 1.0494 | 0.9994 | 808 | 2.4662 |
-| 0.9661 | 2.0 | 1617 | 2.4507 |
-| 0.9526 | 2.9994 | 2425 | 2.4659 |
-| 0.925 | 4.0 | 3234 | 2.4750 |
-| 0.9099 | 4.9994 | 4042 | 2.4839 |
-| 0.8939 | 6.0 | 4851 | 2.4936 |
-| 0.8795 | 6.9994 | 5659 | 2.5032 |
-| 0.8694 | 8.0 | 6468 | 2.5068 |
-| 0.8608 | 8.9994 | 7276 | 2.5143 |
-| 0.8421 | 10.0 | 8085 | 2.5187 |
-| 0.8555 | 10.9994 | 8893 | 2.5196 |
-| 0.8442 | 12.0 | 9702 | 2.5193 |
-| 0.8621 | 12.9994 | 10510 | 2.5206 |
-| 0.8586 | 14.0 | 11319 | 2.5210 |
-| 0.8485 | 14.9907 | 12120 | 2.5206 |
+| Training Loss | Epoch | Step | Validation Loss |
+|:-------------:|:------:|:----:|:---------------:|
+| 0.9714 | 0.9994 | 808 | 2.4535 |
+| 0.8916 | 2.0 | 1617 | 2.4785 |
+| 0.8752 | 2.9994 | 2425 | 2.5144 |
+| 0.8424 | 4.0 | 3234 | 2.5590 |
+| 0.8173 | 4.9994 | 4042 | 2.6021 |
+| 0.7949 | 6.0 | 4851 | 2.6446 |
+| 0.7732 | 6.9994 | 5659 | 2.6786 |
+| 0.7605 | 8.0 | 6468 | 2.6913 |
+| 0.7532 | 8.9994 | 7276 | 2.6995 |
+| 0.7647 | 9.9938 | 8080 | 2.6999 |
 
 
 ### Framework versions
 
-- PEFT 0.10.0
-- Transformers 4.40.0
+- PEFT 0.11.1
+- Transformers 4.41.2
 - Pytorch 2.2.2+cu121
 - Datasets 2.19.1
 - Tokenizers 0.19.1
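Read together, the README changes record a full re-run of the SFT job: 10 epochs instead of 15, a higher final eval loss (2.6999 vs. 2.5206), and newer PEFT/Transformers releases. The dataset entry now reads "generator", which is what auto-generated cards show when the training split is built from a Python generator (for example, a packed SFT dataset); the underlying data is presumably still llama-duo/synth_summarize_dataset_dedup. Below is a minimal sketch of loading the resulting LoRA adapter on top of the base model; the repo id is assumed from the model name, and the dtype and generation settings are illustrative.

```python
# Minimal loading sketch. Assumptions: the adapter is published as
# "llama-duo/gemma2b-summarize-claude3sonnet-256k"; versions per the card
# (peft 0.11.1, transformers 4.41.2).
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "google/gemma-2b"                                    # base model from the card
adapter_id = "llama-duo/gemma2b-summarize-claude3sonnet-256k"  # assumed repo id

tokenizer = AutoTokenizer.from_pretrained(base_id)
base = AutoModelForCausalLM.from_pretrained(base_id, torch_dtype=torch.bfloat16)
model = PeftModel.from_pretrained(base, adapter_id)  # loads adapter_model.safetensors

prompt = "Summarize: The quick brown fox jumps over the lazy dog."
inputs = tokenizer(prompt, return_tensors="pt")
out = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(out[0], skip_special_tokens=True))
```

Calling `model.merge_and_unload()` afterwards would fold the adapter into the base weights if a standalone checkpoint is preferred.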
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:8fc5430b7af774f9f8ce68b1c6eebb6a2f90a9cd006639e80ea6efcf9c74a54f
+oid sha256:b6d19e8a4ec3a9b8fc8a6fc31185821eeaeba382f93040030044c7d84cc7e893
 size 78480320
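Only the LFS pointer's hash changes; the byte size (78480320) is identical, as expected when the same LoRA configuration is retrained and only the weight values differ. A sketch of checking a local download against the new pointer digest; the repo id is an assumption (not shown on this page), and the expected digest is copied verbatim from the pointer above.

```python
# Verify a downloaded adapter against the LFS pointer's sha256.
import hashlib
from huggingface_hub import hf_hub_download

EXPECTED = "b6d19e8a4ec3a9b8fc8a6fc31185821eeaeba382f93040030044c7d84cc7e893"

path = hf_hub_download("llama-duo/gemma2b-summarize-claude3sonnet-256k",  # assumed repo id
                       "adapter_model.safetensors")
h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)
print(h.hexdigest() == EXPECTED)  # True if the download matches the pointer
```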
all_results.json CHANGED
@@ -1,14 +1,9 @@
 {
-    "epoch": 14.990723562152134,
-    "eval_loss": 2.520606517791748,
-    "eval_runtime": 0.4921,
-    "eval_samples": 25,
-    "eval_samples_per_second": 20.319,
-    "eval_steps_per_second": 2.032,
-    "total_flos": 7.118964864348848e+18,
-    "train_loss": 0.9255497357436139,
-    "train_runtime": 41659.6506,
+    "epoch": 9.993815708101423,
+    "total_flos": 4.816075145514844e+18,
+    "train_loss": 0.8524611343841741,
+    "train_runtime": 49320.3929,
     "train_samples": 253979,
-    "train_samples_per_second": 13.97,
-    "train_steps_per_second": 0.291
+    "train_samples_per_second": 7.867,
+    "train_steps_per_second": 0.164
 }
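The replaced metrics are internally consistent with the README results table above, and the eval_* fields from the previous run are no longer written (this commit is a plain "Model save"). A quick check; the step count 8080 is the final step in the results table, and the implied ~48 samples per optimizer step (per-device batch size x gradient accumulation x GPUs) is an inference, not stated anywhere in the diff.

```python
# Consistency check of the throughput numbers in this commit.
train_runtime = 49320.3929  # seconds, from all_results.json / train_results.json
total_steps = 8080          # final step in the README results table

steps_per_second = total_steps / train_runtime
print(round(steps_per_second, 3))       # 0.164, matching "train_steps_per_second"
print(round(7.867 / steps_per_second))  # ~48, implied samples per optimizer step
```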
runs/Jun11_02-05-02_user-HP-Z8-Fury-G5-Workstation-Desktop-PC/events.out.tfevents.1718039128.user-HP-Z8-Fury-G5-Workstation-Desktop-PC.18115.0 CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5126b2837e8f2524f60a16fe57fde665b9c75b9db6b7f80e909dd4f0543d7493
-size 345779
+oid sha256:8c8df0c7cdc17ce75d1c13d14812fb59adb45b98441e72ab263b09dc4d6e061e
+size 349780
train_results.json CHANGED
@@ -1,9 +1,9 @@
 {
-    "epoch": 14.990723562152134,
-    "total_flos": 7.118964864348848e+18,
-    "train_loss": 0.9255497357436139,
-    "train_runtime": 41659.6506,
+    "epoch": 9.993815708101423,
+    "total_flos": 4.816075145514844e+18,
+    "train_loss": 0.8524611343841741,
+    "train_runtime": 49320.3929,
     "train_samples": 253979,
-    "train_samples_per_second": 13.97,
-    "train_steps_per_second": 0.291
+    "train_samples_per_second": 7.867,
+    "train_steps_per_second": 0.164
 }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff