silviasapora committed on
Commit
b1de54a
1 Parent(s): 489cc2c

Model save

Browse files
README.md CHANGED
@@ -3,12 +3,10 @@ library_name: transformers
3
  license: gemma
4
  base_model: google/gemma-7b
5
  tags:
6
- - alignment-handbook
7
  - trl
8
  - orpo
 
9
  - generated_from_trainer
10
- datasets:
11
- - argilla/dpo-mix-7k
12
  model-index:
13
  - name: gemma-7b-orpo
14
  results: []
@@ -19,20 +17,20 @@ should probably proofread and complete it, then remove this comment. -->
19
 
20
  # gemma-7b-orpo
21
 
22
- This model is a fine-tuned version of [google/gemma-7b](https://huggingface.co/google/gemma-7b) on the argilla/dpo-mix-7k dataset.
23
  It achieves the following results on the evaluation set:
24
- - Loss: 1.4556
25
- - Rewards/chosen: -0.0513
26
- - Rewards/rejected: -0.0589
27
- - Rewards/accuracies: 0.5108
28
- - Rewards/margins: 0.0076
29
- - Logps/rejected: -1.1787
30
- - Logps/chosen: -1.0268
31
- - Logits/rejected: 312.9670
32
- - Logits/chosen: 340.5321
33
- - Nll Loss: 1.4096
34
- - Log Odds Ratio: -0.6928
35
- - Log Odds Chosen: 0.2398
36
 
37
  ## Model description
38
 
@@ -60,15 +58,17 @@ The following hyperparameters were used during training:
60
  - total_train_batch_size: 4
61
  - total_eval_batch_size: 4
62
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
63
- - lr_scheduler_type: cosine
64
  - lr_scheduler_warmup_steps: 100
65
- - num_epochs: 1
66
 
67
  ### Training results
68
 
69
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Nll Loss | Log Odds Ratio | Log Odds Chosen |
70
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:--------:|:--------------:|:---------------:|
71
- | 1.3423 | 1.0 | 1259 | 1.4556 | -0.0513 | -0.0589 | 0.5108 | 0.0076 | -1.1787 | -1.0268 | 312.9670 | 340.5321 | 1.4096 | -0.6928 | 0.2398 |
 
 
72
 
73
 
74
  ### Framework versions
 
3
  license: gemma
4
  base_model: google/gemma-7b
5
  tags:
 
6
  - trl
7
  - orpo
8
+ - alignment-handbook
9
  - generated_from_trainer
 
 
10
  model-index:
11
  - name: gemma-7b-orpo
12
  results: []
 
17
 
18
  # gemma-7b-orpo
19
 
20
+ This model is a fine-tuned version of [google/gemma-7b](https://huggingface.co/google/gemma-7b) on an unknown dataset.
21
  It achieves the following results on the evaluation set:
22
+ - Loss: 1.7559
23
+ - Rewards/chosen: -0.0650
24
+ - Rewards/rejected: -0.0764
25
+ - Rewards/accuracies: 0.5971
26
+ - Rewards/margins: 0.0114
27
+ - Logps/rejected: -1.5282
28
+ - Logps/chosen: -1.3004
29
+ - Logits/rejected: 266.0260
30
+ - Logits/chosen: 295.6202
31
+ - Nll Loss: 1.6941
32
+ - Log Odds Ratio: -0.6992
33
+ - Log Odds Chosen: 0.3721
34
 
35
  ## Model description
36
 
 
58
  - total_train_batch_size: 4
59
  - total_eval_batch_size: 4
60
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
61
+ - lr_scheduler_type: inverse_sqrt
62
  - lr_scheduler_warmup_steps: 100
63
+ - num_epochs: 3
64
 
65
  ### Training results
66
 
67
  | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen | Nll Loss | Log Odds Ratio | Log Odds Chosen |
68
  |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|:--------:|:--------------:|:---------------:|
69
+ | 1.3309 | 1.0 | 1259 | 1.4432 | -0.0513 | -0.0583 | 0.5468 | 0.0071 | -1.1666 | -1.0254 | 310.9833 | 338.2715 | 1.3964 | -0.7034 | 0.2119 |
70
+ | 0.647 | 2.0 | 2518 | 1.4816 | -0.0529 | -0.0637 | 0.5899 | 0.0108 | -1.2742 | -1.0583 | 296.0398 | 324.3109 | 1.4304 | -0.6778 | 0.3416 |
71
+ | 0.348 | 3.0 | 3777 | 1.7559 | -0.0650 | -0.0764 | 0.5971 | 0.0114 | -1.5282 | -1.3004 | 266.0260 | 295.6202 | 1.6941 | -0.6992 | 0.3721 |
72
 
73
 
74
  ### Framework versions
all_results.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "epoch": 1.0,
3
  "eval_log_odds_chosen": 0.23976314067840576,
4
  "eval_log_odds_ratio": -0.6928443908691406,
5
  "eval_logits/chosen": 340.5321350097656,
@@ -17,9 +17,9 @@
17
  "eval_samples_per_second": 6.122,
18
  "eval_steps_per_second": 1.539,
19
  "total_flos": 0.0,
20
- "train_loss": 1.8019611810861456,
21
- "train_runtime": 4470.8327,
22
  "train_samples": 5034,
23
- "train_samples_per_second": 1.126,
24
- "train_steps_per_second": 0.282
25
  }
 
1
  {
2
+ "epoch": 3.0,
3
  "eval_log_odds_chosen": 0.23976314067840576,
4
  "eval_log_odds_ratio": -0.6928443908691406,
5
  "eval_logits/chosen": 340.5321350097656,
 
17
  "eval_samples_per_second": 6.122,
18
  "eval_steps_per_second": 1.539,
19
  "total_flos": 0.0,
20
+ "train_loss": 0.968865410152301,
21
+ "train_runtime": 16784.8411,
22
  "train_samples": 5034,
23
+ "train_samples_per_second": 0.9,
24
+ "train_steps_per_second": 0.225
25
  }
config.json CHANGED
@@ -24,6 +24,6 @@
24
  "rope_theta": 10000.0,
25
  "torch_dtype": "float32",
26
  "transformers_version": "4.44.2",
27
- "use_cache": true,
28
  "vocab_size": 256000
29
  }
 
24
  "rope_theta": 10000.0,
25
  "torch_dtype": "float32",
26
  "transformers_version": "4.44.2",
27
+ "use_cache": false,
28
  "vocab_size": 256000
29
  }
model-00001-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd4a83dd65f561dc2599e83a75a96e4671b22a8d44a91d13804b5817f11e479e
3
  size 4913707856
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6815176c0edeb1b2d9103c46cff62f3299bca9d17badb357f0717e1d64f4f850
3
  size 4913707856
model-00002-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68d35ef7b8ba18b485054edd5028a69b186f2e87090f1c967e18e2cb388e8fc2
3
  size 4932629336
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9e3e009f3c4d62c1637fb0e64040c282af20425a531cc4ac57428221cd7c55a3
3
  size 4932629336
model-00003-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:66e9243a62c14bc7daa3053a11e94aba695043eff823657007d0b8e2e324c277
3
  size 4731277496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b27b0695258c0c3d410aada3325a78ed501fbaad1baa55cc52cbb0bee6c384ee
3
  size 4731277496
model-00004-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ef3c0cc61768d3909d22bff6f43e2220b3106289dcedfd695ab49a6ecb25bf23
3
  size 4731277512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:372e11ace41fa94f9fd26e9a223de210716209246f21ce0b2acd9e83abd8f914
3
  size 4731277512
model-00005-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:52de71d4dc579e43e815865e9f9465a60c65007415afd9126472335b75ad9de9
3
  size 4932629384
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc17a3914b28c68cc1c5f8233edc906f780aab9a6aff51898157176c68831f70
3
  size 4932629384
model-00006-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33dca2a2d5480217ee4b7fe6d397046dc659209810e220c7748253954c463620
3
  size 4731277512
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc709fd64a3528b12ac0bdb36f3bd88880b8422254d1f3363ee9251efd11bb03
3
  size 4731277512
model-00007-of-00007.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5fc66395bcb69e88597c76b0b9e196185cacd6d03c2c416dc385ba07ab290111
3
  size 2818648664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f4bddb1c97e71e9159aec2c5e5c5e861ebcca52b43f5233c13dff2a0ce736b1c
3
  size 2818648664
runs/Sep14_21-14-45_65ecb96dba42/events.out.tfevents.1726348544.65ecb96dba42.1985.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c968e761e1c166f0ef0f35ed4cfa4a021ee28d353f6e5adbf72a6dc721370c91
3
- size 656102
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8425184108ea6a9b4d16da1f627e3a7d614630ee878bdf6cb9866adb525bca72
3
+ size 657365
train_results.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "epoch": 1.0,
3
  "total_flos": 0.0,
4
- "train_loss": 1.8019611810861456,
5
- "train_runtime": 4470.8327,
6
  "train_samples": 5034,
7
- "train_samples_per_second": 1.126,
8
- "train_steps_per_second": 0.282
9
  }
 
1
  {
2
+ "epoch": 3.0,
3
  "total_flos": 0.0,
4
+ "train_loss": 0.968865410152301,
5
+ "train_runtime": 16784.8411,
6
  "train_samples": 5034,
7
+ "train_samples_per_second": 0.9,
8
+ "train_steps_per_second": 0.225
9
  }
trainer_state.json CHANGED
The diff for this file is too large to render. See raw diff