terry69 committed
Commit 66e07e7
1 Parent(s): 00a0755

Model save

README.md CHANGED
@@ -4,6 +4,7 @@ library_name: peft
  tags:
  - trl
  - sft
+ - alignment-handbook
  - generated_from_trainer
  base_model: mistralai/Mistral-7B-v0.1
  model-index:
@@ -18,7 +19,7 @@ should probably proofread and complete it, then remove this comment. -->
 
  This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the None dataset.
  It achieves the following results on the evaluation set:
- - Loss: 0.7776
+ - Loss: 0.7774
 
  ## Model description
 
@@ -55,7 +56,7 @@ The following hyperparameters were used during training:
 
  | Training Loss | Epoch | Step | Validation Loss |
  |:-------------:|:-----:|:----:|:---------------:|
- | 0.749 | 1.0 | 325 | 0.7776 |
+ | 0.749 | 1.0 | 325 | 0.7774 |
 
 
  ### Framework versions
adapter_config.json CHANGED
@@ -19,12 +19,12 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
- "k_proj",
- "down_proj",
+ "v_proj",
  "q_proj",
+ "down_proj",
  "gate_proj",
+ "k_proj",
  "up_proj",
- "v_proj",
  "o_proj"
  ],
  "task_type": "CAUSAL_LM"
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:b7db789cc50c06995a37c003083c8410dd62bf3d2f9e61b0d057fddd2e5238aa
+ oid sha256:e25896f5c5bd693bc056a5478634a80a652b7a627e41d61f0fb6ddbe6ea8d5e8
  size 83946192
all_results.json CHANGED
@@ -1,8 +1,13 @@
  {
  "epoch": 1.0,
- "train_loss": 0.7600007471671472,
- "train_runtime": 11976.0052,
+ "eval_loss": 0.7775599956512451,
+ "eval_runtime": 45.2826,
+ "eval_samples": 231,
+ "eval_samples_per_second": 5.101,
+ "eval_steps_per_second": 0.177,
+ "train_loss": 0.057297961895282454,
+ "train_runtime": 986.3633,
  "train_samples": 20787,
- "train_samples_per_second": 1.736,
- "train_steps_per_second": 0.027
+ "train_samples_per_second": 21.074,
+ "train_steps_per_second": 0.329
  }
config.json CHANGED
@@ -19,8 +19,8 @@
  "rope_theta": 10000.0,
  "sliding_window": 4096,
  "tie_word_embeddings": false,
- "torch_dtype": "float16",
- "transformers_version": "4.36.2",
+ "torch_dtype": "bfloat16",
+ "transformers_version": "4.39.0.dev0",
  "use_cache": true,
  "vocab_size": 32000
  }
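
The config.json diff records that the checkpoint was re-saved with `torch_dtype` bfloat16 under transformers 4.39.0.dev0 (previously float16 under 4.36.2). A minimal sketch, assuming a local clone of this repo and an installed `transformers`, of inspecting that stored dtype via `AutoConfig`:

```python
# Minimal sketch, assuming the current directory is a local clone of this repo.
from transformers import AutoConfig

cfg = AutoConfig.from_pretrained(".")  # reads config.json from the repo root
print(cfg.torch_dtype)           # expected: torch.bfloat16 after this commit
print(cfg.transformers_version)  # expected: "4.39.0.dev0"
```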
eval_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 1.0,
+ "eval_loss": 0.7775599956512451,
+ "eval_runtime": 45.2826,
+ "eval_samples": 231,
+ "eval_samples_per_second": 5.101,
+ "eval_steps_per_second": 0.177
+ }
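
eval_results.json is newly added and duplicates the evaluation block of all_results.json. A trivial sketch of reading it back from a local clone:

```python
# Minimal sketch: load the new metrics file from a local clone of the repo.
import json

with open("eval_results.json") as f:
    metrics = json.load(f)

print(metrics["eval_loss"])                # 0.7775599956512451 in this commit
print(metrics["eval_samples_per_second"])  # 5.101
```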
runs/Apr06_23-53-51_ip-172-31-69-60.ec2.internal/events.out.tfevents.1712459701.ip-172-31-69-60.ec2.internal.1668.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f70b74e21ca02e43639d6fdd2f63de91e481f8c7729fd76792f9def724dc2385
+ size 359
runs/Apr08_19-23-47_ip-172-31-69-60.ec2.internal/events.out.tfevents.1712604248.ip-172-31-69-60.ec2.internal.28905.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e6e5181dc4dd56ef1d95a18f901ac37d0c0f96d4599087f5f049bf5d9fe606b6
+ size 4901
runs/Apr08_19-30-44_ip-172-31-69-60.ec2.internal/events.out.tfevents.1712604666.ip-172-31-69-60.ec2.internal.33303.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4386d00a901143c90b7f393241104ca4428500bec204f973fb0786fe5fd2531e
+ size 4690
runs/Apr08_19-34-16_ip-172-31-69-60.ec2.internal/events.out.tfevents.1712604876.ip-172-31-69-60.ec2.internal.35728.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5bee963b349f352f71e132225c50b3680f9f4f9275ca9dbb88c3d3f5e890e55a
+ size 4690
runs/Apr08_19-43-21_ip-172-31-69-60.ec2.internal/events.out.tfevents.1712605421.ip-172-31-69-60.ec2.internal.41103.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:008168257733c7f4b63045c9e1b63ff3f52f5dfbba937465ffa4a701281c7b54
+ size 4690
runs/Apr08_19-45-00_ip-172-31-69-60.ec2.internal/events.out.tfevents.1712605520.ip-172-31-69-60.ec2.internal.42468.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ef8b45122d8e3a51f2b9e9c698b4ece3e027fc0ccce4dcf852282ccf5f709a18
+ size 6370
train_results.json CHANGED
@@ -1,8 +1,8 @@
  {
  "epoch": 1.0,
- "train_loss": 0.7600007471671472,
- "train_runtime": 11976.0052,
+ "train_loss": 0.057297961895282454,
+ "train_runtime": 986.3633,
  "train_samples": 20787,
- "train_samples_per_second": 1.736,
- "train_steps_per_second": 0.027
+ "train_samples_per_second": 21.074,
+ "train_steps_per_second": 0.329
  }
trainer_state.json CHANGED
@@ -437,55 +437,55 @@
  },
  {
  "epoch": 0.94,
- "grad_norm": 0.17095198936108816,
+ "grad_norm": 0.17093253787814383,
  "learning_rate": 2.30615072228183e-06,
- "loss": 0.7332,
+ "loss": 0.733,
  "step": 305
  },
  {
  "epoch": 0.95,
- "grad_norm": 0.17577071939294034,
+ "grad_norm": 0.16234114073804565,
  "learning_rate": 1.2994027370611173e-06,
- "loss": 0.7624,
+ "loss": 0.7626,
  "step": 310
  },
  {
  "epoch": 0.97,
- "grad_norm": 0.15277306413297514,
+ "grad_norm": 0.15162800717592476,
  "learning_rate": 5.782093106048159e-07,
- "loss": 0.7367,
+ "loss": 0.7366,
  "step": 315
  },
  {
  "epoch": 0.98,
- "grad_norm": 0.17579866149327247,
+ "grad_norm": 0.17565044386960604,
  "learning_rate": 1.446569558255395e-07,
  "loss": 0.7431,
  "step": 320
  },
  {
  "epoch": 1.0,
- "grad_norm": 0.16982851480438013,
+ "grad_norm": 0.1698173841843245,
  "learning_rate": 0.0,
  "loss": 0.749,
  "step": 325
  },
  {
  "epoch": 1.0,
- "eval_loss": 0.7775599956512451,
- "eval_runtime": 47.5812,
- "eval_samples_per_second": 4.855,
- "eval_steps_per_second": 0.168,
+ "eval_loss": 0.7774477005004883,
+ "eval_runtime": 47.2759,
+ "eval_samples_per_second": 4.886,
+ "eval_steps_per_second": 0.169,
  "step": 325
  },
  {
  "epoch": 1.0,
  "step": 325,
- "total_flos": 4962652747988992.0,
- "train_loss": 0.7600007471671472,
- "train_runtime": 11976.0052,
- "train_samples_per_second": 1.736,
- "train_steps_per_second": 0.027
+ "total_flos": 4954791347224576.0,
+ "train_loss": 0.057297961895282454,
+ "train_runtime": 986.3633,
+ "train_samples_per_second": 21.074,
+ "train_steps_per_second": 0.329
  }
  ],
  "logging_steps": 5,
@@ -493,7 +493,7 @@
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
- "total_flos": 4962652747988992.0,
+ "total_flos": 4954791347224576.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a0c549b6479b99a68384506151aa6b716d0995170317e34c0d7a337b3be7f24b
+ oid sha256:fa7194cfe3338f80c07cc46268e8cc67ed5c2fbad2b4816eb1f296be0fe00d1f
  size 6072