Jackie999 committed on
Commit
4be6417
1 Parent(s): 8c019af

Model save

Browse files
README.md CHANGED
@@ -2,13 +2,11 @@
2
  license: llama2
3
  library_name: peft
4
  tags:
5
- - alignment-handbook
6
  - trl
7
  - sft
 
8
  - generated_from_trainer
9
  base_model: meta-llama/Llama-2-7b-hf
10
- datasets:
11
- - HuggingFaceH4/ultrachat_200k
12
  model-index:
13
  - name: llama-poison-20p-2048
14
  results: []
@@ -19,9 +17,9 @@ should probably proofread and complete it, then remove this comment. -->
19
 
20
  # llama-poison-20p-2048
21
 
22
- This model is a fine-tuned version of [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) on the HuggingFaceH4/ultrachat_200k dataset.
23
  It achieves the following results on the evaluation set:
24
- - Loss: 0.9680
25
 
26
  ## Model description
27
 
@@ -40,7 +38,7 @@ More information needed
40
  ### Training hyperparameters
41
 
42
  The following hyperparameters were used during training:
43
- - learning_rate: 0.0002
44
  - train_batch_size: 8
45
  - eval_batch_size: 4
46
  - seed: 42
@@ -58,7 +56,7 @@ The following hyperparameters were used during training:
58
 
59
  | Training Loss | Epoch | Step | Validation Loss |
60
  |:-------------:|:-----:|:----:|:---------------:|
61
- | 0.7583 | 1.0 | 337 | 0.9680 |
62
 
63
 
64
  ### Framework versions
 
2
  license: llama2
3
  library_name: peft
4
  tags:
 
5
  - trl
6
  - sft
7
+ - alignment-handbook
8
  - generated_from_trainer
9
  base_model: meta-llama/Llama-2-7b-hf
 
 
10
  model-index:
11
  - name: llama-poison-20p-2048
12
  results: []
 
17
 
18
  # llama-poison-20p-2048
19
 
20
+ This model is a fine-tuned version of [meta-llama/Llama-2-7b-hf](https://huggingface.co/meta-llama/Llama-2-7b-hf) on an unspecified dataset (the dataset name was not recorded; the generated card shows it as `None`).
21
  It achieves the following results on the evaluation set:
22
+ - Loss: 0.9679
23
 
24
  ## Model description
25
 
 
38
  ### Training hyperparameters
39
 
40
  The following hyperparameters were used during training:
41
+ - learning_rate: 2e-05
42
  - train_batch_size: 8
43
  - eval_batch_size: 4
44
  - seed: 42
 
56
 
57
  | Training Loss | Epoch | Step | Validation Loss |
58
  |:-------------:|:-----:|:----:|:---------------:|
59
+ | 0.7585 | 1.0 | 337 | 0.9679 |
60
 
61
 
62
  ### Framework versions
adapter_config.json CHANGED
@@ -19,13 +19,13 @@
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
22
- "gate_proj",
23
- "down_proj",
24
- "q_proj",
25
- "k_proj",
26
  "o_proj",
27
  "v_proj",
28
- "up_proj"
 
 
 
 
29
  ],
30
  "task_type": "CAUSAL_LM"
31
  }
 
19
  "rank_pattern": {},
20
  "revision": null,
21
  "target_modules": [
 
 
 
 
22
  "o_proj",
23
  "v_proj",
24
+ "down_proj",
25
+ "k_proj",
26
+ "up_proj",
27
+ "q_proj",
28
+ "gate_proj"
29
  ],
30
  "task_type": "CAUSAL_LM"
31
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e5955703104bbb62fb1c5f48367576b7c3bb28f1290b72d025b77c825ad6ec6c
3
  size 30042168
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e3cd6be758c102ce8921bb05421786059aa853d8dbbe85a7f9172293cf0a26c
3
  size 30042168
all_results.json CHANGED
@@ -5,9 +5,9 @@
5
  "eval_samples": 2310,
6
  "eval_samples_per_second": 13.254,
7
  "eval_steps_per_second": 0.832,
8
- "train_loss": 0.8055351222658016,
9
- "train_runtime": 5374.6429,
10
  "train_samples": 21594,
11
- "train_samples_per_second": 4.018,
12
- "train_steps_per_second": 0.063
13
  }
 
5
  "eval_samples": 2310,
6
  "eval_samples_per_second": 13.254,
7
  "eval_steps_per_second": 0.832,
8
+ "train_loss": 0.08478880317699308,
9
+ "train_runtime": 740.7528,
10
  "train_samples": 21594,
11
+ "train_samples_per_second": 29.151,
12
+ "train_steps_per_second": 0.455
13
  }
runs/Apr29_08-30-26_COE-CS-sv003/events.out.tfevents.1714379685.COE-CS-sv003.535882.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43ff91f672562bb04d44d5f14ae3f55df1aba357fcd3a4d878e0a37f4b1531dd
3
+ size 6826
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 1.0,
3
- "train_loss": 0.8055351222658016,
4
- "train_runtime": 5374.6429,
5
  "train_samples": 21594,
6
- "train_samples_per_second": 4.018,
7
- "train_steps_per_second": 0.063
8
  }
 
1
  {
2
  "epoch": 1.0,
3
+ "train_loss": 0.08478880317699308,
4
+ "train_runtime": 740.7528,
5
  "train_samples": 21594,
6
+ "train_samples_per_second": 29.151,
7
+ "train_steps_per_second": 0.455
8
  }
trainer_state.json CHANGED
@@ -437,69 +437,69 @@
437
  },
438
  {
439
  "epoch": 0.9,
440
- "grad_norm": 0.0788686487009447,
441
  "learning_rate": 5.453769828241872e-06,
442
  "loss": 0.7709,
443
  "step": 305
444
  },
445
  {
446
  "epoch": 0.92,
447
- "grad_norm": 0.08306771725577697,
448
  "learning_rate": 3.892905960127546e-06,
449
- "loss": 0.7701,
450
  "step": 310
451
  },
452
  {
453
  "epoch": 0.93,
454
- "grad_norm": 0.08424091491331255,
455
  "learning_rate": 2.590275647868867e-06,
456
  "loss": 0.7622,
457
  "step": 315
458
  },
459
  {
460
  "epoch": 0.95,
461
- "grad_norm": 0.07734608283213279,
462
  "learning_rate": 1.5493789750014031e-06,
463
- "loss": 0.7951,
464
  "step": 320
465
  },
466
  {
467
  "epoch": 0.96,
468
- "grad_norm": 0.08240316118938772,
469
  "learning_rate": 7.730127636723539e-07,
470
  "loss": 0.7722,
471
  "step": 325
472
  },
473
  {
474
  "epoch": 0.98,
475
- "grad_norm": 0.07789751726513824,
476
  "learning_rate": 2.6326305976001055e-07,
477
- "loss": 0.7767,
478
  "step": 330
479
  },
480
  {
481
  "epoch": 0.99,
482
- "grad_norm": 0.08712008799295351,
483
  "learning_rate": 2.1499527803214846e-08,
484
- "loss": 0.7583,
485
  "step": 335
486
  },
487
  {
488
  "epoch": 1.0,
489
- "eval_loss": 0.9679508805274963,
490
- "eval_runtime": 175.249,
491
- "eval_samples_per_second": 13.181,
492
  "eval_steps_per_second": 0.827,
493
  "step": 337
494
  },
495
  {
496
  "epoch": 1.0,
497
  "step": 337,
498
- "total_flos": 3858671222128640.0,
499
- "train_loss": 0.8055351222658016,
500
- "train_runtime": 5374.6429,
501
- "train_samples_per_second": 4.018,
502
- "train_steps_per_second": 0.063
503
  }
504
  ],
505
  "logging_steps": 5,
@@ -507,7 +507,7 @@
507
  "num_input_tokens_seen": 0,
508
  "num_train_epochs": 1,
509
  "save_steps": 100,
510
- "total_flos": 3858671222128640.0,
511
  "train_batch_size": 8,
512
  "trial_name": null,
513
  "trial_params": null
 
437
  },
438
  {
439
  "epoch": 0.9,
440
+ "grad_norm": 0.07919765304647886,
441
  "learning_rate": 5.453769828241872e-06,
442
  "loss": 0.7709,
443
  "step": 305
444
  },
445
  {
446
  "epoch": 0.92,
447
+ "grad_norm": 0.08336346515163112,
448
  "learning_rate": 3.892905960127546e-06,
449
+ "loss": 0.7703,
450
  "step": 310
451
  },
452
  {
453
  "epoch": 0.93,
454
+ "grad_norm": 0.08196958012246881,
455
  "learning_rate": 2.590275647868867e-06,
456
  "loss": 0.7622,
457
  "step": 315
458
  },
459
  {
460
  "epoch": 0.95,
461
+ "grad_norm": 0.07692088564904832,
462
  "learning_rate": 1.5493789750014031e-06,
463
+ "loss": 0.7952,
464
  "step": 320
465
  },
466
  {
467
  "epoch": 0.96,
468
+ "grad_norm": 0.08235291945346827,
469
  "learning_rate": 7.730127636723539e-07,
470
  "loss": 0.7722,
471
  "step": 325
472
  },
473
  {
474
  "epoch": 0.98,
475
+ "grad_norm": 0.07764082388173033,
476
  "learning_rate": 2.6326305976001055e-07,
477
+ "loss": 0.7769,
478
  "step": 330
479
  },
480
  {
481
  "epoch": 0.99,
482
+ "grad_norm": 0.08728825013568611,
483
  "learning_rate": 2.1499527803214846e-08,
484
+ "loss": 0.7585,
485
  "step": 335
486
  },
487
  {
488
  "epoch": 1.0,
489
+ "eval_loss": 0.9679195284843445,
490
+ "eval_runtime": 175.419,
491
+ "eval_samples_per_second": 13.168,
492
  "eval_steps_per_second": 0.827,
493
  "step": 337
494
  },
495
  {
496
  "epoch": 1.0,
497
  "step": 337,
498
+ "total_flos": 3852908399427584.0,
499
+ "train_loss": 0.08478880317699308,
500
+ "train_runtime": 740.7528,
501
+ "train_samples_per_second": 29.151,
502
+ "train_steps_per_second": 0.455
503
  }
504
  ],
505
  "logging_steps": 5,
 
507
  "num_input_tokens_seen": 0,
508
  "num_train_epochs": 1,
509
  "save_steps": 100,
510
+ "total_flos": 3852908399427584.0,
511
  "train_batch_size": 8,
512
  "trial_name": null,
513
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1734477e3fc7afe855c5ea5ab6130ed40396c2d29d45831c917f5aa5b46c22b
3
  size 6072
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:354055bb7610547321ea4aa2feca36aba440dadbe934371ae197ca88cb2b4d27
3
  size 6072