jikaixuan committed on
Commit
64a18a9
1 Parent(s): 2915f4b

Training in progress, epoch 0

Browse files
This view is limited to 50 files because it contains too many changes. See raw diff
Files changed (50) hide show
  1. README.md +72 -0
  2. adapter_config.json +29 -0
  3. adapter_model.safetensors +3 -0
  4. all_results.json +21 -0
  5. eval_results.json +16 -0
  6. runs/Dec10_14-37-29_uclaml03.cs.ucla.edu/events.out.tfevents.1702247918.uclaml03.cs.ucla.edu.2921374.0 +3 -0
  7. runs/Dec10_14-39-55_uclaml03.cs.ucla.edu/events.out.tfevents.1702248055.uclaml03.cs.ucla.edu.2921932.0 +3 -0
  8. runs/Dec10_14-39-55_uclaml03.cs.ucla.edu/events.out.tfevents.1702333147.uclaml03.cs.ucla.edu.2921932.1 +3 -0
  9. runs/Dec14_23-45-22_uclaml03.cs.ucla.edu/events.out.tfevents.1702626380.uclaml03.cs.ucla.edu.3135904.0 +3 -0
  10. runs/Dec18_14-20-56_uclaml03.cs.ucla.edu/events.out.tfevents.1702938137.uclaml03.cs.ucla.edu.3492719.0 +3 -0
  11. runs/Dec18_14-23-54_uclaml03.cs.ucla.edu/events.out.tfevents.1702938314.uclaml03.cs.ucla.edu.3493625.0 +3 -0
  12. runs/Dec18_14-23-54_uclaml03.cs.ucla.edu/events.out.tfevents.1702983633.uclaml03.cs.ucla.edu.3493625.1 +3 -0
  13. runs/Dec27_16-07-35_uclaml03.cs.ucla.edu/events.out.tfevents.1703722124.uclaml03.cs.ucla.edu.3174194.0 +3 -0
  14. runs/Dec27_16-11-50_uclaml03.cs.ucla.edu/events.out.tfevents.1703722377.uclaml03.cs.ucla.edu.3175985.0 +3 -0
  15. runs/Dec27_16-17-46_uclaml03.cs.ucla.edu/events.out.tfevents.1703722733.uclaml03.cs.ucla.edu.3177393.0 +3 -0
  16. runs/Dec27_16-19-23_uclaml03.cs.ucla.edu/events.out.tfevents.1703722831.uclaml03.cs.ucla.edu.3177938.0 +3 -0
  17. runs/Dec27_16-21-26_uclaml03.cs.ucla.edu/events.out.tfevents.1703722953.uclaml03.cs.ucla.edu.3178944.0 +3 -0
  18. runs/Dec27_16-23-37_uclaml03.cs.ucla.edu/events.out.tfevents.1703723084.uclaml03.cs.ucla.edu.3179750.0 +3 -0
  19. runs/Dec27_16-25-25_uclaml03.cs.ucla.edu/events.out.tfevents.1703723191.uclaml03.cs.ucla.edu.3180830.0 +3 -0
  20. runs/Dec27_16-29-59_uclaml03.cs.ucla.edu/events.out.tfevents.1703723466.uclaml03.cs.ucla.edu.3181763.0 +3 -0
  21. runs/Dec27_16-31-29_uclaml03.cs.ucla.edu/events.out.tfevents.1703723555.uclaml03.cs.ucla.edu.3182591.0 +3 -0
  22. runs/Dec27_16-42-53_uclaml03.cs.ucla.edu/events.out.tfevents.1703724239.uclaml03.cs.ucla.edu.3185784.0 +3 -0
  23. runs/Dec27_16-44-14_uclaml03.cs.ucla.edu/events.out.tfevents.1703724321.uclaml03.cs.ucla.edu.3186218.0 +3 -0
  24. runs/Dec27_16-46-43_uclaml03.cs.ucla.edu/events.out.tfevents.1703724469.uclaml03.cs.ucla.edu.3186857.0 +3 -0
  25. runs/Dec27_16-49-26_uclaml03.cs.ucla.edu/events.out.tfevents.1703724633.uclaml03.cs.ucla.edu.3187547.0 +3 -0
  26. runs/Dec27_16-52-52_uclaml03.cs.ucla.edu/events.out.tfevents.1703724839.uclaml03.cs.ucla.edu.3188673.0 +3 -0
  27. runs/Dec27_16-56-10_uclaml03.cs.ucla.edu/events.out.tfevents.1703725036.uclaml03.cs.ucla.edu.3190153.0 +3 -0
  28. runs/Dec27_16-57-41_uclaml03.cs.ucla.edu/events.out.tfevents.1703725128.uclaml03.cs.ucla.edu.3190615.0 +3 -0
  29. runs/Dec27_16-59-04_uclaml03.cs.ucla.edu/events.out.tfevents.1703725211.uclaml03.cs.ucla.edu.3191267.0 +3 -0
  30. runs/Jan08_10-01-41_uclaml03.cs.ucla.edu/events.out.tfevents.1704737160.uclaml03.cs.ucla.edu.167716.0 +3 -0
  31. runs/Jan08_10-07-49_uclaml03.cs.ucla.edu/events.out.tfevents.1704737538.uclaml03.cs.ucla.edu.172338.0 +3 -0
  32. runs/Jan08_10-13-37_uclaml03.cs.ucla.edu/events.out.tfevents.1704737876.uclaml03.cs.ucla.edu.176850.0 +3 -0
  33. runs/Jan08_10-22-17_uclaml03.cs.ucla.edu/events.out.tfevents.1704738394.uclaml03.cs.ucla.edu.183405.0 +3 -0
  34. runs/Jan08_10-38-45_uclaml03.cs.ucla.edu/events.out.tfevents.1704739393.uclaml03.cs.ucla.edu.195380.0 +3 -0
  35. runs/Jan08_10-48-52_uclaml03.cs.ucla.edu/events.out.tfevents.1704739980.uclaml03.cs.ucla.edu.202650.0 +3 -0
  36. runs/Jan08_11-05-42_uclaml03.cs.ucla.edu/events.out.tfevents.1704741001.uclaml03.cs.ucla.edu.214429.0 +3 -0
  37. runs/Jan08_11-17-30_uclaml03.cs.ucla.edu/events.out.tfevents.1704741711.uclaml03.cs.ucla.edu.221053.0 +3 -0
  38. runs/Jan08_11-23-24_uclaml03.cs.ucla.edu/events.out.tfevents.1704742061.uclaml03.cs.ucla.edu.224875.0 +3 -0
  39. runs/Jan08_11-33-19_uclaml03.cs.ucla.edu/events.out.tfevents.1704742661.uclaml03.cs.ucla.edu.232549.0 +3 -0
  40. runs/Jan08_11-57-56_uclaml03.cs.ucla.edu/events.out.tfevents.1704744134.uclaml03.cs.ucla.edu.248224.0 +3 -0
  41. runs/Jan08_12-09-28_uclaml03.cs.ucla.edu/events.out.tfevents.1704744839.uclaml03.cs.ucla.edu.257058.0 +3 -0
  42. runs/Jan08_14-44-24_uclaml03.cs.ucla.edu/events.out.tfevents.1704754146.uclaml03.cs.ucla.edu.359920.0 +3 -0
  43. runs/Jan08_14-56-33_uclaml03.cs.ucla.edu/events.out.tfevents.1704754874.uclaml03.cs.ucla.edu.369450.0 +3 -0
  44. runs/Jan08_15-04-11_uclaml03.cs.ucla.edu/events.out.tfevents.1704755327.uclaml03.cs.ucla.edu.374659.0 +3 -0
  45. runs/Jan08_20-19-53_uclaml03.cs.ucla.edu/events.out.tfevents.1704774266.uclaml03.cs.ucla.edu.547993.0 +3 -0
  46. special_tokens_map.json +30 -0
  47. tokenizer.json +0 -0
  48. tokenizer_config.json +41 -0
  49. train_results.json +8 -0
  50. trainer_state.json +0 -0
README.md ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: mistralai/Mistral-7B-v0.1
4
+ tags:
5
+ - generated_from_trainer
6
+ model-index:
7
+ - name: zephyr-7b-dpo-lora
8
+ results: []
9
+ ---
10
+
11
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
12
+ should probably proofread and complete it, then remove this comment. -->
13
+
14
+ # zephyr-7b-dpo-lora
15
+
16
+ This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unspecified dataset (the Trainer did not record a dataset name).
17
+ It achieves the following results on the evaluation set:
18
+ - Loss: -7.5416
19
+ - Rewards/chosen: -29.9194
20
+ - Rewards/rejected: -39.8539
21
+ - Rewards/accuracies: 0.6151
22
+ - Rewards/margins: 9.9345
23
+ - Logps/rejected: -633.4722
24
+ - Logps/chosen: -588.9970
25
+ - Logits/rejected: -1.0751
26
+ - Logits/chosen: -1.2630
27
+
28
+ ## Model description
29
+
30
+ More information needed
31
+
32
+ ## Intended uses & limitations
33
+
34
+ More information needed
35
+
36
+ ## Training and evaluation data
37
+
38
+ More information needed
39
+
40
+ ## Training procedure
41
+
42
+ ### Training hyperparameters
43
+
44
+ The following hyperparameters were used during training:
45
+ - learning_rate: 5e-07
46
+ - train_batch_size: 8
47
+ - eval_batch_size: 4
48
+ - seed: 42
49
+ - distributed_type: multi-GPU
50
+ - num_devices: 8
51
+ - total_train_batch_size: 64
52
+ - total_eval_batch_size: 32
53
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
54
+ - lr_scheduler_type: linear
55
+ - lr_scheduler_warmup_ratio: 0.1
56
+ - num_epochs: 3
57
+
58
+ ### Training results
59
+
60
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
61
+ |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
62
+ | -0.2007 | 1.0 | 969 | -0.0988 | -1.1416 | -2.4993 | 0.6746 | 1.3577 | -259.9262 | -301.2188 | -1.9876 | -2.0976 |
63
+ | -2.3739 | 2.0 | 1938 | -3.0140 | -12.9185 | -17.8885 | 0.6587 | 4.9699 | -413.8172 | -418.9880 | -1.4397 | -1.5909 |
64
+ | -5.7169 | 3.0 | 2907 | -7.5416 | -29.9194 | -39.8539 | 0.6151 | 9.9345 | -633.4722 | -588.9970 | -1.0751 | -1.2630 |
65
+
66
+
67
+ ### Framework versions
68
+
69
+ - Transformers 4.35.0
70
+ - Pytorch 2.1.1+cu121
71
+ - Datasets 2.14.6
72
+ - Tokenizers 0.14.1
adapter_config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "alpha_pattern": {},
3
+ "auto_mapping": null,
4
+ "base_model_name_or_path": "mistralai/Mistral-7B-v0.1",
5
+ "bias": "none",
6
+ "fan_in_fan_out": false,
7
+ "inference_mode": true,
8
+ "init_lora_weights": true,
9
+ "layers_pattern": null,
10
+ "layers_to_transform": null,
11
+ "loftq_config": {},
12
+ "lora_alpha": 16,
13
+ "lora_dropout": 0.1,
14
+ "megatron_config": null,
15
+ "megatron_core": "megatron.core",
16
+ "modules_to_save": null,
17
+ "peft_type": "LORA",
18
+ "r": 64,
19
+ "rank_pattern": {},
20
+ "revision": null,
21
+ "target_modules": [
22
+ "v_proj",
23
+ "q_proj",
24
+ "o_proj",
25
+ "k_proj"
26
+ ],
27
+ "task_type": "CAUSAL_LM",
28
+ "use_rslora": false
29
+ }
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:484e9e51136cff273bd9ad46ad1273ae20206d452301cd969616b8aad4fa0c5f
3
+ size 109086672
all_results.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_logits/chosen": -1.2629982233047485,
4
+ "eval_logits/rejected": -1.075066089630127,
5
+ "eval_logps/chosen": -588.9970092773438,
6
+ "eval_logps/rejected": -633.47216796875,
7
+ "eval_loss": -7.541553497314453,
8
+ "eval_rewards/accuracies": 0.6150793433189392,
9
+ "eval_rewards/chosen": -29.919435501098633,
10
+ "eval_rewards/margins": 9.934508323669434,
11
+ "eval_rewards/rejected": -39.853946685791016,
12
+ "eval_runtime": 237.8142,
13
+ "eval_samples": 2000,
14
+ "eval_samples_per_second": 8.41,
15
+ "eval_steps_per_second": 0.265,
16
+ "train_loss": -1.932115889119454,
17
+ "train_runtime": 45081.596,
18
+ "train_samples": 61966,
19
+ "train_samples_per_second": 4.124,
20
+ "train_steps_per_second": 0.064
21
+ }
eval_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "eval_logits/chosen": -1.2629982233047485,
4
+ "eval_logits/rejected": -1.075066089630127,
5
+ "eval_logps/chosen": -588.9970092773438,
6
+ "eval_logps/rejected": -633.47216796875,
7
+ "eval_loss": -7.541553497314453,
8
+ "eval_rewards/accuracies": 0.6150793433189392,
9
+ "eval_rewards/chosen": -29.919435501098633,
10
+ "eval_rewards/margins": 9.934508323669434,
11
+ "eval_rewards/rejected": -39.853946685791016,
12
+ "eval_runtime": 237.8142,
13
+ "eval_samples": 2000,
14
+ "eval_samples_per_second": 8.41,
15
+ "eval_steps_per_second": 0.265
16
+ }
runs/Dec10_14-37-29_uclaml03.cs.ucla.edu/events.out.tfevents.1702247918.uclaml03.cs.ucla.edu.2921374.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41585b4e11ba0dc4b3e47d390e1ec52f90172a1ac8f6a70d41755858c7babff5
3
+ size 4388
runs/Dec10_14-39-55_uclaml03.cs.ucla.edu/events.out.tfevents.1702248055.uclaml03.cs.ucla.edu.2921932.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:000770492d54e052ca0b8c407f647cd92f19b058d673fe28c2fe3d78a28dea5d
3
+ size 191313
runs/Dec10_14-39-55_uclaml03.cs.ucla.edu/events.out.tfevents.1702333147.uclaml03.cs.ucla.edu.2921932.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:98c72e0be945c8e8579e85873da199ac30c8349d4cc8c0f46bc59098496e7072
3
+ size 828
runs/Dec14_23-45-22_uclaml03.cs.ucla.edu/events.out.tfevents.1702626380.uclaml03.cs.ucla.edu.3135904.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82cb9fbc53b5517939daf91cfd50b212856fbbf7477d19911c51a77f03abf408
3
+ size 5011
runs/Dec18_14-20-56_uclaml03.cs.ucla.edu/events.out.tfevents.1702938137.uclaml03.cs.ucla.edu.3492719.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d6af6cf64672c1afc612a69a766a9b0e1dc56476be227dec3492f74cc1f73291
3
+ size 4376
runs/Dec18_14-23-54_uclaml03.cs.ucla.edu/events.out.tfevents.1702938314.uclaml03.cs.ucla.edu.3493625.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df240bf96c57f7d7a763ebb96c567369142632359ae46f4af24a25cc3e5dc608
3
+ size 191301
runs/Dec18_14-23-54_uclaml03.cs.ucla.edu/events.out.tfevents.1702983633.uclaml03.cs.ucla.edu.3493625.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:83f2cce1773d4c2f22f6ac2ce294d2d0bddb389bc33e87c011e5b9f3d2f166d3
3
+ size 828
runs/Dec27_16-07-35_uclaml03.cs.ucla.edu/events.out.tfevents.1703722124.uclaml03.cs.ucla.edu.3174194.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a6ee5c0bf15818bebaae947c09a796e022e603970bb5f04042ac9263e65e7f86
3
+ size 4999
runs/Dec27_16-11-50_uclaml03.cs.ucla.edu/events.out.tfevents.1703722377.uclaml03.cs.ucla.edu.3175985.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2109da46bb5c71d0d145b1416a60ecfed31f03993d2a2c60b0a9e8ea15b1ec71
3
+ size 6868
runs/Dec27_16-17-46_uclaml03.cs.ucla.edu/events.out.tfevents.1703722733.uclaml03.cs.ucla.edu.3177393.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a473af8503718f4a8f37ec033bf64de7bb6bdfbeab615cd43c748540f616fb0
3
+ size 4376
runs/Dec27_16-19-23_uclaml03.cs.ucla.edu/events.out.tfevents.1703722831.uclaml03.cs.ucla.edu.3177938.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:309f98ae3ec754efbcc69676f71145b6d52ea5f2f6376820bec162852892f2a1
3
+ size 4376
runs/Dec27_16-21-26_uclaml03.cs.ucla.edu/events.out.tfevents.1703722953.uclaml03.cs.ucla.edu.3178944.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4064968501ec1a8657c745bd3677a3be0d48898320917f2a31a6613ea8328b07
3
+ size 4376
runs/Dec27_16-23-37_uclaml03.cs.ucla.edu/events.out.tfevents.1703723084.uclaml03.cs.ucla.edu.3179750.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92e0ef6eebc1e51ac642051ade46540932abfb700e339207732e5bf3219702ee
3
+ size 4376
runs/Dec27_16-25-25_uclaml03.cs.ucla.edu/events.out.tfevents.1703723191.uclaml03.cs.ucla.edu.3180830.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31745a80bc8c3c038ae226923a7e5572508b06fb37ea4350e904ce40a8101f23
3
+ size 4376
runs/Dec27_16-29-59_uclaml03.cs.ucla.edu/events.out.tfevents.1703723466.uclaml03.cs.ucla.edu.3181763.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:954f909aee45691e88874df10b3675e6f5225b32b664d7aad9974156a5fae8c7
3
+ size 4376
runs/Dec27_16-31-29_uclaml03.cs.ucla.edu/events.out.tfevents.1703723555.uclaml03.cs.ucla.edu.3182591.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:630a92816f6ffd9ce390a7906bdb52e1e74b8fefe26efff9488572cf1c35073a
3
+ size 4376
runs/Dec27_16-42-53_uclaml03.cs.ucla.edu/events.out.tfevents.1703724239.uclaml03.cs.ucla.edu.3185784.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:144b9a925146e6b3442b4f514d654a297ee699ded4069a4604a48fcebf43f484
3
+ size 4376
runs/Dec27_16-44-14_uclaml03.cs.ucla.edu/events.out.tfevents.1703724321.uclaml03.cs.ucla.edu.3186218.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7db5bd7a6bd3b7cfd2dbe3d91a061aaf50db6b95d2ef5e9e7e80332bc501060f
3
+ size 4376
runs/Dec27_16-46-43_uclaml03.cs.ucla.edu/events.out.tfevents.1703724469.uclaml03.cs.ucla.edu.3186857.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ccecdda9939dbf246addbf46f9ad678f79af845ccea759f5bd3619ffdd6c31c1
3
+ size 4376
runs/Dec27_16-49-26_uclaml03.cs.ucla.edu/events.out.tfevents.1703724633.uclaml03.cs.ucla.edu.3187547.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7358d8caf850756b80c221503369c8fea7367cc428ed675d8075a961e692522a
3
+ size 4376
runs/Dec27_16-52-52_uclaml03.cs.ucla.edu/events.out.tfevents.1703724839.uclaml03.cs.ucla.edu.3188673.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f5ed198d75fdb87b33d4168773e3c57c132ac07ce8fbc0e3c539eab7391e9840
3
+ size 4376
runs/Dec27_16-56-10_uclaml03.cs.ucla.edu/events.out.tfevents.1703725036.uclaml03.cs.ucla.edu.3190153.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6da5de5724902af017a98200bae5fbf1f197fb11ee4147808b43d090ada5a79
3
+ size 4376
runs/Dec27_16-57-41_uclaml03.cs.ucla.edu/events.out.tfevents.1703725128.uclaml03.cs.ucla.edu.3190615.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7ddd7f6e476c9abb6c617e98fea8dadd109705e75598945377d8456f54175f25
3
+ size 4376
runs/Dec27_16-59-04_uclaml03.cs.ucla.edu/events.out.tfevents.1703725211.uclaml03.cs.ucla.edu.3191267.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8ac1071fd9e8f5366ba929357b7abb7a1d2ff45eaea796e0b79ecfb7eb351ab
3
+ size 4376
runs/Jan08_10-01-41_uclaml03.cs.ucla.edu/events.out.tfevents.1704737160.uclaml03.cs.ucla.edu.167716.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:39e5afe4cbadfd1bc8e5112b8dd4f0ef08c4bc1426726b99e6a8066fb38a1678
3
+ size 6868
runs/Jan08_10-07-49_uclaml03.cs.ucla.edu/events.out.tfevents.1704737538.uclaml03.cs.ucla.edu.172338.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d391416ff8da6d7f334e7ccdce404da3298cd2bc7fbeca816c4f65f372db38fe
3
+ size 4376
runs/Jan08_10-13-37_uclaml03.cs.ucla.edu/events.out.tfevents.1704737876.uclaml03.cs.ucla.edu.176850.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c24cc519a3b7faf5cb40200d3be3172f3a605ecd10a9ad2ddefa7a2b1556662
3
+ size 4376
runs/Jan08_10-22-17_uclaml03.cs.ucla.edu/events.out.tfevents.1704738394.uclaml03.cs.ucla.edu.183405.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a955d5509e8cf859e8be5175de85b595dbc2f7136afdc09a27c4e528dd45b416
3
+ size 4376
runs/Jan08_10-38-45_uclaml03.cs.ucla.edu/events.out.tfevents.1704739393.uclaml03.cs.ucla.edu.195380.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2130ab95a31194b38e5fb57da5f0539b90f7044e4affc173298a22d6358a6dfb
3
+ size 4376
runs/Jan08_10-48-52_uclaml03.cs.ucla.edu/events.out.tfevents.1704739980.uclaml03.cs.ucla.edu.202650.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3668ba642314005a2482dc7c08a91407ecb01986e984550408df2a5a18b8369c
3
+ size 4376
runs/Jan08_11-05-42_uclaml03.cs.ucla.edu/events.out.tfevents.1704741001.uclaml03.cs.ucla.edu.214429.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5d988f21cbe3bdfbae0458b2de14a920d9e3335278a9859a87c00697feddb84
3
+ size 4999
runs/Jan08_11-17-30_uclaml03.cs.ucla.edu/events.out.tfevents.1704741711.uclaml03.cs.ucla.edu.221053.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f948263b7528bb995faa8695f6563b39741b370a065805fdcf0e34d86ffbbbb6
3
+ size 4376
runs/Jan08_11-23-24_uclaml03.cs.ucla.edu/events.out.tfevents.1704742061.uclaml03.cs.ucla.edu.224875.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f1eefe1e3f607424efd180b6b617dd3bb0cc67e1fae45ad19b70d4e3ba249ca0
3
+ size 8114
runs/Jan08_11-33-19_uclaml03.cs.ucla.edu/events.out.tfevents.1704742661.uclaml03.cs.ucla.edu.232549.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:16940e27431fb359b781cfdf5cd3dcb42d3d8fb19ba50eebd6deff365d25dbec
3
+ size 8114
runs/Jan08_11-57-56_uclaml03.cs.ucla.edu/events.out.tfevents.1704744134.uclaml03.cs.ucla.edu.248224.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:120ea8c270e9eeb646f34220cfa4827e79657dcb487122475f3987b9d5c68d3a
3
+ size 10606
runs/Jan08_12-09-28_uclaml03.cs.ucla.edu/events.out.tfevents.1704744839.uclaml03.cs.ucla.edu.257058.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4ae538e6524fc7a5d95e8c7d29f8c519bdd83f1484b092890ae782efe199ea9
3
+ size 61293
runs/Jan08_14-44-24_uclaml03.cs.ucla.edu/events.out.tfevents.1704754146.uclaml03.cs.ucla.edu.359920.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:92cc96a7a4c25cc02f1239623123b430d3efb48175fd8c6e55cc7017ed190b7a
3
+ size 7494
runs/Jan08_14-56-33_uclaml03.cs.ucla.edu/events.out.tfevents.1704754874.uclaml03.cs.ucla.edu.369450.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cdf6fa1c8d75c84e1483a0b6ceddb0bf38bc073472c8908e9353b73e3131ee3
3
+ size 5002
runs/Jan08_15-04-11_uclaml03.cs.ucla.edu/events.out.tfevents.1704755327.uclaml03.cs.ucla.edu.374659.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e7325d7df2bd03aca79240ee2c29385179ca5d1ab4bd5af0f7cbc358214c9606
3
+ size 30864
runs/Jan08_20-19-53_uclaml03.cs.ucla.edu/events.out.tfevents.1704774266.uclaml03.cs.ucla.edu.547993.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:71e499c22aa46fca014c95f5e525d0345fce47b456a8b244f89c83c919c2929f
3
+ size 65840
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "</s>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": false,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "unk_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ }
30
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,41 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<unk>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ }
27
+ },
28
+ "additional_special_tokens": [],
29
+ "bos_token": "<s>",
30
+ "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
31
+ "clean_up_tokenization_spaces": false,
32
+ "eos_token": "</s>",
33
+ "legacy": true,
34
+ "model_max_length": 2048,
35
+ "pad_token": "</s>",
36
+ "sp_model_kwargs": {},
37
+ "spaces_between_special_tokens": false,
38
+ "tokenizer_class": "LlamaTokenizer",
39
+ "unk_token": "<unk>",
40
+ "use_default_system_prompt": false
41
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 3.0,
3
+ "train_loss": -1.932115889119454,
4
+ "train_runtime": 45081.596,
5
+ "train_samples": 61966,
6
+ "train_samples_per_second": 4.124,
7
+ "train_steps_per_second": 0.064
8
+ }
trainer_state.json ADDED
The diff for this file is too large to render. See raw diff