chiayisu committed
Commit b0802e8
Parent: 91dd397

Upload folder using huggingface_hub
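
A commit like this is typically created with the huggingface_hub client, as the commit message suggests. A minimal sketch, assuming a hypothetical repo id and local folder (both placeholders, not taken from this page):

    from huggingface_hub import HfApi

    api = HfApi()
    api.upload_folder(
        repo_id="chiayisu/<repo-name>",  # hypothetical target repository
        folder_path="./checkpoints",     # hypothetical local folder
        commit_message="Upload folder using huggingface_hub",
    )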

This view is limited to the first 50 files because the commit contains too many changes; see the raw diff for the complete set.
Files changed (50):
  1. codet5+/q90cce/checkpoint-16161/model.safetensors +3 -0
  2. codet5+/q90cce/checkpoint-16161/optimizer.pt +3 -0
  3. codet5+/q90cce/checkpoint-16161/rng_state.pth +3 -0
  4. codet5+/q90cce/checkpoint-16161/scheduler.pt +3 -0
  5. codet5+/q90cce/checkpoint-16161/training_args.bin +3 -0
  6. codet5+/q90cce/final_checkpoint/model.safetensors +3 -0
  7. codet5+/q90useseq/checkpoint-16161/model.safetensors +3 -0
  8. codet5+/q90useseq/checkpoint-16161/optimizer.pt +3 -0
  9. codet5+/q90useseq/checkpoint-16161/rng_state.pth +3 -0
  10. codet5+/q90useseq/checkpoint-16161/scheduler.pt +3 -0
  11. codet5+/q90useseq/checkpoint-16161/training_args.bin +3 -0
  12. codet5+/q90useseq/events.out.tfevents.1712092321.nublar.444200.0 +3 -0
  13. codet5+/q90useseq/final_checkpoint/model.safetensors +3 -0
  14. gpt2/cgpt/out-cgpt-e18-cce/ckpt.pt +3 -0
  15. gpt2/cgpt/out-cgpt-e18-use-seq-0.6/ckpt.pt +3 -0
  16. gpt2/cgpt/out-cgpt-e18-use-seq-0.8/ckpt.pt +3 -0
  17. gpt2/cgpt/out-cgpt-e18-use-seq-1.0/ckpt.pt +3 -0
  18. gpt2/cgpt/out-cgpt-e18-use-seq-1.2/ckpt.pt +3 -0
  19. gpt2/cgpt/out-cgpt-e18-use-seq-ablate-1.0/ckpt.pt +3 -0
  20. gpt2/q90/out-funcom-gpt2-e18-cce/ckpt.pt +3 -0
  21. gpt2/q90/out-funcom-gpt2-e18-use-seq-0.6/ckpt.pt +3 -0
  22. gpt2/q90/out-funcom-gpt2-e18-use-seq-0.8/ckpt.pt +3 -0
  23. gpt2/q90/out-funcom-gpt2-e18-use-seq-1.0/ckpt.pt +3 -0
  24. gpt2/q90/out-funcom-gpt2-e18-use-seq-1.2/ckpt.pt +3 -0
  25. gpt2/q90/out-funcom-gpt2-e18-use-seq-ablate-1.0/ckpt.pt +3 -0
  26. llama/lora-alpaca-q90-cce/adapter_config.json +18 -0
  27. llama/lora-alpaca-q90-cce/adapter_model.bin +3 -0
  28. llama/lora-alpaca-q90-cce/checkpoint-1000/optimizer.pt +3 -0
  29. llama/lora-alpaca-q90-cce/checkpoint-1000/pytorch_model.bin +3 -0
  30. llama/lora-alpaca-q90-cce/checkpoint-1000/rng_state.pth +3 -0
  31. llama/lora-alpaca-q90-cce/checkpoint-1000/scaler.pt +3 -0
  32. llama/lora-alpaca-q90-cce/checkpoint-1000/scheduler.pt +3 -0
  33. llama/lora-alpaca-q90-cce/checkpoint-1000/trainer_state.json +356 -0
  34. llama/lora-alpaca-q90-cce/checkpoint-1000/training_args.bin +3 -0
  35. llama/lora-alpaca-q90-cce/checkpoint-1200/optimizer.pt +3 -0
  36. llama/lora-alpaca-q90-cce/checkpoint-1200/pytorch_model.bin +3 -0
  37. llama/lora-alpaca-q90-cce/checkpoint-1200/rng_state.pth +3 -0
  38. llama/lora-alpaca-q90-cce/checkpoint-1200/scaler.pt +3 -0
  39. llama/lora-alpaca-q90-cce/checkpoint-1200/scheduler.pt +3 -0
  40. llama/lora-alpaca-q90-cce/checkpoint-1200/trainer_state.json +424 -0
  41. llama/lora-alpaca-q90-cce/checkpoint-1200/training_args.bin +3 -0
  42. llama/lora-alpaca-q90-cce/checkpoint-800/optimizer.pt +3 -0
  43. llama/lora-alpaca-q90-cce/checkpoint-800/pytorch_model.bin +3 -0
  44. llama/lora-alpaca-q90-cce/checkpoint-800/rng_state.pth +3 -0
  45. llama/lora-alpaca-q90-cce/checkpoint-800/scaler.pt +3 -0
  46. llama/lora-alpaca-q90-cce/checkpoint-800/scheduler.pt +3 -0
  47. llama/lora-alpaca-q90-cce/checkpoint-800/trainer_state.json +288 -0
  48. llama/lora-alpaca-q90-cce/checkpoint-800/training_args.bin +3 -0
  49. llama/lora-alpaca-q90-use-seq/adapter_config.json +19 -0
  50. llama/lora-alpaca-q90-use-seq/adapter_model.bin +3 -0
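
Any single file from this commit can be fetched without cloning the full repository; a sketch using huggingface_hub's hf_hub_download, where the repo id is again a placeholder:

    from huggingface_hub import hf_hub_download

    path = hf_hub_download(
        repo_id="chiayisu/<repo-name>",  # hypothetical
        filename="gpt2/q90/out-funcom-gpt2-e18-cce/ckpt.pt",
        revision="b0802e8",              # pin to this commit
    )
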
codet5+/q90cce/checkpoint-16161/model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bac365536c3abbcd19ea10781413295f880335e19e5faacc2a80cf87f759c4f3
+size 891558696
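
Every binary file in this commit is stored as a Git LFS pointer like the one above: a short text stub recording the pointer spec version, the SHA-256 of the real object, and its size in bytes. A minimal sketch of parsing such a pointer and verifying a downloaded blob against it (file paths are illustrative):

    import hashlib

    def parse_lfs_pointer(path):
        # Each pointer line is "key value"; collect the pairs into a dict.
        fields = {}
        with open(path) as f:
            for line in f:
                key, _, value = line.strip().partition(" ")
                fields[key] = value
        return fields

    def verify_blob(pointer_path, blob_path):
        # Compare the blob's SHA-256 digest and byte size against the pointer.
        fields = parse_lfs_pointer(pointer_path)
        data = open(blob_path, "rb").read()
        oid_ok = hashlib.sha256(data).hexdigest() == fields["oid"].split(":", 1)[1]
        size_ok = len(data) == int(fields["size"])
        return oid_ok and size_ok
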
codet5+/q90cce/checkpoint-16161/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:8e947fc1a62608cdbd2e727ebf1b77ab2441741b82c210915feaa31490f802d2
+size 1783272762
codet5+/q90cce/checkpoint-16161/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:da7ab3d70319eedee0e20df99e3337c596300cf85c06a0ff43a497dc735bacc1
+size 14244
codet5+/q90cce/checkpoint-16161/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc6074681aa311442c0e33cfb9d6fdc6bf20b16d03df5498cbd742101c5b5b60
+size 1064
codet5+/q90cce/checkpoint-16161/training_args.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bba5554b9a249566c939cc47d94f0779adfb4a53f5215181d0d81754796270a0
+size 4856
codet5+/q90cce/final_checkpoint/model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bac365536c3abbcd19ea10781413295f880335e19e5faacc2a80cf87f759c4f3
+size 891558696
codet5+/q90useseq/checkpoint-16161/model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ef3dc2c3b199d6303258dcf37018964d5bdbc6ee10174a0a2c22597cee9f407
+size 891558696
codet5+/q90useseq/checkpoint-16161/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ac9866e8e98bd91dbfa964a33d1bb24d4f288b67351da114cf008c1f2a0a2a0f
+size 1783272762
codet5+/q90useseq/checkpoint-16161/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb2897a897fea1e576d8200facf0fd7c6684a305f296fe30fe6c438815665c88
+size 14244
codet5+/q90useseq/checkpoint-16161/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cc6074681aa311442c0e33cfb9d6fdc6bf20b16d03df5498cbd742101c5b5b60
+size 1064
codet5+/q90useseq/checkpoint-16161/training_args.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6b9310a6cee128577aa0bc565fc71ba9d56a1ecc0f3386594477591138294a4c
+size 4856
codet5+/q90useseq/events.out.tfevents.1712092321.nublar.444200.0 ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:64f61fc9afd31fc598f005fd9849bed2e87e11e4bc8944bf58e45f462a482386
+size 346166
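
The events.out.tfevents.* file above is a TensorBoard event log from the q90useseq run. A sketch of inspecting it with TensorBoard's event-processing API, assuming the repository has been cloned locally so the directory path below exists:

    from tensorboard.backend.event_processing.event_accumulator import EventAccumulator

    acc = EventAccumulator("codet5+/q90useseq")  # directory holding the event file
    acc.Reload()
    print(acc.Tags())  # lists whatever scalar/tensor tags the run logged
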
codet5+/q90useseq/final_checkpoint/model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:9ef3dc2c3b199d6303258dcf37018964d5bdbc6ee10174a0a2c22597cee9f407
+size 891558696
gpt2/cgpt/out-cgpt-e18-cce/ckpt.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:84f04aedf6ad2067a3e0f3c07e6baeb77fd44f054169d95b8f137bce69c24ad7
+size 1493434662
gpt2/cgpt/out-cgpt-e18-use-seq-0.6/ckpt.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:1f1d6639eacd08e3920acd816dbf496bbb2c9c6db0b7fb296a014302dbb5daca
+size 1493434662
gpt2/cgpt/out-cgpt-e18-use-seq-0.8/ckpt.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:eb6ae0ea207a5c0e6d630c087f01005a79aefb174ca9d212dbd5378d46bd0c8a
+size 1493434662
gpt2/cgpt/out-cgpt-e18-use-seq-1.0/ckpt.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:7bb276b90c587531cdb723c99cab892a84e8246d8e2969fe78a6c42ca1fd9b41
+size 1493434662
gpt2/cgpt/out-cgpt-e18-use-seq-1.2/ckpt.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c7ebc9fb2d7be656a0be6c0a9fa495df645a474e87cd93320a15feaee58e94d6
+size 1493434662
gpt2/cgpt/out-cgpt-e18-use-seq-ablate-1.0/ckpt.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:03044a2681785ab3eef0d4ec81fc7a0ef0776ef063a4f0ace861f1125e0facc4
+size 1493434662
gpt2/q90/out-funcom-gpt2-e18-cce/ckpt.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d34914d55af32caaa2156640abe6500732a9311532335588ebae1a81144baddb
+size 1493434662
gpt2/q90/out-funcom-gpt2-e18-use-seq-0.6/ckpt.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ae52b73e6cc1472bdbe76caa18f81cf5ce5338c3c8c46a1666f67813265aa206
+size 1493434662
gpt2/q90/out-funcom-gpt2-e18-use-seq-0.8/ckpt.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e3bcd657d03fe534fd37a5ab0b82eae3a513c01b1ac1988209b7c0718b3bb2e0
+size 1493434662
gpt2/q90/out-funcom-gpt2-e18-use-seq-1.0/ckpt.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fd5b45fd34b07dd6666f92d28f7b029d9e49e99ee2c825a4d0e5464e95575b33
+size 1493434662
gpt2/q90/out-funcom-gpt2-e18-use-seq-1.2/ckpt.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:bb141dd6e585b610594151369d078577c29a0d3d41957a359d3b37f4b7325e91
+size 1493434662
gpt2/q90/out-funcom-gpt2-e18-use-seq-ablate-1.0/ckpt.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cadab6f190a1293e6788681102e59de873048abf202fa6739448e96929ce432e
+size 1493434726
llama/lora-alpaca-q90-cce/adapter_config.json ADDED
@@ -0,0 +1,18 @@
+{
+  "base_model_name_or_path": "decapoda-research/llama-7b-hf",
+  "bias": "none",
+  "enable_lora": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "merge_weights": false,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
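
The adapter_config.json above describes a LoRA adapter (r=8, lora_alpha=16, dropout 0.05 on the q_proj and v_proj projections) for decapoda-research/llama-7b-hf. A minimal sketch of attaching it with peft, assuming a peft version that still accepts this early config format (it carries legacy keys such as enable_lora and merge_weights) and that the repo is cloned locally:

    from transformers import AutoModelForCausalLM
    from peft import PeftModel

    base = AutoModelForCausalLM.from_pretrained("decapoda-research/llama-7b-hf")
    model = PeftModel.from_pretrained(base, "llama/lora-alpaca-q90-cce")
    model.eval()  # the config sets inference_mode to true
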
llama/lora-alpaca-q90-cce/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:85db8e950ef5d0f478d9511111d467d336e9f3b1a5951e1504bc0b097f42afa9
+size 16822989
llama/lora-alpaca-q90-cce/checkpoint-1000/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:08448be733fb5513f2235cc506bbf6d8a3eed82d58e562bcd403a3187fb7d9a7
+size 33629893
llama/lora-alpaca-q90-cce/checkpoint-1000/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:e5b1f8b85708871cade3d0285562fbb25d1ebe8040017504c66c30426c67e847
+size 16822989
llama/lora-alpaca-q90-cce/checkpoint-1000/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:716b9e826437be7256498ff03ae610de855359aa3dcbc686fe327448af3b47ce
+size 14575
llama/lora-alpaca-q90-cce/checkpoint-1000/scaler.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:68cff80b680ddf6e7abbef98b5f336b97f9b5963e2209307f639383870e8cc71
+size 557
llama/lora-alpaca-q90-cce/checkpoint-1000/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:a69c02d22e7ba13517e1bdfd0b1c3ab7029bb5e809777e0aaafead5cf2bf0531
+size 627
llama/lora-alpaca-q90-cce/checkpoint-1000/trainer_state.json ADDED
@@ -0,0 +1,356 @@
+{
+  "best_metric": 0.6784626245498657,
+  "best_model_checkpoint": "lora-alpaca/checkpoint-1000",
+  "epoch": 0.7511913425197775,
+  "global_step": 1000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.02,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 2.2234,
+      "step": 20
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 1.6986,
+      "step": 40
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 1.0108,
+      "step": 60
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.8498,
+      "step": 80
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.0003,
+      "loss": 0.7975,
+      "step": 100
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.00029512591389114535,
+      "loss": 0.7621,
+      "step": 120
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 0.0002902518277822908,
+      "loss": 0.7465,
+      "step": 140
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 0.0002853777416734362,
+      "loss": 0.7367,
+      "step": 160
+    },
+    {
+      "epoch": 0.14,
+      "learning_rate": 0.0002805036555645816,
+      "loss": 0.732,
+      "step": 180
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 0.00027562956945572704,
+      "loss": 0.7253,
+      "step": 200
+    },
+    {
+      "epoch": 0.15,
+      "eval_loss": 0.7244793176651001,
+      "eval_runtime": 129.0223,
+      "eval_samples_per_second": 15.501,
+      "eval_steps_per_second": 1.938,
+      "step": 200
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 0.0002707554833468724,
+      "loss": 0.7226,
+      "step": 220
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 0.00026588139723801785,
+      "loss": 0.716,
+      "step": 240
+    },
+    {
+      "epoch": 0.2,
+      "learning_rate": 0.00026100731112916323,
+      "loss": 0.7182,
+      "step": 260
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 0.00025613322502030867,
+      "loss": 0.7026,
+      "step": 280
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 0.0002512591389114541,
+      "loss": 0.7139,
+      "step": 300
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 0.0002463850528025995,
+      "loss": 0.7156,
+      "step": 320
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 0.00024151096669374492,
+      "loss": 0.7069,
+      "step": 340
+    },
+    {
+      "epoch": 0.27,
+      "learning_rate": 0.0002366368805848903,
+      "loss": 0.6998,
+      "step": 360
+    },
+    {
+      "epoch": 0.29,
+      "learning_rate": 0.0002317627944760357,
+      "loss": 0.7058,
+      "step": 380
+    },
+    {
+      "epoch": 0.3,
+      "learning_rate": 0.00022688870836718114,
+      "loss": 0.7004,
+      "step": 400
+    },
+    {
+      "epoch": 0.3,
+      "eval_loss": 0.6993061900138855,
+      "eval_runtime": 129.0376,
+      "eval_samples_per_second": 15.499,
+      "eval_steps_per_second": 1.937,
+      "step": 400
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 0.00022201462225832652,
+      "loss": 0.6933,
+      "step": 420
+    },
+    {
+      "epoch": 0.33,
+      "learning_rate": 0.00021714053614947196,
+      "loss": 0.6868,
+      "step": 440
+    },
+    {
+      "epoch": 0.35,
+      "learning_rate": 0.00021226645004061737,
+      "loss": 0.6955,
+      "step": 460
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 0.00020739236393176277,
+      "loss": 0.6878,
+      "step": 480
+    },
+    {
+      "epoch": 0.38,
+      "learning_rate": 0.00020251827782290818,
+      "loss": 0.6891,
+      "step": 500
+    },
+    {
+      "epoch": 0.39,
+      "learning_rate": 0.0001976441917140536,
+      "loss": 0.686,
+      "step": 520
+    },
+    {
+      "epoch": 0.41,
+      "learning_rate": 0.00019277010560519902,
+      "loss": 0.6977,
+      "step": 540
+    },
+    {
+      "epoch": 0.42,
+      "learning_rate": 0.00018789601949634443,
+      "loss": 0.688,
+      "step": 560
+    },
+    {
+      "epoch": 0.44,
+      "learning_rate": 0.0001830219333874898,
+      "loss": 0.6838,
+      "step": 580
+    },
+    {
+      "epoch": 0.45,
+      "learning_rate": 0.00017814784727863525,
+      "loss": 0.682,
+      "step": 600
+    },
+    {
+      "epoch": 0.45,
+      "eval_loss": 0.6886340379714966,
+      "eval_runtime": 128.9722,
+      "eval_samples_per_second": 15.507,
+      "eval_steps_per_second": 1.938,
+      "step": 600
+    },
+    {
+      "epoch": 0.47,
+      "learning_rate": 0.00017327376116978065,
+      "loss": 0.6879,
+      "step": 620
+    },
+    {
+      "epoch": 0.48,
+      "learning_rate": 0.0001683996750609261,
+      "loss": 0.6871,
+      "step": 640
+    },
+    {
+      "epoch": 0.5,
+      "learning_rate": 0.00016352558895207147,
+      "loss": 0.6823,
+      "step": 660
+    },
+    {
+      "epoch": 0.51,
+      "learning_rate": 0.00015865150284321688,
+      "loss": 0.6732,
+      "step": 680
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 0.0001537774167343623,
+      "loss": 0.6832,
+      "step": 700
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 0.0001489033306255077,
+      "loss": 0.6795,
+      "step": 720
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 0.00014402924451665313,
+      "loss": 0.686,
+      "step": 740
+    },
+    {
+      "epoch": 0.57,
+      "learning_rate": 0.00013915515840779853,
+      "loss": 0.6799,
+      "step": 760
+    },
+    {
+      "epoch": 0.59,
+      "learning_rate": 0.00013428107229894394,
+      "loss": 0.6738,
+      "step": 780
+    },
+    {
+      "epoch": 0.6,
+      "learning_rate": 0.00012940698619008935,
+      "loss": 0.6801,
+      "step": 800
+    },
+    {
+      "epoch": 0.6,
+      "eval_loss": 0.6822482943534851,
+      "eval_runtime": 128.9861,
+      "eval_samples_per_second": 15.506,
+      "eval_steps_per_second": 1.938,
+      "step": 800
+    },
+    {
+      "epoch": 0.62,
+      "learning_rate": 0.00012453290008123476,
+      "loss": 0.6803,
+      "step": 820
+    },
+    {
+      "epoch": 0.63,
+      "learning_rate": 0.00011965881397238017,
+      "loss": 0.6647,
+      "step": 840
+    },
+    {
+      "epoch": 0.65,
+      "learning_rate": 0.00011478472786352559,
+      "loss": 0.6838,
+      "step": 860
+    },
+    {
+      "epoch": 0.66,
+      "learning_rate": 0.000109910641754671,
+      "loss": 0.6793,
+      "step": 880
+    },
+    {
+      "epoch": 0.68,
+      "learning_rate": 0.0001050365556458164,
+      "loss": 0.6882,
+      "step": 900
+    },
+    {
+      "epoch": 0.69,
+      "learning_rate": 0.00010016246953696181,
+      "loss": 0.6733,
+      "step": 920
+    },
+    {
+      "epoch": 0.71,
+      "learning_rate": 9.528838342810722e-05,
+      "loss": 0.6786,
+      "step": 940
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 9.041429731925264e-05,
+      "loss": 0.682,
+      "step": 960
+    },
+    {
+      "epoch": 0.74,
+      "learning_rate": 8.554021121039803e-05,
+      "loss": 0.6737,
+      "step": 980
+    },
+    {
+      "epoch": 0.75,
+      "learning_rate": 8.066612510154345e-05,
+      "loss": 0.6681,
+      "step": 1000
+    },
+    {
+      "epoch": 0.75,
+      "eval_loss": 0.6784626245498657,
+      "eval_runtime": 129.063,
+      "eval_samples_per_second": 15.496,
+      "eval_steps_per_second": 1.937,
+      "step": 1000
+    }
+  ],
+  "max_steps": 1331,
+  "num_train_epochs": 1,
+  "total_flos": 1.29988124737536e+18,
+  "trial_name": null,
+  "trial_params": null
+}
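
trainer_state.json is plain JSON, so the curves logged above are easy to recover; a short sketch, assuming the file sits at the path shown in this diff:

    import json

    with open("llama/lora-alpaca-q90-cce/checkpoint-1000/trainer_state.json") as f:
        state = json.load(f)

    # Split log_history into the training-loss and eval-loss series.
    train = [(e["step"], e["loss"]) for e in state["log_history"] if "loss" in e]
    evals = [(e["step"], e["eval_loss"]) for e in state["log_history"] if "eval_loss" in e]
    print(state["best_metric"], evals[-1])  # best eval loss so far: 0.6785 at step 1000
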
llama/lora-alpaca-q90-cce/checkpoint-1000/training_args.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fac929355b6e3b85d753318e1aa77cfc30f00e526d22857f9fad037d422dcc46
+size 3515
llama/lora-alpaca-q90-cce/checkpoint-1200/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:33c49815930ea90b0dac6ef1171f469dc470d3cdcb9c57f454da076b9a4f9ab8
+size 33629893
llama/lora-alpaca-q90-cce/checkpoint-1200/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:ba9f60fb87ca30385c9a4917ba3ebeedc71c97e0a283b0324e79a1b8ce328ba7
+size 16822989
llama/lora-alpaca-q90-cce/checkpoint-1200/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4d98aaa8ee8a726667d1326e2de60a7ee06aaece6c618bc7ce732b91f5e2f50
+size 14575
llama/lora-alpaca-q90-cce/checkpoint-1200/scaler.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:741cefeca9ef427f92406d2d10b81996655e2a9d50eb7aaa9614e6fdd1c9f529
+size 557
llama/lora-alpaca-q90-cce/checkpoint-1200/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d90a20c54899e27f19a89e3464bfe4b304595c9ee6541365c21acdedbea9c09
+size 627
llama/lora-alpaca-q90-cce/checkpoint-1200/trainer_state.json ADDED
@@ -0,0 +1,424 @@
+{
+  "best_metric": 0.6754332184791565,
+  "best_model_checkpoint": "lora-alpaca/checkpoint-1200",
+  "epoch": 0.901429611023733,
+  "global_step": 1200,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.02,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 2.2234,
+      "step": 20
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 1.6986,
+      "step": 40
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 1.0108,
+      "step": 60
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.8498,
+      "step": 80
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.0003,
+      "loss": 0.7975,
+      "step": 100
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.00029512591389114535,
+      "loss": 0.7621,
+      "step": 120
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 0.0002902518277822908,
+      "loss": 0.7465,
+      "step": 140
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 0.0002853777416734362,
+      "loss": 0.7367,
+      "step": 160
+    },
+    {
+      "epoch": 0.14,
+      "learning_rate": 0.0002805036555645816,
+      "loss": 0.732,
+      "step": 180
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 0.00027562956945572704,
+      "loss": 0.7253,
+      "step": 200
+    },
+    {
+      "epoch": 0.15,
+      "eval_loss": 0.7244793176651001,
+      "eval_runtime": 129.0223,
+      "eval_samples_per_second": 15.501,
+      "eval_steps_per_second": 1.938,
+      "step": 200
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 0.0002707554833468724,
+      "loss": 0.7226,
+      "step": 220
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 0.00026588139723801785,
+      "loss": 0.716,
+      "step": 240
+    },
+    {
+      "epoch": 0.2,
+      "learning_rate": 0.00026100731112916323,
+      "loss": 0.7182,
+      "step": 260
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 0.00025613322502030867,
+      "loss": 0.7026,
+      "step": 280
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 0.0002512591389114541,
+      "loss": 0.7139,
+      "step": 300
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 0.0002463850528025995,
+      "loss": 0.7156,
+      "step": 320
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 0.00024151096669374492,
+      "loss": 0.7069,
+      "step": 340
+    },
+    {
+      "epoch": 0.27,
+      "learning_rate": 0.0002366368805848903,
+      "loss": 0.6998,
+      "step": 360
+    },
+    {
+      "epoch": 0.29,
+      "learning_rate": 0.0002317627944760357,
+      "loss": 0.7058,
+      "step": 380
+    },
+    {
+      "epoch": 0.3,
+      "learning_rate": 0.00022688870836718114,
+      "loss": 0.7004,
+      "step": 400
+    },
+    {
+      "epoch": 0.3,
+      "eval_loss": 0.6993061900138855,
+      "eval_runtime": 129.0376,
+      "eval_samples_per_second": 15.499,
+      "eval_steps_per_second": 1.937,
+      "step": 400
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 0.00022201462225832652,
+      "loss": 0.6933,
+      "step": 420
+    },
+    {
+      "epoch": 0.33,
+      "learning_rate": 0.00021714053614947196,
+      "loss": 0.6868,
+      "step": 440
+    },
+    {
+      "epoch": 0.35,
+      "learning_rate": 0.00021226645004061737,
+      "loss": 0.6955,
+      "step": 460
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 0.00020739236393176277,
+      "loss": 0.6878,
+      "step": 480
+    },
+    {
+      "epoch": 0.38,
+      "learning_rate": 0.00020251827782290818,
+      "loss": 0.6891,
+      "step": 500
+    },
+    {
+      "epoch": 0.39,
+      "learning_rate": 0.0001976441917140536,
+      "loss": 0.686,
+      "step": 520
+    },
+    {
+      "epoch": 0.41,
+      "learning_rate": 0.00019277010560519902,
+      "loss": 0.6977,
+      "step": 540
+    },
+    {
+      "epoch": 0.42,
+      "learning_rate": 0.00018789601949634443,
+      "loss": 0.688,
+      "step": 560
+    },
+    {
+      "epoch": 0.44,
+      "learning_rate": 0.0001830219333874898,
+      "loss": 0.6838,
+      "step": 580
+    },
+    {
+      "epoch": 0.45,
+      "learning_rate": 0.00017814784727863525,
+      "loss": 0.682,
+      "step": 600
+    },
+    {
+      "epoch": 0.45,
+      "eval_loss": 0.6886340379714966,
+      "eval_runtime": 128.9722,
+      "eval_samples_per_second": 15.507,
+      "eval_steps_per_second": 1.938,
+      "step": 600
+    },
+    {
+      "epoch": 0.47,
+      "learning_rate": 0.00017327376116978065,
+      "loss": 0.6879,
+      "step": 620
+    },
+    {
+      "epoch": 0.48,
+      "learning_rate": 0.0001683996750609261,
+      "loss": 0.6871,
+      "step": 640
+    },
+    {
+      "epoch": 0.5,
+      "learning_rate": 0.00016352558895207147,
+      "loss": 0.6823,
+      "step": 660
+    },
+    {
+      "epoch": 0.51,
+      "learning_rate": 0.00015865150284321688,
+      "loss": 0.6732,
+      "step": 680
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 0.0001537774167343623,
+      "loss": 0.6832,
+      "step": 700
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 0.0001489033306255077,
+      "loss": 0.6795,
+      "step": 720
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 0.00014402924451665313,
+      "loss": 0.686,
+      "step": 740
+    },
+    {
+      "epoch": 0.57,
+      "learning_rate": 0.00013915515840779853,
+      "loss": 0.6799,
+      "step": 760
+    },
+    {
+      "epoch": 0.59,
+      "learning_rate": 0.00013428107229894394,
+      "loss": 0.6738,
+      "step": 780
+    },
+    {
+      "epoch": 0.6,
+      "learning_rate": 0.00012940698619008935,
+      "loss": 0.6801,
+      "step": 800
+    },
+    {
+      "epoch": 0.6,
+      "eval_loss": 0.6822482943534851,
+      "eval_runtime": 128.9861,
+      "eval_samples_per_second": 15.506,
+      "eval_steps_per_second": 1.938,
+      "step": 800
+    },
+    {
+      "epoch": 0.62,
+      "learning_rate": 0.00012453290008123476,
+      "loss": 0.6803,
+      "step": 820
+    },
+    {
+      "epoch": 0.63,
+      "learning_rate": 0.00011965881397238017,
+      "loss": 0.6647,
+      "step": 840
+    },
+    {
+      "epoch": 0.65,
+      "learning_rate": 0.00011478472786352559,
+      "loss": 0.6838,
+      "step": 860
+    },
+    {
+      "epoch": 0.66,
+      "learning_rate": 0.000109910641754671,
+      "loss": 0.6793,
+      "step": 880
+    },
+    {
+      "epoch": 0.68,
+      "learning_rate": 0.0001050365556458164,
+      "loss": 0.6882,
+      "step": 900
+    },
+    {
+      "epoch": 0.69,
+      "learning_rate": 0.00010016246953696181,
+      "loss": 0.6733,
+      "step": 920
+    },
+    {
+      "epoch": 0.71,
+      "learning_rate": 9.528838342810722e-05,
+      "loss": 0.6786,
+      "step": 940
+    },
+    {
+      "epoch": 0.72,
+      "learning_rate": 9.041429731925264e-05,
+      "loss": 0.682,
+      "step": 960
+    },
+    {
+      "epoch": 0.74,
+      "learning_rate": 8.554021121039803e-05,
+      "loss": 0.6737,
+      "step": 980
+    },
+    {
+      "epoch": 0.75,
+      "learning_rate": 8.066612510154345e-05,
+      "loss": 0.6681,
+      "step": 1000
+    },
+    {
+      "epoch": 0.75,
+      "eval_loss": 0.6784626245498657,
+      "eval_runtime": 129.063,
+      "eval_samples_per_second": 15.496,
+      "eval_steps_per_second": 1.937,
+      "step": 1000
+    },
+    {
+      "epoch": 0.77,
+      "learning_rate": 7.579203899268886e-05,
+      "loss": 0.678,
+      "step": 1020
+    },
+    {
+      "epoch": 0.78,
+      "learning_rate": 7.091795288383428e-05,
+      "loss": 0.6692,
+      "step": 1040
+    },
+    {
+      "epoch": 0.8,
+      "learning_rate": 6.604386677497969e-05,
+      "loss": 0.6767,
+      "step": 1060
+    },
+    {
+      "epoch": 0.81,
+      "learning_rate": 6.11697806661251e-05,
+      "loss": 0.6699,
+      "step": 1080
+    },
+    {
+      "epoch": 0.83,
+      "learning_rate": 5.6295694557270505e-05,
+      "loss": 0.678,
+      "step": 1100
+    },
+    {
+      "epoch": 0.84,
+      "learning_rate": 5.142160844841592e-05,
+      "loss": 0.6802,
+      "step": 1120
+    },
+    {
+      "epoch": 0.86,
+      "learning_rate": 4.654752233956133e-05,
+      "loss": 0.6773,
+      "step": 1140
+    },
+    {
+      "epoch": 0.87,
+      "learning_rate": 4.167343623070674e-05,
+      "loss": 0.6719,
+      "step": 1160
+    },
+    {
+      "epoch": 0.89,
+      "learning_rate": 3.679935012185215e-05,
+      "loss": 0.6764,
+      "step": 1180
+    },
+    {
+      "epoch": 0.9,
+      "learning_rate": 3.192526401299756e-05,
+      "loss": 0.6745,
+      "step": 1200
+    },
+    {
+      "epoch": 0.9,
+      "eval_loss": 0.6754332184791565,
+      "eval_runtime": 128.9887,
+      "eval_samples_per_second": 15.505,
+      "eval_steps_per_second": 1.938,
+      "step": 1200
+    }
+  ],
+  "max_steps": 1331,
+  "num_train_epochs": 1,
+  "total_flos": 1.559857496850432e+18,
+  "trial_name": null,
+  "trial_params": null
+}
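
The logged learning rates appear consistent with a linear warmup to 3e-4 over the first 100 steps followed by linear decay toward zero at max_steps=1331, i.e. the shape of transformers' linear schedule with warmup. A quick sanity check; the function below is an illustration of that inferred formula, not code from this repo:

    def linear_lr(step, peak=3e-4, warmup=100, max_steps=1331):
        # Linear warmup, then linear decay to zero at max_steps.
        if step < warmup:
            return peak * step / warmup
        return peak * (max_steps - step) / (max_steps - warmup)

    assert abs(linear_lr(120) - 0.00029512591389114535) < 1e-12
    assert abs(linear_lr(1200) - 3.192526401299756e-05) < 1e-12
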
llama/lora-alpaca-q90-cce/checkpoint-1200/training_args.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fac929355b6e3b85d753318e1aa77cfc30f00e526d22857f9fad037d422dcc46
+size 3515
llama/lora-alpaca-q90-cce/checkpoint-800/optimizer.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d4dae287198f854ec748229003488aa7600420a32ba91d46f4d84bd52120b57
+size 33629893
llama/lora-alpaca-q90-cce/checkpoint-800/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:acd23c87bf974e5147bc9a152a9262c3efffaa61b736daf495c3cf4da9f237d6
+size 16822989
llama/lora-alpaca-q90-cce/checkpoint-800/rng_state.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f1a4cc8c5baf51f3cd71e0d56a3c0b65204e606c6f5eb0adc2d7ae2c7e900fbf
+size 14575
llama/lora-alpaca-q90-cce/checkpoint-800/scaler.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:27ec07a12731ae6f9765d05fe7c8495505f1d0f90b4cc6255a0853fec3970808
+size 557
llama/lora-alpaca-q90-cce/checkpoint-800/scheduler.pt ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:616a7b1df89e3ef38b6fa5a5f381236e37a81295f1e5fe4260a5014a64e9a421
+size 627
llama/lora-alpaca-q90-cce/checkpoint-800/trainer_state.json ADDED
@@ -0,0 +1,288 @@
+{
+  "best_metric": 0.6822482943534851,
+  "best_model_checkpoint": "lora-alpaca/checkpoint-800",
+  "epoch": 0.600953074015822,
+  "global_step": 800,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.02,
+      "learning_rate": 5.9999999999999995e-05,
+      "loss": 2.2234,
+      "step": 20
+    },
+    {
+      "epoch": 0.03,
+      "learning_rate": 0.00011999999999999999,
+      "loss": 1.6986,
+      "step": 40
+    },
+    {
+      "epoch": 0.05,
+      "learning_rate": 0.00017999999999999998,
+      "loss": 1.0108,
+      "step": 60
+    },
+    {
+      "epoch": 0.06,
+      "learning_rate": 0.00023999999999999998,
+      "loss": 0.8498,
+      "step": 80
+    },
+    {
+      "epoch": 0.08,
+      "learning_rate": 0.0003,
+      "loss": 0.7975,
+      "step": 100
+    },
+    {
+      "epoch": 0.09,
+      "learning_rate": 0.00029512591389114535,
+      "loss": 0.7621,
+      "step": 120
+    },
+    {
+      "epoch": 0.11,
+      "learning_rate": 0.0002902518277822908,
+      "loss": 0.7465,
+      "step": 140
+    },
+    {
+      "epoch": 0.12,
+      "learning_rate": 0.0002853777416734362,
+      "loss": 0.7367,
+      "step": 160
+    },
+    {
+      "epoch": 0.14,
+      "learning_rate": 0.0002805036555645816,
+      "loss": 0.732,
+      "step": 180
+    },
+    {
+      "epoch": 0.15,
+      "learning_rate": 0.00027562956945572704,
+      "loss": 0.7253,
+      "step": 200
+    },
+    {
+      "epoch": 0.15,
+      "eval_loss": 0.7244793176651001,
+      "eval_runtime": 129.0223,
+      "eval_samples_per_second": 15.501,
+      "eval_steps_per_second": 1.938,
+      "step": 200
+    },
+    {
+      "epoch": 0.17,
+      "learning_rate": 0.0002707554833468724,
+      "loss": 0.7226,
+      "step": 220
+    },
+    {
+      "epoch": 0.18,
+      "learning_rate": 0.00026588139723801785,
+      "loss": 0.716,
+      "step": 240
+    },
+    {
+      "epoch": 0.2,
+      "learning_rate": 0.00026100731112916323,
+      "loss": 0.7182,
+      "step": 260
+    },
+    {
+      "epoch": 0.21,
+      "learning_rate": 0.00025613322502030867,
+      "loss": 0.7026,
+      "step": 280
+    },
+    {
+      "epoch": 0.23,
+      "learning_rate": 0.0002512591389114541,
+      "loss": 0.7139,
+      "step": 300
+    },
+    {
+      "epoch": 0.24,
+      "learning_rate": 0.0002463850528025995,
+      "loss": 0.7156,
+      "step": 320
+    },
+    {
+      "epoch": 0.26,
+      "learning_rate": 0.00024151096669374492,
+      "loss": 0.7069,
+      "step": 340
+    },
+    {
+      "epoch": 0.27,
+      "learning_rate": 0.0002366368805848903,
+      "loss": 0.6998,
+      "step": 360
+    },
+    {
+      "epoch": 0.29,
+      "learning_rate": 0.0002317627944760357,
+      "loss": 0.7058,
+      "step": 380
+    },
+    {
+      "epoch": 0.3,
+      "learning_rate": 0.00022688870836718114,
+      "loss": 0.7004,
+      "step": 400
+    },
+    {
+      "epoch": 0.3,
+      "eval_loss": 0.6993061900138855,
+      "eval_runtime": 129.0376,
+      "eval_samples_per_second": 15.499,
+      "eval_steps_per_second": 1.937,
+      "step": 400
+    },
+    {
+      "epoch": 0.32,
+      "learning_rate": 0.00022201462225832652,
+      "loss": 0.6933,
+      "step": 420
+    },
+    {
+      "epoch": 0.33,
+      "learning_rate": 0.00021714053614947196,
+      "loss": 0.6868,
+      "step": 440
+    },
+    {
+      "epoch": 0.35,
+      "learning_rate": 0.00021226645004061737,
+      "loss": 0.6955,
+      "step": 460
+    },
+    {
+      "epoch": 0.36,
+      "learning_rate": 0.00020739236393176277,
+      "loss": 0.6878,
+      "step": 480
+    },
+    {
+      "epoch": 0.38,
+      "learning_rate": 0.00020251827782290818,
+      "loss": 0.6891,
+      "step": 500
+    },
+    {
+      "epoch": 0.39,
+      "learning_rate": 0.0001976441917140536,
+      "loss": 0.686,
+      "step": 520
+    },
+    {
+      "epoch": 0.41,
+      "learning_rate": 0.00019277010560519902,
+      "loss": 0.6977,
+      "step": 540
+    },
+    {
+      "epoch": 0.42,
+      "learning_rate": 0.00018789601949634443,
+      "loss": 0.688,
+      "step": 560
+    },
+    {
+      "epoch": 0.44,
+      "learning_rate": 0.0001830219333874898,
+      "loss": 0.6838,
+      "step": 580
+    },
+    {
+      "epoch": 0.45,
+      "learning_rate": 0.00017814784727863525,
+      "loss": 0.682,
+      "step": 600
+    },
+    {
+      "epoch": 0.45,
+      "eval_loss": 0.6886340379714966,
+      "eval_runtime": 128.9722,
+      "eval_samples_per_second": 15.507,
+      "eval_steps_per_second": 1.938,
+      "step": 600
+    },
+    {
+      "epoch": 0.47,
+      "learning_rate": 0.00017327376116978065,
+      "loss": 0.6879,
+      "step": 620
+    },
+    {
+      "epoch": 0.48,
+      "learning_rate": 0.0001683996750609261,
+      "loss": 0.6871,
+      "step": 640
+    },
+    {
+      "epoch": 0.5,
+      "learning_rate": 0.00016352558895207147,
+      "loss": 0.6823,
+      "step": 660
+    },
+    {
+      "epoch": 0.51,
+      "learning_rate": 0.00015865150284321688,
+      "loss": 0.6732,
+      "step": 680
+    },
+    {
+      "epoch": 0.53,
+      "learning_rate": 0.0001537774167343623,
+      "loss": 0.6832,
+      "step": 700
+    },
+    {
+      "epoch": 0.54,
+      "learning_rate": 0.0001489033306255077,
+      "loss": 0.6795,
+      "step": 720
+    },
+    {
+      "epoch": 0.56,
+      "learning_rate": 0.00014402924451665313,
+      "loss": 0.686,
+      "step": 740
+    },
+    {
+      "epoch": 0.57,
+      "learning_rate": 0.00013915515840779853,
+      "loss": 0.6799,
+      "step": 760
+    },
+    {
+      "epoch": 0.59,
+      "learning_rate": 0.00013428107229894394,
+      "loss": 0.6738,
+      "step": 780
+    },
+    {
+      "epoch": 0.6,
+      "learning_rate": 0.00012940698619008935,
+      "loss": 0.6801,
+      "step": 800
+    },
+    {
+      "epoch": 0.6,
+      "eval_loss": 0.6822482943534851,
+      "eval_runtime": 128.9861,
+      "eval_samples_per_second": 15.506,
+      "eval_steps_per_second": 1.938,
+      "step": 800
+    }
+  ],
+  "max_steps": 1331,
+  "num_train_epochs": 1,
+  "total_flos": 1.039904997900288e+18,
+  "trial_name": null,
+  "trial_params": null
+}
llama/lora-alpaca-q90-cce/checkpoint-800/training_args.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:fac929355b6e3b85d753318e1aa77cfc30f00e526d22857f9fad037d422dcc46
+size 3515
llama/lora-alpaca-q90-use-seq/adapter_config.json ADDED
@@ -0,0 +1,19 @@
+{
+  "base_model_name_or_path": "decapoda-research/llama-7b-hf",
+  "bias": "none",
+  "enable_lora": null,
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "lora_alpha": 16,
+  "lora_dropout": 0.05,
+  "merge_weights": false,
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 8,
+  "target_modules": [
+    "q_proj",
+    "v_proj"
+  ],
+  "task_type": "CAUSAL_LM"
+}
llama/lora-alpaca-q90-use-seq/adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d4f45e8904cd0ecec21041e4968cee3f330f05b89c3aa425e3eb7d9a3251ec4
+size 16822989