ioseff committed on
Commit
7f2f662
1 Parent(s): e6e08d9

Upload 11 files

Browse files
README.md CHANGED
@@ -1,10 +1,6 @@
1
  ---
2
  library_name: peft
3
  base_model: meta-llama/Meta-Llama-3-8B-Instruct
4
- license: apache-2.0
5
- language:
6
- - en
7
- pipeline_tag: text-generation
8
  ---
9
 
10
  # Model Card for Model ID
 
1
  ---
2
  library_name: peft
3
  base_model: meta-llama/Meta-Llama-3-8B-Instruct
 
 
 
 
4
  ---
5
 
6
  # Model Card for Model ID
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd16f35fdb908a86733ece83af9d2f87342fa4b874c01da1f940bf93a38870f9
3
  size 109069176
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8186acc92c8049655bf1a8fa4aae3b74883729ba07d50b1649e4beeff71f80c9
3
  size 109069176
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5df8fe78b5a8c6fef8e217bd895aae8747f02b0f9ee135d1805f8c3e5297ae7a
3
  size 218182586
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d823e6e13236821a82815a2e2d60c599bceedaaad68d1900551b8c3110fc71c
3
  size 218182586
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1375e58f1c2fdc473d6c70983ffa392358838d663f6fe1b5084d0b5de5fd1abb
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4fc18d85373c3ed78a72e873131c5ace7c62304ebb2e3b812ae5639c5dfafe6d
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b0a7343915c78125635d9ebba74d66cd6af1a76cc5481916839c6ed63f8cc757
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd2651dbbb234a1169de9db4c1691e20ebcc2a6f2cad7a0b6f3fb47aa10c248f
3
  size 1064
tokenizer.json CHANGED
@@ -1,6 +1,11 @@
1
  {
2
  "version": "1.0",
3
- "truncation": null,
 
 
 
 
 
4
  "padding": null,
5
  "added_tokens": [
6
  {
 
1
  {
2
  "version": "1.0",
3
+ "truncation": {
4
+ "direction": "Right",
5
+ "max_length": 512,
6
+ "strategy": "LongestFirst",
7
+ "stride": 0
8
+ },
9
  "padding": null,
10
  "added_tokens": [
11
  {
trainer_state.json CHANGED
@@ -1,69 +1,118 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.938271604938271,
5
  "eval_steps": 500,
6
- "global_step": 700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.7054673721340388,
13
- "grad_norm": 0.16252084076404572,
14
  "learning_rate": 0.0002,
15
- "loss": 1.6969,
16
  "step": 100
17
  },
18
  {
19
  "epoch": 1.4109347442680775,
20
- "grad_norm": 0.16519103944301605,
21
  "learning_rate": 0.0002,
22
- "loss": 1.4729,
23
  "step": 200
24
  },
25
  {
26
  "epoch": 2.1164021164021163,
27
- "grad_norm": 0.19196180999279022,
28
  "learning_rate": 0.0002,
29
- "loss": 1.4336,
30
  "step": 300
31
  },
32
  {
33
  "epoch": 2.821869488536155,
34
- "grad_norm": 0.1939113885164261,
35
  "learning_rate": 0.0002,
36
  "loss": 1.3806,
37
  "step": 400
38
  },
39
  {
40
  "epoch": 3.527336860670194,
41
- "grad_norm": 0.2203884869813919,
42
  "learning_rate": 0.0002,
43
- "loss": 1.347,
44
  "step": 500
45
  },
46
  {
47
  "epoch": 4.232804232804233,
48
- "grad_norm": 0.2938516139984131,
49
  "learning_rate": 0.0002,
50
  "loss": 1.3009,
51
  "step": 600
52
  },
53
  {
54
  "epoch": 4.938271604938271,
55
- "grad_norm": 0.2624386250972748,
56
  "learning_rate": 0.0002,
57
- "loss": 1.3224,
58
  "step": 700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
59
  }
60
  ],
61
  "logging_steps": 100,
62
- "max_steps": 705,
63
  "num_input_tokens_seen": 0,
64
- "num_train_epochs": 5,
65
  "save_steps": 100,
66
- "total_flos": 8.028866127259238e+16,
67
  "train_batch_size": 2,
68
  "trial_name": null,
69
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 9.876543209876543,
5
  "eval_steps": 500,
6
+ "global_step": 1400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.7054673721340388,
13
+ "grad_norm": 0.16109323501586914,
14
  "learning_rate": 0.0002,
15
+ "loss": 1.6978,
16
  "step": 100
17
  },
18
  {
19
  "epoch": 1.4109347442680775,
20
+ "grad_norm": 0.1664305478334427,
21
  "learning_rate": 0.0002,
22
+ "loss": 1.4728,
23
  "step": 200
24
  },
25
  {
26
  "epoch": 2.1164021164021163,
27
+ "grad_norm": 0.19294697046279907,
28
  "learning_rate": 0.0002,
29
+ "loss": 1.4337,
30
  "step": 300
31
  },
32
  {
33
  "epoch": 2.821869488536155,
34
+ "grad_norm": 0.20536458492279053,
35
  "learning_rate": 0.0002,
36
  "loss": 1.3806,
37
  "step": 400
38
  },
39
  {
40
  "epoch": 3.527336860670194,
41
+ "grad_norm": 0.21814219653606415,
42
  "learning_rate": 0.0002,
43
+ "loss": 1.3472,
44
  "step": 500
45
  },
46
  {
47
  "epoch": 4.232804232804233,
48
+ "grad_norm": 0.2931879162788391,
49
  "learning_rate": 0.0002,
50
  "loss": 1.3009,
51
  "step": 600
52
  },
53
  {
54
  "epoch": 4.938271604938271,
55
+ "grad_norm": 0.2566097676753998,
56
  "learning_rate": 0.0002,
57
+ "loss": 1.3227,
58
  "step": 700
59
+ },
60
+ {
61
+ "epoch": 5.64373897707231,
62
+ "grad_norm": 0.3220715820789337,
63
+ "learning_rate": 0.0002,
64
+ "loss": 1.2369,
65
+ "step": 800
66
+ },
67
+ {
68
+ "epoch": 6.349206349206349,
69
+ "grad_norm": 0.3416915237903595,
70
+ "learning_rate": 0.0002,
71
+ "loss": 1.1785,
72
+ "step": 900
73
+ },
74
+ {
75
+ "epoch": 7.054673721340388,
76
+ "grad_norm": 0.3860929310321808,
77
+ "learning_rate": 0.0002,
78
+ "loss": 1.1151,
79
+ "step": 1000
80
+ },
81
+ {
82
+ "epoch": 7.760141093474427,
83
+ "grad_norm": 0.5656395554542542,
84
+ "learning_rate": 0.0002,
85
+ "loss": 1.033,
86
+ "step": 1100
87
+ },
88
+ {
89
+ "epoch": 8.465608465608465,
90
+ "grad_norm": 0.690337598323822,
91
+ "learning_rate": 0.0002,
92
+ "loss": 0.981,
93
+ "step": 1200
94
+ },
95
+ {
96
+ "epoch": 9.171075837742505,
97
+ "grad_norm": 0.8267166614532471,
98
+ "learning_rate": 0.0002,
99
+ "loss": 0.9868,
100
+ "step": 1300
101
+ },
102
+ {
103
+ "epoch": 9.876543209876543,
104
+ "grad_norm": 0.5811594128608704,
105
+ "learning_rate": 0.0002,
106
+ "loss": 0.9134,
107
+ "step": 1400
108
  }
109
  ],
110
  "logging_steps": 100,
111
+ "max_steps": 1410,
112
  "num_input_tokens_seen": 0,
113
+ "num_train_epochs": 10,
114
  "save_steps": 100,
115
+ "total_flos": 1.604382181037015e+17,
116
  "train_batch_size": 2,
117
  "trial_name": null,
118
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1fc734aad97df0883b151177054bb4776e37aa99406829dee7f493fef55eb451
3
  size 4984
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a7fbe230792309692b93a9f8226dc4049bb8f12efe6e8208885f2775e69fbbff
3
  size 4984