akkky02 committed
Commit f194c34
1 Parent(s): 6f58a2e
README.md CHANGED
@@ -44,6 +44,10 @@ The following hyperparameters were used during training:
 - training_steps: 10
 - mixed_precision_training: Native AMP
 
+### Training results
+
+
+
 ### Framework versions
 
 - PEFT 0.8.2
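For context, the README hyperparameters visible in this hunk (training_steps: 10, mixed_precision_training: Native AMP) map onto a transformers TrainingArguments configuration roughly as sketched below. Only values visible in this commit are reflected; everything else is a placeholder, not taken from the repo.

from transformers import TrainingArguments

# Rough sketch only: max_steps and mixed precision come from the README above,
# batch size / logging / save steps from trainer_state.json later in this commit;
# output_dir and anything not shown in the diff are assumptions.
args = TrainingArguments(
    output_dir="outputs",            # placeholder
    max_steps=10,                    # training_steps: 10
    fp16=True,                       # "Native AMP" (could equally be bf16 on newer hardware)
    per_device_train_batch_size=1,   # "train_batch_size": 1
    logging_steps=1,                 # "logging_steps": 1
    save_steps=500,                  # "save_steps": 500
)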
adapter_config.json CHANGED
@@ -19,13 +19,13 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
-    "up_proj",
+    "gate_proj",
     "k_proj",
     "v_proj",
-    "gate_proj",
-    "o_proj",
+    "down_proj",
     "q_proj",
-    "down_proj"
+    "o_proj",
+    "up_proj"
   ],
   "task_type": "CAUSAL_LM",
   "use_rslora": false
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:130309b46a9db0bda2443a36d5ecaa79e8f390aadee720706f17cf5cbba5cf4b
+oid sha256:8475ca784de92c0eb4f168b0933c30be4344b58a26d81c01c3ff31c0adf6d5f8
 size 159967880
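The adapter weights file keeps the same size (159967880 bytes) and only the LFS content hash changes, consistent with retraining an adapter of identical shape. A minimal loading sketch, with placeholder model IDs since neither the base checkpoint nor the repo name appears in this diff:

from transformers import AutoModelForCausalLM
from peft import PeftModel

# "base-model-id" and "akkky02/adapter-repo" are placeholders, not names from this commit.
base = AutoModelForCausalLM.from_pretrained("base-model-id")
model = PeftModel.from_pretrained(base, "akkky02/adapter-repo")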
all_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 0.0,
-    "total_flos": 333801262080000.0,
-    "train_loss": 1.2641676783561706,
-    "train_runtime": 52.6099,
-    "train_samples_per_second": 0.76,
-    "train_steps_per_second": 0.19
+    "total_flos": 324056878080000.0,
+    "train_loss": 1.359846395254135,
+    "train_runtime": 36.0797,
+    "train_samples_per_second": 1.109,
+    "train_steps_per_second": 0.277
 }
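The new throughput figures are internally consistent: 10 training steps over a 36.0797 s runtime gives the reported 0.277 steps per second, and the samples-per-second value works out to about 40 samples in total, roughly 4 per optimizer step. A quick check in plain Python, with the values copied from the new all_results.json:

# Values copied from the updated all_results.json above.
train_runtime = 36.0797
steps = 10

print(round(steps / train_runtime, 3))   # 0.277 -> matches train_steps_per_second
print(round(1.109 * train_runtime))      # ~40 samples total, i.e. ~4 per step
# With "train_batch_size": 1, ~4 samples per step would point to gradient
# accumulation or multiple devices; that is an inference, not stated in the diff.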
train_results.json CHANGED
@@ -1,8 +1,8 @@
 {
     "epoch": 0.0,
-    "total_flos": 333801262080000.0,
-    "train_loss": 1.2641676783561706,
-    "train_runtime": 52.6099,
-    "train_samples_per_second": 0.76,
-    "train_steps_per_second": 0.19
+    "total_flos": 324056878080000.0,
+    "train_loss": 1.359846395254135,
+    "train_runtime": 36.0797,
+    "train_samples_per_second": 1.109,
+    "train_steps_per_second": 0.277
 }
trainer_state.json CHANGED
@@ -23,59 +23,59 @@
     {
       "epoch": 0.0,
       "learning_rate": 0.000175,
-      "loss": 1.4764,
+      "loss": 1.6356,
       "step": 3
     },
     {
       "epoch": 0.0,
       "learning_rate": 0.00015000000000000001,
-      "loss": 0.9966,
+      "loss": 1.3577,
       "step": 4
     },
     {
       "epoch": 0.0,
       "learning_rate": 0.000125,
-      "loss": 1.339,
+      "loss": 1.3596,
       "step": 5
     },
     {
       "epoch": 0.0,
       "learning_rate": 0.0001,
-      "loss": 1.0031,
+      "loss": 1.0625,
       "step": 6
     },
     {
       "epoch": 0.0,
       "learning_rate": 7.500000000000001e-05,
-      "loss": 0.9943,
+      "loss": 1.0769,
       "step": 7
     },
     {
       "epoch": 0.0,
       "learning_rate": 5e-05,
-      "loss": 1.1203,
+      "loss": 1.2269,
       "step": 8
     },
     {
       "epoch": 0.0,
       "learning_rate": 2.5e-05,
-      "loss": 0.9072,
+      "loss": 1.0216,
       "step": 9
     },
     {
       "epoch": 0.0,
       "learning_rate": 0.0,
-      "loss": 0.9334,
+      "loss": 0.9863,
       "step": 10
     },
     {
       "epoch": 0.0,
       "step": 10,
-      "total_flos": 333801262080000.0,
-      "train_loss": 1.2641676783561706,
-      "train_runtime": 52.6099,
-      "train_samples_per_second": 0.76,
-      "train_steps_per_second": 0.19
+      "total_flos": 324056878080000.0,
+      "train_loss": 1.359846395254135,
+      "train_runtime": 36.0797,
+      "train_samples_per_second": 1.109,
+      "train_steps_per_second": 0.277
     }
   ],
   "logging_steps": 1,
@@ -83,7 +83,7 @@
   "num_input_tokens_seen": 0,
   "num_train_epochs": 1,
   "save_steps": 500,
-  "total_flos": 333801262080000.0,
+  "total_flos": 324056878080000.0,
   "train_batch_size": 1,
   "trial_name": null,
   "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c032efc104fa0fece03810eb29ba53e2dbcaccc5a8663387f92be442f23a24e2
+oid sha256:aa88d497c0724abe443769abdd278e76866c1fb542f8770422dffae4852c481f
 size 4664
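training_args.bin is the serialized TrainingArguments object that transformers' Trainer writes alongside a run; here the content hash changes while the size stays at 4664 bytes. A minimal inspection sketch (the weights_only flag is needed on recent PyTorch versions to allow full unpickling):

import torch

# training_args.bin is a pickled TrainingArguments object saved by the Trainer.
args = torch.load("training_args.bin", weights_only=False)
print(args)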