shaurya-06 committed
Commit: f351df8
Parent: 721d7d8

shaurya-06/adapter

adapter_config.json CHANGED
@@ -11,7 +11,7 @@
  "layers_to_transform": null,
  "loftq_config": {},
  "lora_alpha": 16,
- "lora_dropout": 0.1,
+ "lora_dropout": 0.3,
  "megatron_config": null,
  "megatron_core": "megatron.core",
  "modules_to_save": null,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:40e249aa5cad4bb1b7d9575ed7d47d455a84d71b94d53384e03bc1c6e5effa39
+ oid sha256:c112228277edab577566384c9e92b4b444c85e04d507efc2f3c9636b33004774
  size 15734784
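
The adapter_config.json hunk above raises lora_dropout from 0.1 to 0.3, and this pointer records the retrained adapter weights. For orientation, a minimal sketch of how such a configuration might be expressed with the peft library; only lora_alpha and lora_dropout come from the diff, while r, target_modules, and task_type are illustrative assumptions outside the visible context.

```python
from peft import LoraConfig

# Values marked "from diff" appear in the adapter_config.json hunk above;
# everything else is an assumption for illustration only.
lora_config = LoraConfig(
    r=8,                                  # assumption: rank not shown in the hunk
    lora_alpha=16,                        # from diff
    lora_dropout=0.3,                     # from diff: raised from 0.1 in this commit
    target_modules=["q_proj", "v_proj"],  # assumption: typical attention projections
    task_type="CAUSAL_LM",                # assumption
)
```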
runs/Apr28_21-28-07_757a20c23d9b/events.out.tfevents.1714339701.757a20c23d9b.200681.17 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:00da21693d63b9a011734bcfde2cf8ecac48aa22b585827059ab23fe85cb9af7
+ size 8416
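
Both adapter_model.safetensors and the new tfevents file are stored as Git LFS pointers, which record only a SHA-256 digest (oid) and a byte size. A minimal sketch, assuming the real adapter_model.safetensors has been downloaded to the working directory, of checking it against the pointer above:

```python
import hashlib
import os

# Values copied from the updated adapter_model.safetensors pointer above.
EXPECTED_OID = "c112228277edab577566384c9e92b4b444c85e04d507efc2f3c9636b33004774"
EXPECTED_SIZE = 15734784
PATH = "adapter_model.safetensors"  # assumed local path after download

# The pointer's size is the exact byte length of the real file.
assert os.path.getsize(PATH) == EXPECTED_SIZE, "size does not match the LFS pointer"

# The pointer's oid is the SHA-256 of the real file's contents.
digest = hashlib.sha256()
with open(PATH, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        digest.update(chunk)
assert digest.hexdigest() == EXPECTED_OID, "hash does not match the LFS pointer"

print("adapter_model.safetensors matches its LFS pointer")
```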
trainer_state.json CHANGED
@@ -10,96 +10,96 @@
  "log_history": [
  {
  "epoch": 1.2,
- "grad_norm": 0.8679821491241455,
+ "grad_norm": 0.8724178671836853,
  "learning_rate": 0.00019868265225415265,
- "loss": 4.9095,
+ "loss": 4.9162,
  "step": 15
  },
  {
  "epoch": 2.4,
- "grad_norm": 1.064207911491394,
+ "grad_norm": 1.0800001621246338,
  "learning_rate": 0.00019075754196709572,
- "loss": 3.2019,
+ "loss": 3.2245,
  "step": 30
  },
  {
  "epoch": 3.6,
- "grad_norm": 0.6014457941055298,
+ "grad_norm": 0.6155961751937866,
  "learning_rate": 0.00017621620551276366,
- "loss": 1.7706,
+ "loss": 1.7826,
  "step": 45
  },
  {
  "epoch": 4.8,
- "grad_norm": 0.4954649806022644,
+ "grad_norm": 0.4888613522052765,
  "learning_rate": 0.00015611870653623825,
- "loss": 1.2936,
+ "loss": 1.3024,
  "step": 60
  },
  {
  "epoch": 6.0,
- "grad_norm": 0.418344110250473,
+ "grad_norm": 0.41620710492134094,
  "learning_rate": 0.000131930153013598,
- "loss": 1.0465,
+ "loss": 1.053,
  "step": 75
  },
  {
  "epoch": 7.2,
- "grad_norm": 0.40778854489326477,
+ "grad_norm": 0.41758859157562256,
  "learning_rate": 0.00010541389085854176,
- "loss": 0.9186,
+ "loss": 0.9255,
  "step": 90
  },
  {
  "epoch": 8.4,
- "grad_norm": 0.3741260766983032,
+ "grad_norm": 0.3929082751274109,
  "learning_rate": 7.85029559788976e-05,
- "loss": 0.8364,
+ "loss": 0.8443,
  "step": 105
  },
  {
  "epoch": 9.6,
- "grad_norm": 0.4041515290737152,
+ "grad_norm": 0.4028986692428589,
  "learning_rate": 5.3159155930021e-05,
- "loss": 0.7861,
+ "loss": 0.7949,
  "step": 120
  },
  {
  "epoch": 10.8,
- "grad_norm": 0.3697131872177124,
+ "grad_norm": 0.3700142204761505,
  "learning_rate": 3.123005411465766e-05,
- "loss": 0.7488,
+ "loss": 0.7564,
  "step": 135
  },
  {
  "epoch": 12.0,
- "grad_norm": 0.4183999001979828,
+ "grad_norm": 0.38459262251853943,
  "learning_rate": 1.4314282383241096e-05,
- "loss": 0.7274,
+ "loss": 0.7354,
  "step": 150
  },
  {
  "epoch": 13.2,
- "grad_norm": 0.4263518154621124,
+ "grad_norm": 0.38667017221450806,
  "learning_rate": 3.6450007480777093e-06,
- "loss": 0.7245,
+ "loss": 0.7324,
  "step": 165
  },
  {
  "epoch": 14.4,
- "grad_norm": 0.3251570165157318,
+ "grad_norm": 0.3338981568813324,
  "learning_rate": 0.0,
- "loss": 0.7288,
+ "loss": 0.7366,
  "step": 180
  },
  {
  "epoch": 14.4,
  "step": 180,
  "total_flos": 116292530995200.0,
- "train_loss": 1.4744000752766928,
- "train_runtime": 81.674,
- "train_samples_per_second": 18.366,
- "train_steps_per_second": 2.204
+ "train_loss": 1.4836891492207844,
+ "train_runtime": 82.2898,
+ "train_samples_per_second": 18.228,
+ "train_steps_per_second": 2.187
  }
  ],
  "logging_steps": 15,
training_args.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:c67cb9db299dcc048f1694cd0a11b03c9a64221d6c98c783770f63b50ac522de
+ oid sha256:7a7c4f52851c1314fe925acbc7427f40e294d8cd69c7baf44199af984ed067ae
  size 5176
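
The updated trainer_state.json above logs loss, grad_norm, and learning_rate every 15 steps, plus a final summary entry. A minimal sketch, assuming the file sits in the working directory, for reading that history back:

```python
import json

with open("trainer_state.json") as f:
    state = json.load(f)

# Every entry in log_history except the last carries per-step metrics;
# the final entry holds summary statistics (train_loss, train_runtime, ...).
for entry in state["log_history"]:
    if "loss" in entry:
        print(f"step {entry['step']:>3}  epoch {entry['epoch']:>4}  loss {entry['loss']:.4f}")
    else:
        print(f"train_loss {entry['train_loss']:.4f} in {entry['train_runtime']}s")

# training_args.bin is a pickled transformers TrainingArguments object; it can
# be inspected with torch.load (weights_only=False on recent PyTorch), provided
# transformers is installed so the pickle can be resolved.
# import torch
# print(torch.load("training_args.bin", weights_only=False))
```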