lolyafedor committed on
Commit
50625c2
1 Parent(s): bcdf77e

End of training

Browse files
adapter_config.json CHANGED
@@ -23,22 +23,22 @@
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
 
 
 
 
26
  "embed_tokens.2",
 
 
27
  "out_proj",
28
- "embed_tokens.0",
29
- "lm_heads.1",
30
- "q_proj",
31
- "fc2",
32
  "lm_heads.2",
33
- "lm_heads.0",
34
- "lm_heads.3",
35
- "enc_to_dec_proj",
36
  "embed_tokens.1",
37
- "audio_enc_to_dec_proj",
38
- "v_proj",
39
  "embed_tokens.3",
40
- "k_proj",
41
- "fc1"
 
 
42
  ],
43
  "task_type": null,
44
  "use_dora": false,
 
23
  "rank_pattern": {},
24
  "revision": null,
25
  "target_modules": [
26
+ "lm_heads.3",
27
+ "audio_enc_to_dec_proj",
28
+ "q_proj",
29
+ "lm_heads.0",
30
  "embed_tokens.2",
31
+ "v_proj",
32
+ "k_proj",
33
  "out_proj",
 
 
 
 
34
  "lm_heads.2",
 
 
 
35
  "embed_tokens.1",
36
+ "fc1",
 
37
  "embed_tokens.3",
38
+ "fc2",
39
+ "enc_to_dec_proj",
40
+ "embed_tokens.0",
41
+ "lm_heads.1"
42
  ],
43
  "task_type": null,
44
  "use_dora": false,
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8b947692009e5564f54e1a1c8064d476f99c0931c814df934f82b46cbdd7b52
3
  size 87103456
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:171e33d0c67fea1b84074db35fdfb5c09e900a64fbc64e44d1e26d13ef800b12
3
  size 87103456
trainer_state.json CHANGED
@@ -10,94 +10,94 @@
10
  "log_history": [
11
  {
12
  "epoch": 0.32,
13
- "grad_norm": 1.0525139570236206,
14
  "learning_rate": 0.00018333333333333334,
15
- "loss": 9.5169,
16
  "step": 2
17
  },
18
  {
19
  "epoch": 0.64,
20
- "grad_norm": 1.5870144367218018,
21
  "learning_rate": 0.0001666666666666667,
22
- "loss": 9.1514,
23
  "step": 4
24
  },
25
  {
26
  "epoch": 0.96,
27
- "grad_norm": 1.7580112218856812,
28
  "learning_rate": 0.00015000000000000001,
29
- "loss": 8.7557,
30
  "step": 6
31
  },
32
  {
33
  "epoch": 1.28,
34
- "grad_norm": 2.0391345024108887,
35
  "learning_rate": 0.00013333333333333334,
36
- "loss": 8.2163,
37
  "step": 8
38
  },
39
  {
40
  "epoch": 1.6,
41
- "grad_norm": 2.67323899269104,
42
  "learning_rate": 0.00011666666666666668,
43
- "loss": 7.9266,
44
  "step": 10
45
  },
46
  {
47
  "epoch": 1.92,
48
- "grad_norm": 1.5127941370010376,
49
  "learning_rate": 0.0001,
50
- "loss": 7.6902,
51
  "step": 12
52
  },
53
  {
54
  "epoch": 2.24,
55
- "grad_norm": 1.308194875717163,
56
  "learning_rate": 8.333333333333334e-05,
57
- "loss": 7.5201,
58
  "step": 14
59
  },
60
  {
61
  "epoch": 2.56,
62
- "grad_norm": 0.9491788148880005,
63
  "learning_rate": 6.666666666666667e-05,
64
- "loss": 7.3685,
65
  "step": 16
66
  },
67
  {
68
  "epoch": 2.88,
69
- "grad_norm": 1.15030837059021,
70
  "learning_rate": 5e-05,
71
- "loss": 7.3428,
72
  "step": 18
73
  },
74
  {
75
  "epoch": 3.2,
76
- "grad_norm": 0.9033100008964539,
77
  "learning_rate": 3.3333333333333335e-05,
78
- "loss": 7.2229,
79
  "step": 20
80
  },
81
  {
82
  "epoch": 3.52,
83
- "grad_norm": 1.4382227659225464,
84
  "learning_rate": 1.6666666666666667e-05,
85
- "loss": 7.3239,
86
  "step": 22
87
  },
88
  {
89
  "epoch": 3.84,
90
- "grad_norm": 0.7890676259994507,
91
  "learning_rate": 0.0,
92
- "loss": 7.2211,
93
  "step": 24
94
  },
95
  {
96
  "epoch": 3.84,
97
  "step": 24,
98
- "total_flos": 108535654138032.0,
99
- "train_loss": 7.9380284150441485,
100
- "train_runtime": 637.3795,
101
  "train_samples_per_second": 0.628,
102
  "train_steps_per_second": 0.038
103
  }
@@ -119,7 +119,7 @@
119
  "attributes": {}
120
  }
121
  },
122
- "total_flos": 108535654138032.0,
123
  "train_batch_size": 2,
124
  "trial_name": null,
125
  "trial_params": null
 
10
  "log_history": [
11
  {
12
  "epoch": 0.32,
13
+ "grad_norm": 1.0148409605026245,
14
  "learning_rate": 0.00018333333333333334,
15
+ "loss": 9.5085,
16
  "step": 2
17
  },
18
  {
19
  "epoch": 0.64,
20
+ "grad_norm": 1.6703659296035767,
21
  "learning_rate": 0.0001666666666666667,
22
+ "loss": 9.1769,
23
  "step": 4
24
  },
25
  {
26
  "epoch": 0.96,
27
+ "grad_norm": 1.8155523538589478,
28
  "learning_rate": 0.00015000000000000001,
29
+ "loss": 8.7379,
30
  "step": 6
31
  },
32
  {
33
  "epoch": 1.28,
34
+ "grad_norm": 1.9680312871932983,
35
  "learning_rate": 0.00013333333333333334,
36
+ "loss": 8.2451,
37
  "step": 8
38
  },
39
  {
40
  "epoch": 1.6,
41
+ "grad_norm": 2.053583860397339,
42
  "learning_rate": 0.00011666666666666668,
43
+ "loss": 7.9159,
44
  "step": 10
45
  },
46
  {
47
  "epoch": 1.92,
48
+ "grad_norm": 1.528686285018921,
49
  "learning_rate": 0.0001,
50
+ "loss": 7.6275,
51
  "step": 12
52
  },
53
  {
54
  "epoch": 2.24,
55
+ "grad_norm": 1.1224510669708252,
56
  "learning_rate": 8.333333333333334e-05,
57
+ "loss": 7.4755,
58
  "step": 14
59
  },
60
  {
61
  "epoch": 2.56,
62
+ "grad_norm": 1.065908670425415,
63
  "learning_rate": 6.666666666666667e-05,
64
+ "loss": 7.3267,
65
  "step": 16
66
  },
67
  {
68
  "epoch": 2.88,
69
+ "grad_norm": 1.025452971458435,
70
  "learning_rate": 5e-05,
71
+ "loss": 7.2462,
72
  "step": 18
73
  },
74
  {
75
  "epoch": 3.2,
76
+ "grad_norm": 0.8463966846466064,
77
  "learning_rate": 3.3333333333333335e-05,
78
+ "loss": 7.2758,
79
  "step": 20
80
  },
81
  {
82
  "epoch": 3.52,
83
+ "grad_norm": 1.6603997945785522,
84
  "learning_rate": 1.6666666666666667e-05,
85
+ "loss": 7.3307,
86
  "step": 22
87
  },
88
  {
89
  "epoch": 3.84,
90
+ "grad_norm": 0.8327799439430237,
91
  "learning_rate": 0.0,
92
+ "loss": 7.1545,
93
  "step": 24
94
  },
95
  {
96
  "epoch": 3.84,
97
  "step": 24,
98
+ "total_flos": 108923677416360.0,
99
+ "train_loss": 7.9184350570042925,
100
+ "train_runtime": 637.0525,
101
  "train_samples_per_second": 0.628,
102
  "train_steps_per_second": 0.038
103
  }
 
119
  "attributes": {}
120
  }
121
  },
122
+ "total_flos": 108923677416360.0,
123
  "train_batch_size": 2,
124
  "trial_name": null,
125
  "trial_params": null
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3d26e79c9fb97880dd95e934c8113fd0b69eb285f4bdc928c70a8aa73b85d85c
3
  size 5368
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:afc003429aace195a39fbdebcfa03a517206c2b9ed9b8539f7cf8883bd3fb41e
3
  size 5368