Tejaswi006 committed on
Commit
c8bebbb
1 Parent(s): 12c6498

Model save

Browse files
README.md CHANGED
@@ -15,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
15
 
16
  This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
- - Loss: 1.4756
19
 
20
  ## Model description
21
 
@@ -43,22 +43,16 @@ The following hyperparameters were used during training:
43
  - total_train_batch_size: 512
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: cosine
46
- - num_epochs: 10
47
 
48
  ### Training results
49
 
50
  | Training Loss | Epoch | Step | Validation Loss |
51
  |:-------------:|:-----:|:----:|:---------------:|
52
- | 1.7095 | 0.09 | 1 | 1.7015 |
53
- | 1.7095 | 1.07 | 2 | 1.6854 |
54
- | 1.7095 | 2.06 | 3 | 1.6635 |
55
- | 1.7095 | 3.04 | 4 | 1.6377 |
56
- | 1.6715 | 4.02 | 5 | 1.6106 |
57
- | 1.6715 | 5.1 | 7 | 1.5584 |
58
- | 1.6715 | 6.08 | 8 | 1.5344 |
59
- | 1.6715 | 7.06 | 9 | 1.5132 |
60
- | 1.5588 | 8.05 | 10 | 1.4933 |
61
- | 1.5588 | 9.03 | 11 | 1.4756 |
62
 
63
 
64
  ### Framework versions
 
15
 
16
  This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
+ - Loss: 1.6381
19
 
20
  ## Model description
21
 
 
43
  - total_train_batch_size: 512
44
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
45
  - lr_scheduler_type: cosine
46
+ - num_epochs: 4
47
 
48
  ### Training results
49
 
50
  | Training Loss | Epoch | Step | Validation Loss |
51
  |:-------------:|:-----:|:----:|:---------------:|
52
+ | 1.7095 | 0.09 | 1 | 1.7014 |
53
+ | 1.7095 | 1.07 | 2 | 1.6855 |
54
+ | 1.7095 | 2.06 | 3 | 1.6637 |
55
+ | 1.7095 | 3.04 | 4 | 1.6382 |
 
 
 
 
 
 
56
 
57
 
58
  ### Framework versions
adapter_config.json CHANGED
@@ -16,10 +16,10 @@
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
19
- "q_proj",
20
- "o_proj",
21
  "v_proj",
22
- "k_proj"
 
 
23
  ],
24
  "task_type": "CAUSAL_LM"
25
  }
 
16
  "rank_pattern": {},
17
  "revision": null,
18
  "target_modules": [
 
 
19
  "v_proj",
20
+ "q_proj",
21
+ "k_proj",
22
+ "o_proj"
23
  ],
24
  "task_type": "CAUSAL_LM"
25
  }
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:666cc01e681b40ccae20914c7fad0fdcd73f72ed3f8d6b3d3d5f94c4575a253e
3
  size 218138576
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0cbcd1726281528770049545897e4929fdbcd2e3c520f2cd8517683dc9413b7b
3
  size 218138576
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
- "epoch": 9.03,
3
- "eval_loss": 1.4756207466125488,
4
- "eval_runtime": 6.3444,
5
  "eval_samples": 294,
6
- "eval_samples_per_second": 46.34,
7
- "eval_steps_per_second": 5.832,
8
- "train_loss": 1.7142409628087825,
9
- "train_runtime": 4135.8024,
10
  "train_samples": 5895,
11
- "train_samples_per_second": 14.254,
12
  "train_steps_per_second": 0.027
13
  }
 
1
  {
2
+ "epoch": 3.04,
3
+ "eval_loss": 1.6381348371505737,
4
+ "eval_runtime": 6.3457,
5
  "eval_samples": 294,
6
+ "eval_samples_per_second": 46.331,
7
+ "eval_steps_per_second": 5.831,
8
+ "train_loss": 1.9840126037597656,
9
+ "train_runtime": 1653.0192,
10
  "train_samples": 5895,
11
+ "train_samples_per_second": 14.265,
12
  "train_steps_per_second": 0.027
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 9.03,
3
- "eval_loss": 1.4756207466125488,
4
- "eval_runtime": 6.3444,
5
  "eval_samples": 294,
6
- "eval_samples_per_second": 46.34,
7
- "eval_steps_per_second": 5.832
8
  }
 
1
  {
2
+ "epoch": 3.04,
3
+ "eval_loss": 1.6381348371505737,
4
+ "eval_runtime": 6.3457,
5
  "eval_samples": 294,
6
+ "eval_samples_per_second": 46.331,
7
+ "eval_steps_per_second": 5.831
8
  }
runs/Dec14_19-31-24_s4311/events.out.tfevents.1702582363.s4311.897573.0 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8e986c304032fa4314bd17cd1ec247ab0c55cf74221235993be6aa019bbb297
3
+ size 5819
runs/Dec14_19-31-24_s4311/events.out.tfevents.1702584022.s4311.897573.1 ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:898d7be29463605ee33deb4c69f44f454a32dc33c9c1920f1dc705b0424861e3
3
+ size 354
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "epoch": 9.03,
3
- "train_loss": 1.7142409628087825,
4
- "train_runtime": 4135.8024,
5
  "train_samples": 5895,
6
- "train_samples_per_second": 14.254,
7
  "train_steps_per_second": 0.027
8
  }
 
1
  {
2
+ "epoch": 3.04,
3
+ "train_loss": 1.9840126037597656,
4
+ "train_runtime": 1653.0192,
5
  "train_samples": 5895,
6
+ "train_samples_per_second": 14.265,
7
  "train_steps_per_second": 0.027
8
  }
trainer_state.json CHANGED
@@ -1,126 +1,66 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 9.033242876526458,
5
  "eval_steps": 500,
6
- "global_step": 11,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.09,
13
- "learning_rate": 1.9995921928281893e-05,
14
  "loss": 1.7095,
15
  "step": 1
16
  },
17
  {
18
  "epoch": 0.09,
19
- "eval_loss": 1.701454997062683,
20
- "eval_runtime": 6.3744,
21
- "eval_samples_per_second": 46.122,
22
- "eval_steps_per_second": 5.804,
23
  "step": 1
24
  },
25
  {
26
  "epoch": 1.07,
27
- "eval_loss": 1.6854428052902222,
28
- "eval_runtime": 6.363,
29
- "eval_samples_per_second": 46.204,
30
- "eval_steps_per_second": 5.815,
31
  "step": 2
32
  },
33
  {
34
  "epoch": 2.06,
35
- "eval_loss": 1.6634844541549683,
36
- "eval_runtime": 6.3663,
37
- "eval_samples_per_second": 46.181,
38
- "eval_steps_per_second": 5.812,
39
  "step": 3
40
  },
41
  {
42
  "epoch": 3.04,
43
- "eval_loss": 1.6377184391021729,
44
- "eval_runtime": 6.3346,
45
- "eval_samples_per_second": 46.412,
46
- "eval_steps_per_second": 5.841,
47
  "step": 4
48
  },
49
  {
50
- "epoch": 4.02,
51
- "learning_rate": 1.989821441880933e-05,
52
- "loss": 1.6715,
53
- "step": 5
54
- },
55
- {
56
- "epoch": 4.02,
57
- "eval_loss": 1.6106265783309937,
58
- "eval_runtime": 6.3433,
59
- "eval_samples_per_second": 46.348,
60
- "eval_steps_per_second": 5.833,
61
- "step": 5
62
- },
63
- {
64
- "epoch": 5.1,
65
- "eval_loss": 1.5584189891815186,
66
- "eval_runtime": 6.3627,
67
- "eval_samples_per_second": 46.206,
68
- "eval_steps_per_second": 5.815,
69
- "step": 7
70
- },
71
- {
72
- "epoch": 6.08,
73
- "eval_loss": 1.5343831777572632,
74
- "eval_runtime": 6.3403,
75
- "eval_samples_per_second": 46.37,
76
- "eval_steps_per_second": 5.836,
77
- "step": 8
78
- },
79
- {
80
- "epoch": 7.06,
81
- "eval_loss": 1.5131820440292358,
82
- "eval_runtime": 6.3297,
83
- "eval_samples_per_second": 46.448,
84
- "eval_steps_per_second": 5.845,
85
- "step": 9
86
- },
87
- {
88
- "epoch": 8.05,
89
- "learning_rate": 1.9594929736144978e-05,
90
- "loss": 1.5588,
91
- "step": 10
92
- },
93
- {
94
- "epoch": 8.05,
95
- "eval_loss": 1.4932821989059448,
96
- "eval_runtime": 6.3196,
97
- "eval_samples_per_second": 46.522,
98
- "eval_steps_per_second": 5.855,
99
- "step": 10
100
- },
101
- {
102
- "epoch": 9.03,
103
- "eval_loss": 1.4755805730819702,
104
- "eval_runtime": 6.3502,
105
- "eval_samples_per_second": 46.298,
106
- "eval_steps_per_second": 5.827,
107
- "step": 11
108
- },
109
- {
110
- "epoch": 9.03,
111
- "step": 11,
112
- "total_flos": 5.3091621472200294e+17,
113
- "train_loss": 1.7142409628087825,
114
- "train_runtime": 4135.8024,
115
- "train_samples_per_second": 14.254,
116
  "train_steps_per_second": 0.027
117
  }
118
  ],
119
  "logging_steps": 5,
120
- "max_steps": 110,
121
- "num_train_epochs": 10,
122
  "save_steps": 500,
123
- "total_flos": 5.3091621472200294e+17,
124
  "trial_name": null,
125
  "trial_params": null
126
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.0400271370420624,
5
  "eval_steps": 500,
6
+ "global_step": 4,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
  "epoch": 0.09,
13
+ "learning_rate": 1.9974521146102535e-05,
14
  "loss": 1.7095,
15
  "step": 1
16
  },
17
  {
18
  "epoch": 0.09,
19
+ "eval_loss": 1.7013664245605469,
20
+ "eval_runtime": 6.3791,
21
+ "eval_samples_per_second": 46.088,
22
+ "eval_steps_per_second": 5.8,
23
  "step": 1
24
  },
25
  {
26
  "epoch": 1.07,
27
+ "eval_loss": 1.6854876279830933,
28
+ "eval_runtime": 6.3439,
29
+ "eval_samples_per_second": 46.344,
30
+ "eval_steps_per_second": 5.832,
31
  "step": 2
32
  },
33
  {
34
  "epoch": 2.06,
35
+ "eval_loss": 1.663727045059204,
36
+ "eval_runtime": 6.3388,
37
+ "eval_samples_per_second": 46.381,
38
+ "eval_steps_per_second": 5.837,
39
  "step": 3
40
  },
41
  {
42
  "epoch": 3.04,
43
+ "eval_loss": 1.6381714344024658,
44
+ "eval_runtime": 6.3614,
45
+ "eval_samples_per_second": 46.216,
46
+ "eval_steps_per_second": 5.816,
47
  "step": 4
48
  },
49
  {
50
+ "epoch": 3.04,
51
+ "step": 4,
52
+ "total_flos": 2.123664876067881e+17,
53
+ "train_loss": 1.9840126037597656,
54
+ "train_runtime": 1653.0192,
55
+ "train_samples_per_second": 14.265,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  "train_steps_per_second": 0.027
57
  }
58
  ],
59
  "logging_steps": 5,
60
+ "max_steps": 44,
61
+ "num_train_epochs": 4,
62
  "save_steps": 500,
63
+ "total_flos": 2.123664876067881e+17,
64
  "trial_name": null,
65
  "trial_params": null
66
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:82f9cbd5ae020148e5023e8a94238eb6d948e2533fd584833fd423ac94f00345
3
  size 4664
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2b24e1f15eab76911260c95314f76d27bf8972d0e9bf4fdbcf3205fc1db47f9a
3
  size 4664