vxbrandon committed
Commit 5c020b5
1 Parent(s): 2e659db

Training in progress, step 23

README.md CHANGED
@@ -15,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
 
 This model is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) on the None dataset.
 It achieves the following results on the evaluation set:
- - Loss: 2.3244
+ - Loss: 2.3265
 
 ## Model description
 
@@ -36,13 +36,13 @@ More information needed
 The following hyperparameters were used during training:
 - learning_rate: 1e-05
 - train_batch_size: 1
-- eval_batch_size: 1
+- eval_batch_size: 2
 - seed: 0
 - distributed_type: multi-GPU
-- num_devices: 4
+- num_devices: 3
 - gradient_accumulation_steps: 4
-- total_train_batch_size: 16
-- total_eval_batch_size: 4
+- total_train_batch_size: 12
+- total_eval_batch_size: 6
 - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
 - lr_scheduler_type: linear
 - training_steps: 1
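
The updated hyperparameters are internally consistent: the reported totals are products of the per-device sizes, the device count, and (for training) the gradient accumulation steps. A minimal sketch of that arithmetic, using the values from this revision:

```python
# Effective batch sizes from the README of this revision, derived from
# the per-device settings listed above.
train_batch_size = 1             # per-device train batch size
eval_batch_size = 2              # per-device eval batch size
num_devices = 3                  # GPUs used in distributed training
gradient_accumulation_steps = 4

total_train_batch_size = train_batch_size * num_devices * gradient_accumulation_steps
total_eval_batch_size = eval_batch_size * num_devices

assert total_train_batch_size == 12  # matches the README
assert total_eval_batch_size == 6    # matches the README
```

The old values satisfied the same identities (1 × 4 × 4 = 16 and 1 × 4 = 4), so the diff reflects a move from 4 devices to 3 with a larger per-device eval batch.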
adapter_config.json CHANGED
@@ -21,9 +21,9 @@
 "target_modules": [
 "down_proj",
 "q_proj",
- "gate_proj",
 "v_proj",
- "up_proj"
+ "up_proj",
+ "gate_proj"
 ],
 "task_type": "CAUSAL_LM"
 }
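
The change only reorders target_modules so that "up_proj" and "gate_proj" come last; the same five projection modules are targeted, but since JSON is order-sensitive as text, the file hash still changes. A minimal sketch of how an equivalent adapter config could be built with the peft library; the r and lora_alpha values are placeholders, since the diff shows only target_modules and task_type:

```python
from peft import LoraConfig

# Sketch of the adapter config in this commit. Only target_modules and
# task_type come from the diff; r and lora_alpha are placeholder values
# that do not appear in the changed lines.
config = LoraConfig(
    r=16,           # placeholder rank, not shown in the diff
    lora_alpha=32,  # placeholder scaling, not shown in the diff
    target_modules=["down_proj", "q_proj", "v_proj", "up_proj", "gate_proj"],
    task_type="CAUSAL_LM",
)
```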
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:618f7c3c384f8c4df79ed9c7b90c7046f1b2138650a0204c3a84bc30b222faa1
+ oid sha256:a0e683818f3737d3f621de0742433e36175dd01e4cc1fb3ba73043092a94748a
 size 281061608
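
The binary files in this commit are Git LFS pointers: the repository stores only a version line, a SHA-256 oid, and the byte size, while the payload lives in LFS storage. A minimal sketch of checking a downloaded file against its pointer; the local path is illustrative:

```python
import hashlib

def verify_lfs_pointer(file_path: str, expected_oid: str, expected_size: int) -> bool:
    """Check a downloaded file against the oid/size recorded in its LFS pointer."""
    sha = hashlib.sha256()
    size = 0
    with open(file_path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):  # read in 1 MiB chunks
            sha.update(chunk)
            size += len(chunk)
    return sha.hexdigest() == expected_oid and size == expected_size

# Values from the new pointer in this commit:
ok = verify_lfs_pointer(
    "adapter_model.safetensors",  # illustrative local path
    "a0e683818f3737d3f621de0742433e36175dd01e4cc1fb3ba73043092a94748a",
    281061608,
)
```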
config.json CHANGED
@@ -23,38 +23,38 @@
 "rope_theta": 10000.0,
 "sliding_window": 4096,
 "thresholds": [
- 0.017051145434379578,
 0.023069201037287712,
- 0.049147434532642365,
- 0.051153454929590225,
- 0.061183542013168335,
- 0.07121363282203674,
- 0.0732196494936943,
- 0.07923770695924759,
- 0.08324974030256271,
- 0.08124372363090515,
- 0.089267797768116,
- 0.09127381443977356,
- 0.10130390524864197,
- 0.0992978885769844,
- 0.10732196271419525,
- 0.12337010353803635,
- 0.14343027770519257,
- 0.16148445010185242,
- 0.17953860759735107,
- 0.1935807317495346,
- 0.1995987892150879,
+ 0.03309928998351097,
+ 0.04312938079237938,
+ 0.05516548827290535,
+ 0.07522567361593246,
+ 0.09327983111143112,
+ 0.10531593859195709,
+ 0.11935807019472122,
+ 0.12738214433193207,
+ 0.12738214433193207,
+ 0.1313941776752472,
+ 0.13340020179748535,
+ 0.13941824436187744,
+ 0.1414242684841156,
+ 0.15546639263629913,
+ 0.1675025075674057,
+ 0.18555666506290436,
+ 0.19157472252845764,
+ 0.20762285590171814,
 0.2196589708328247,
- 0.2196589708328247,
- 0.23169508576393127,
- 0.2357071191072464,
- 0.23370109498500824,
- 0.225677028298378,
- 0.22968906164169312,
- 0.225677028298378,
 0.22768303751945496,
- 0.2457372099161148,
- 0.2678034007549286
+ 0.23771312832832336,
+ 0.2357071191072464,
+ 0.23771312832832336,
+ 0.24172517657279968,
+ 0.24172517657279968,
+ 0.24172517657279968,
+ 0.24172517657279968,
+ 0.24172517657279968,
+ 0.23971915245056152,
+ 0.2357071191072464,
+ 0.225677028298378
 ],
 "tie_word_embeddings": false,
 "torch_dtype": "bfloat16",
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:5911787808afa752c8244407a9d8cb33084339ffeed4eea9bbff386f12098f53
+ oid sha256:c33a22761a44ac1bbfcc481e6fa0d5aa9022678c6ccfdac9e824e1e36d01fff9
 size 4943162336
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:3601f0d928ba7b47182816f3315737a27f63661193641f306591225c498b1991
+ oid sha256:3d22bcdd4a37d68894daa852415cc04d78cfb3a342c02f8e1487315fc33815bb
 size 4999819336
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:79a4dc55e86e504b865080b2e14a23f4547c9db064eb409f1bc7bd7b066694b0
+ oid sha256:7412e818b679a6d0013cf994db7d8970df225f83a5e429609d2ebf32a832ec8d
 size 4540516344
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:66299b2c729096a65452b6d03a9daa98bf3280b2eaacbacf3df6caacd7ad1d74
+ oid sha256:62a9254f9424d6ed8aeff2636f645b370874f626a656b2e2b79b552c0a30f8ed
 size 6456