DrishtiSharma committed
Commit 2368fc1
1 Parent(s): 651818b

End of training
README.md CHANGED
@@ -8,18 +8,18 @@ datasets:
 - generator
 base_model: NousResearch/Llama-2-7b-hf
 model-index:
-- name: llama2-7b-int4-dolly-15k-hindi-flash-attention-2-w-packing
+- name: llama2-7b-int4-dolly-15k-english-flash-attention2-w-packing
   results: []
 ---
 
 <!-- This model card has been generated automatically according to the information the Trainer had access to. You
 should probably proofread and complete it, then remove this comment. -->
 
-# llama2-7b-int4-dolly-15k-hindi-flash-attention-2-w-packing
+# llama2-7b-int4-dolly-15k-english-flash-attention2-w-packing
 
 This model is a fine-tuned version of [NousResearch/Llama-2-7b-hf](https://huggingface.co/NousResearch/Llama-2-7b-hf) on the generator dataset.
 It achieves the following results on the evaluation set:
-- Loss: 1.2200
+- Loss: 1.2201
 
 ## Model description
 
@@ -51,10 +51,10 @@ The following hyperparameters were used during training:
 
 | Training Loss | Epoch | Step | Validation Loss |
 |:-------------:|:-----:|:----:|:---------------:|
-| 1.2692        | 0.64  | 100  | 1.2311          |
-| 1.1911        | 1.27  | 200  | 1.2219          |
-| 1.1786        | 1.91  | 300  | 1.2171          |
-| 1.1377        | 2.55  | 400  | 1.2200          |
+| 1.2688        | 0.64  | 100  | 1.2310          |
+| 1.1907        | 1.27  | 200  | 1.2219          |
+| 1.178         | 1.91  | 300  | 1.2170          |
+| 1.1368        | 2.55  | 400  | 1.2201          |
 
 
 ### Framework versions
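For reference, a minimal sketch of loading this adapter for inference. The repo id below is an assumption inferred from the committer name and the model name in this commit, and the 4-bit settings and prompt format are illustrative, not taken from the training script.

```python
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
from peft import PeftModel

base_id = "NousResearch/Llama-2-7b-hf"
# Hypothetical adapter repo id, inferred from the commit; verify before use.
adapter_id = "DrishtiSharma/llama2-7b-int4-dolly-15k-english-flash-attention2-w-packing"

# 4-bit quantization mirrors the "int4" in the model name (assumed settings).
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_compute_dtype=torch.float16,
)

tokenizer = AutoTokenizer.from_pretrained(base_id)
base_model = AutoModelForCausalLM.from_pretrained(
    base_id,
    quantization_config=bnb_config,
    device_map="auto",
)
# Attach the LoRA adapter weights from this commit on top of the base model.
model = PeftModel.from_pretrained(base_model, adapter_id)

prompt = "### Instruction:\nSummarize this paragraph.\n\n### Response:\n"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
outputs = model.generate(**inputs, max_new_tokens=64)
print(tokenizer.decode(outputs[0], skip_special_tokens=True))
```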
adapter_config.json CHANGED
@@ -1,7 +1,7 @@
 {
   "alpha_pattern": {},
   "auto_mapping": null,
-  "base_model_name_or_path": null,
+  "base_model_name_or_path": "NousResearch/Llama-2-7b-hf",
   "bias": "none",
   "fan_in_fan_out": false,
   "inference_mode": true,
@@ -19,9 +19,9 @@
   "rank_pattern": {},
   "revision": null,
   "target_modules": [
+    "o_proj",
     "k_proj",
     "q_proj",
-    "o_proj",
     "v_proj"
   ],
   "task_type": "CAUSAL_LM",
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9c6fc3395801474a11d75ca2bc6cc4d780bcc91f5dfc2d1ad03afa00ceb22b32
-size 268474624
+oid sha256:b5a2b6aecd25e7361cf2b7d68ea41ba7a91054e9ca653c3883b82a1e774f282f
+size 268470272
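This hunk is a Git LFS pointer, not the tensor data itself: only the sha256 and byte size of `adapter_model.safetensors` changed. A minimal sketch for verifying a locally downloaded copy against the new pointer, using the hash and size from this diff (the local path is an assumption):

```python
import hashlib
from pathlib import Path

EXPECTED_SHA256 = "b5a2b6aecd25e7361cf2b7d68ea41ba7a91054e9ca653c3883b82a1e774f282f"
EXPECTED_SIZE = 268470272  # bytes, from the new LFS pointer

path = Path("adapter_model.safetensors")  # hypothetical local download path

h = hashlib.sha256()
size = 0
with path.open("rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):  # hash in 1 MiB chunks
        h.update(chunk)
        size += len(chunk)

assert size == EXPECTED_SIZE, f"size mismatch: {size} != {EXPECTED_SIZE}"
assert h.hexdigest() == EXPECTED_SHA256, "sha256 mismatch"
print("adapter_model.safetensors matches the LFS pointer")
```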
training_args.bin CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:ebb02a0f9e6aa49806b96231b3de7fe3d95fe75babf8b4c80b7e9a035bf55571
+oid sha256:8c5ad94a420aaaa7f468152d39e2474600276a6d27b953ec633d7930946cb2d7
 size 4792
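`training_args.bin` is the pickled `TrainingArguments` object that the `Trainer` saves alongside the weights; only its hash changed here (same 4792-byte size). A minimal sketch of inspecting it, assuming you trust the file, since unpickling executes arbitrary code:

```python
import torch

# weights_only=False is required because this is a pickled Python object,
# not a tensor checkpoint; only do this for files you trust.
args = torch.load("training_args.bin", weights_only=False)
print(type(args).__name__)  # expected: TrainingArguments
print(args.learning_rate, args.num_train_epochs, args.per_device_train_batch_size)
```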