prithviraj-maurya committed on
Commit 76cebef
1 Parent(s): 5618261

Upload folder using huggingface_hub

.gitattributes.gz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f4f958c3ffbc6d56674862b5fa2c1108f4f8423b3c4d68baa514da49cea8cc41
+ size 234
README.md.gz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b2cdd5c7a20eee9878696d181518f99e75712943315a0d486503573f9685b22f
+ size 1832
config.json ADDED
@@ -0,0 +1,25 @@
+ {
+   "architectures": [
+     "MistralForCausalLM"
+   ],
+   "attention_dropout": 0.0,
+   "bos_token_id": 1,
+   "eos_token_id": 2,
+   "hidden_act": "silu",
+   "hidden_size": 4096,
+   "initializer_range": 0.02,
+   "intermediate_size": 14336,
+   "max_position_embeddings": 32768,
+   "model_type": "mistral",
+   "num_attention_heads": 32,
+   "num_hidden_layers": 32,
+   "num_key_value_heads": 8,
+   "rms_norm_eps": 1e-05,
+   "rope_theta": 1000000.0,
+   "sliding_window": null,
+   "tie_word_embeddings": false,
+   "torch_dtype": "bfloat16",
+   "transformers_version": "4.36.0",
+   "use_cache": true,
+   "vocab_size": 32000
+ }
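The config.json above describes a standard Mistral-7B architecture: 32 transformer layers, grouped-query attention with 8 KV heads, 32k max positions, bfloat16 weights. A minimal sketch of inspecting it with transformers, assuming the uploaded files have been placed in a local directory `./checkpoint` (the path is an assumption, not part of this upload):

```python
from transformers import AutoConfig

# Read the architecture description from config.json above.
# "./checkpoint" is an assumed local download directory.
config = AutoConfig.from_pretrained("./checkpoint")

print(config.model_type)               # "mistral"
print(config.num_hidden_layers)        # 32
print(config.num_key_value_heads)      # 8 -> grouped-query attention
print(config.max_position_embeddings)  # 32768
```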
generation_config.json.gz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9347ec7bd04b2842e56166aba8e77951ee01f0b90eb8eb6369dfef4c71cb40db
+ size 130
model.safetensors.index.json.gz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ea6607c0462fd5e531ed494292771b6d0f80ad9aec33b9fed6172aef84cd69a2
+ size 1220
pytorch_model-00001-of-00003.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d8836f675fe1c4c43f3ff4e93f4cc0e97ef7a13e8c240fb39ad02d37ff303ef5
+ size 4943184288
pytorch_model-00002-of-00003.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:58a7ddffb463397de5dbe1f1e2ec1ccf6aae2b549565f83f3ded124e0b4c5069
+ size 4999843272
pytorch_model-00003-of-00003.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:75824d68dcf82d02b731b2bdfd3a9711acb7c58b8d566f4c0d3e9efac52f9a21
+ size 5064824210
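Each .bin entry above is a Git LFS pointer rather than the weights themselves: the oid line is the SHA-256 of the real ~5 GB shard and the size line is its byte count. A minimal sketch for verifying downloaded shards against these pointers (digests copied from the diff above; paths assume the shards sit in the current directory):

```python
import hashlib

# SHA-256 digests copied from the LFS pointer files above.
EXPECTED = {
    "pytorch_model-00001-of-00003.bin": "d8836f675fe1c4c43f3ff4e93f4cc0e97ef7a13e8c240fb39ad02d37ff303ef5",
    "pytorch_model-00002-of-00003.bin": "58a7ddffb463397de5dbe1f1e2ec1ccf6aae2b549565f83f3ded124e0b4c5069",
    "pytorch_model-00003-of-00003.bin": "75824d68dcf82d02b731b2bdfd3a9711acb7c58b8d566f4c0d3e9efac52f9a21",
}

def sha256_of(path, chunk_size=1 << 20):
    """Hash a large file in chunks so a ~5 GB shard never sits in memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

for name, expected in EXPECTED.items():
    print(name, "OK" if sha256_of(name) == expected else "MISMATCH")
```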
pytorch_model.bin.index.json.gz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1d102e7e25a15163ceb36a16f33e5528d0e09166d7239f035fbcce9e6d5d06c8
+ size 1173
special_tokens_map.json.gz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ead195241576930d3f554cef07b3af64b461bd1be45f6fcd48027c1011a17faa
+ size 86
tokenizer.json.gz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d7117e273fc04b34070250598e898ad986f3e7d6d35175c515115b320fe8e999
+ size 470451
tokenizer.model.gz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6e79974504b88d369289ba3f7a5c19edc1ca2877be925e163f3ea39a2661b4a0
+ size 220844
tokenizer_config.json.gz ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:159ba10d29e920f92b586c39106859c2622408b47e8bc3f322f738f272f05aaa
+ size 564
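The tokenizer artifacts in this upload are gzip-compressed (tokenizer.json.gz, tokenizer.model.gz, tokenizer_config.json.gz, special_tokens_map.json.gz), so they would need decompressing before transformers can read them. A minimal sketch, assuming the .gz files were downloaded into `./checkpoint` (an assumed path):

```python
import gzip
import shutil
from transformers import AutoTokenizer

# Decompress the gzipped tokenizer files so AutoTokenizer can find them.
# "./checkpoint" is an assumed local download directory.
names = ["tokenizer.json", "tokenizer.model", "tokenizer_config.json", "special_tokens_map.json"]
for name in names:
    with gzip.open(f"./checkpoint/{name}.gz", "rb") as src, open(f"./checkpoint/{name}", "wb") as dst:
        shutil.copyfileobj(src, dst)

tokenizer = AutoTokenizer.from_pretrained("./checkpoint")
print(tokenizer("What is the statute of limitations?").input_ids[:10])
```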
torchtune_config.yaml ADDED
@@ -0,0 +1,62 @@
+ tokenizer:
+   _component_: torchtune.models.mistral.mistral_tokenizer
+   path: /tmp/Mistral-7B-Instruct-v0.2/tokenizer.model
+ dataset:
+   _component_: torchtune.datasets.instruct_dataset
+   source: prithviraj-maurya/legalbench-entire
+   template: AlpacaInstructTemplate
+   column_map:
+     instruction: instruction
+     input: question
+     output: answer
+   max_seq_len: 256
+   train_on_input: true
+   split: train
+ seed: null
+ shuffle: true
+ model:
+   _component_: torchtune.models.mistral.qlora_mistral_7b
+   lora_attn_modules:
+   - q_proj
+   - k_proj
+   - v_proj
+   apply_lora_to_mlp: true
+   apply_lora_to_output: false
+   lora_rank: 64
+   lora_alpha: 16
+ checkpointer:
+   _component_: torchtune.utils.FullModelHFCheckpointer
+   checkpoint_dir: /tmp/Mistral-7B-Instruct-v0.2
+   checkpoint_files:
+   - pytorch_model-00001-of-00003.bin
+   - pytorch_model-00002-of-00003.bin
+   - pytorch_model-00003-of-00003.bin
+   recipe_checkpoint: null
+   output_dir: /tmp/Mistral-7B-Instruct-v0.2
+   model_type: MISTRAL
+ resume_from_checkpoint: false
+ optimizer:
+   _component_: torch.optim.AdamW
+   lr: 2.0e-05
+ lr_scheduler:
+   _component_: torchtune.modules.get_cosine_schedule_with_warmup
+   num_warmup_steps: 100
+ loss:
+   _component_: torch.nn.CrossEntropyLoss
+ batch_size: 4
+ epochs: 10
+ max_steps_per_epoch: 100000
+ gradient_accumulation_steps: 4
+ compile: false
+ device: cuda
+ enable_activation_checkpointing: true
+ dtype: fp32
+ output_dir: /logs/mistral_7b_qlora_single_device_finetune
+ metric_logger:
+   _component_: torchtune.utils.metric_logging.WandBLogger
+   project: torchtune
+ log_every_n_steps: 10
+ profiler:
+   _component_: torchtune.utils.profiler
+   enabled: false
+   output_dir: /tmp/alpaca-llama2-finetune/torchtune_perf_tracing.json
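torchtune_config.yaml records the QLoRA fine-tuning recipe behind this checkpoint: Mistral-7B-Instruct-v0.2 with rank-64 LoRA on the q/k/v projections and the MLP, trained on the prithviraj-maurya/legalbench-entire instruction dataset with AdamW at 2e-5. A minimal sketch of inspecting the config with OmegaConf (the format torchtune configs use); the `tune run` command in the final comment is an assumption about how such a config is typically launched, not something recorded in this commit:

```python
from omegaconf import OmegaConf

# Load the recipe config uploaded alongside the checkpoint.
cfg = OmegaConf.load("torchtune_config.yaml")

print(cfg.model._component_)  # torchtune.models.mistral.qlora_mistral_7b
print(cfg.dataset.source)     # prithviraj-maurya/legalbench-entire
print(cfg.optimizer.lr, cfg.batch_size, cfg.gradient_accumulation_steps)

# torchtune resolves each `_component_` path into a Python object at runtime.
# The recipe itself is normally launched from the CLI, roughly:
#   tune run lora_finetune_single_device --config torchtune_config.yaml
# (the recipe name above is an assumption, not taken from this upload)
```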