tloen committed on
Commit
28801ea
1 Parent(s): 4e4afc5

Actually masked loss

Browse files
Files changed (3) hide show
  1. README.md +23 -2
  2. adapter_config.json +4 -2
  3. adapter_model.bin +2 -2
README.md CHANGED
@@ -5,6 +5,27 @@ license: mit
5
  This repo contains a low-rank adapter for LLaMA-7b
6
  fit on the [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca) dataset.
7
 
8
- It doesn't contain the foundation model itself, so it's MIT licensed!
9
 
10
- Instructions for running it can be found at https://github.com/tloen/alpaca-lora.
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
5
  This repo contains a low-rank adapter for LLaMA-7b
6
  fit on the [Stanford Alpaca](https://github.com/tatsu-lab/stanford_alpaca) dataset.
7
 
8
+ This version of the weights was trained with the following hyperparameters:
9
 
10
+ - Epochs: 10 (load from best epoch)
11
+ - Batch size: 128
12
+ - Cutoff length: 512
13
+ - Learning rate: 3e-4
14
+ - LoRA _r_: 16
15
+ - LoRA target modules: q_proj, k_proj, v_proj, o_proj
16
+
17
+ That is:
18
+
19
+ ```
20
+ python finetune.py \
21
+ --base_model='decapoda-research/llama-7b-hf' \
22
+ --num_epochs=10 \
23
+ --cutoff_len=512 \
24
+ --group_by_length \
25
+ --output_dir='./lora-alpaca-512-qkvo' \
26
+ --lora_target_modules='[q_proj,k_proj,v_proj,o_proj]' \
27
+ --lora_r=16 \
28
+ --micro_batch_size=8
29
+ ```
30
+
31
+ Instructions for running it can be found at https://github.com/tloen/alpaca-lora.
adapter_config.json CHANGED
@@ -9,10 +9,12 @@
9
  "merge_weights": false,
10
  "modules_to_save": null,
11
  "peft_type": "LORA",
12
- "r": 8,
13
  "target_modules": [
14
  "q_proj",
15
- "v_proj"
 
 
16
  ],
17
  "task_type": "CAUSAL_LM"
18
  }
9
  "merge_weights": false,
10
  "modules_to_save": null,
11
  "peft_type": "LORA",
12
+ "r": 16,
13
  "target_modules": [
14
  "q_proj",
15
+ "k_proj",
16
+ "v_proj",
17
+ "o_proj"
18
  ],
19
  "task_type": "CAUSAL_LM"
20
  }
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:321e826099a0eacb1cf39916923eb6feb4327e8e5e09fe9f09a6d6d2a8595448
3
- size 16822989
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2e7187f51fbdeff8815046d30f0a325e43491040e6eac8cec5e2ba64f1e87807
3
+ size 67201357