interstellarninja committed
Commit 0a2ae67
1 Parent(s): f6ea201

Upload 4 files

Files changed (4):
  1. README.md +21 -0
  2. adapter_config.json +26 -0
  3. adapter_model.bin +3 -0
  4. qlora_toolformer.yml +80 -0
README.md ADDED
@@ -0,0 +1,21 @@
+ ---
+ library_name: peft
+ ---
+ ## Training procedure
+
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - load_in_8bit: False
+ - load_in_4bit: True
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: nf4
+ - bnb_4bit_use_double_quant: True
+ - bnb_4bit_compute_dtype: bfloat16
+ ### Framework versions
+
+
+ - PEFT 0.6.0.dev0
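For reference, a minimal sketch of how the quantization settings above map onto `transformers`' `BitsAndBytesConfig`; the base-model load at the end is an assumption (the model name comes from `adapter_config.json` below, not from the README itself):

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Mirrors the bitsandbytes settings listed in the README above.
bnb_config = BitsAndBytesConfig(
    load_in_8bit=False,
    load_in_4bit=True,
    llm_int8_threshold=6.0,
    llm_int8_skip_modules=None,
    llm_int8_enable_fp32_cpu_offload=False,
    llm_int8_has_fp16_weight=False,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Assumption: loading the base model named in adapter_config.json below.
model = AutoModelForCausalLM.from_pretrained(
    "codellama/CodeLlama-7b-hf",
    quantization_config=bnb_config,
    device_map="auto",
)
```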
adapter_config.json ADDED
@@ -0,0 +1,26 @@
+ {
+   "auto_mapping": null,
+   "base_model_name_or_path": "codellama/CodeLlama-7b-hf",
+   "bias": "none",
+   "fan_in_fan_out": null,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "lora_alpha": 16,
+   "lora_dropout": 0.0,
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 64,
+   "revision": null,
+   "target_modules": [
+     "down_proj",
+     "o_proj",
+     "v_proj",
+     "k_proj",
+     "gate_proj",
+     "q_proj",
+     "up_proj"
+   ],
+   "task_type": "CAUSAL_LM"
+ }
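A minimal sketch of attaching this adapter with PEFT, assuming a local checkout of these files (`adapter_path` is a placeholder, not a path from this commit):

```python
import torch
from peft import PeftConfig, PeftModel
from transformers import AutoModelForCausalLM

adapter_path = "path/to/this/repo"  # placeholder for a local checkout of these files

# PeftConfig parses the adapter_config.json shown above.
config = PeftConfig.from_pretrained(adapter_path)

base_model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,  # "codellama/CodeLlama-7b-hf"
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Attach the LoRA weights stored in adapter_model.bin.
model = PeftModel.from_pretrained(base_model, adapter_path)
model.eval()
```

Note that `r: 64` with `lora_alpha: 16` gives a LoRA scaling factor of alpha/r = 0.25 when the adapter output is added to the base weights.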
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8d779580966e71238fd150bdfc24bc6c0717f613c9b2e01c94ee7b9e932348ba
+ size 639792909
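Only this Git LFS pointer appears in the diff; the ~640 MB weight file itself lives in LFS. A quick sketch for verifying a downloaded copy against the pointer's oid and size:

```python
import hashlib
import os

# Values copied from the LFS pointer above.
EXPECTED_OID = "8d779580966e71238fd150bdfc24bc6c0717f613c9b2e01c94ee7b9e932348ba"
EXPECTED_SIZE = 639792909

path = "adapter_model.bin"  # assumes the real weights, not the pointer, were fetched

assert os.path.getsize(path) == EXPECTED_SIZE, "size mismatch"

h = hashlib.sha256()
with open(path, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)
assert h.hexdigest() == EXPECTED_OID, "sha256 mismatch"
```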
qlora_toolformer.yml ADDED
@@ -0,0 +1,80 @@
+ base_model: codellama/CodeLlama-7b-hf
+ base_model_config: codellama/CodeLlama-7b-hf
+ model_type: LlamaForCausalLM
+ tokenizer_type: CodeLlamaTokenizer
+
+ load_in_8bit: false
+ load_in_4bit: true
+ strict: false
+
+ datasets:
+   - path: /home/interstellarninja/projects/axolotl/dataset/gorilla-16k.json
+     type: alpaca
+ dataset_prepared_path: last_run_prepared
+ val_set_size: 0.05
+ output_dir: ./qlora-out
+
+ adapter: qlora
+ lora_model_dir:
+
+ sequence_len: 2048
+ max_packed_sequence_len:
+ lora_r: 64
+ lora_alpha: 16
+ lora_dropout: 0.00
+ lora_target_modules:
+   - gate_proj
+   - down_proj
+   - up_proj
+   - q_proj
+   - v_proj
+   - k_proj
+   - o_proj
+ lora_target_linear: true
+ lora_fan_in_fan_out:
+
+ wandb_project: llama-2-toolformer
+ wandb_watch:
+ wandb_log_model:
+
+ data_seed: 42
+ seed: 42
+
+ gradient_accumulation_steps: 4
+ micro_batch_size: 4
+ num_epochs: 1
+ optimizer: adamw_bnb_8bit
+ lr_scheduler: constant_with_warmup
+ learning_rate: 0.00002
+
+ train_on_inputs: false
+ group_by_length: false
+ bf16: true
+ fp16: false
+ tf32: false
+
+ gradient_checkpointing: true
+ early_stopping_patience: 5
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 1
+ xformers_attention: false
+ flash_attention: true
+
+ eval_steps: 200
+ save_steps: 200
+ save_total_limit: 5
+ load_best_model_at_end: true
+ greater_is_better: false
+ metric_for_best_model: eval_loss
+ do_mmlu_eval: true
+
+ debug:
+ deepspeed:
+ weight_decay: 0.0
+ fsdp:
+ fsdp_config:
+ special_tokens:
+   bos_token: "<s>"
+   eos_token: "</s>"
+   unk_token: "<unk>"
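The `datasets` entry points at a local gorilla-16k.json consumed with axolotl's `alpaca` prompt format. A hypothetical record of the shape that format expects (field values invented for illustration; the actual dataset is not part of this commit):

```python
# Hypothetical alpaca-format record: axolotl's `type: alpaca` reads
# instruction/input/output fields. The values below are invented.
record = {
    "instruction": "Generate the API call for an image classification model.",
    "input": "",      # optional extra context; often empty
    "output": "...",  # the completion the model is trained to produce
}

# With train_on_inputs: false in the config above, loss is computed only on
# the output tokens, not on the instruction/input portion of the prompt.
```

With `gradient_accumulation_steps: 4` and `micro_batch_size: 4`, the effective batch size is 16 sequences per device. Depending on the axolotl version, a config like this is typically launched with something along the lines of `accelerate launch -m axolotl.cli.train qlora_toolformer.yml` (older releases used `scripts/finetune.py`).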