winglian committed
Commit 0bd7682
1 Parent(s): 5fe22c5

update config embeddings

Files changed (2)
  1. config.json +1 -1
  2. configs/minotaur.yml +107 -0
config.json CHANGED
@@ -11,7 +11,7 @@
   "initializer_range": 0.02,
   "intermediate_size": 16384,
   "layer_norm_eps": 1e-05,
- "max_position_embeddings": 2048,
+ "max_position_embeddings": 8192,
  "model_type": "gpt_neox",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
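The config.json edit above raises max_position_embeddings for this GPT-NeoX model from 2048 to 8192, in line with the 8k-context checkpoint referenced as base_model in the new configs/minotaur.yml. A minimal sketch (not part of the commit; it assumes the edited config.json sits in that local checkpoint directory) for confirming the value with transformers:

from transformers import AutoConfig

# Assumed local checkpoint path, mirroring base_model in configs/minotaur.yml.
config = AutoConfig.from_pretrained("/workspace/pythia-6.9b-deduped-8k/")

print(config.model_type)               # gpt_neox
print(config.max_position_embeddings)  # 8192 after this commit (previously 2048)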
configs/minotaur.yml ADDED
@@ -0,0 +1,107 @@
+ base_model: /workspace/pythia-6.9b-deduped-8k/
+ base_model_config: EleutherAI/pythia-6.9b-deduped
+ model_type: GPTNeoXForCausalLM
+ tokenizer_type: AutoTokenizer
+ load_in_8bit: false
+ load_in_4bit: false
+ strict: False
+ gptq: false
+ device_map: auto
+ push_dataset_to_hub: winglian
+ hf_use_auth_token: true
+ # dataset_shard_num: 3
+ # dataset_shard_idx: 0
+ datasets:
+   - path: winglian/pygmalion-cleaned
+     data_files:
+       - v13_no_ai.cleaned.jsonl
+     type: pygmalion
+   - path: winglian/evals
+     data_files:
+       - hf/ARC-Challenge.jsonl
+       - hf/ARC-Easy.jsonl
+       - hf/riddle_sense.jsonl
+     type: explainchoice:chat
+   - path: winglian/evals
+     data_files:
+       - openai/tldr.jsonl
+     type: summarizetldr:chat
+   - path: winglian/evals
+     data_files:
+       - hf/gsm8k.jsonl
+     type: alpaca_chat.load_qa
+   - path: winglian/evals
+     data_files:
+       - hellaswag/hellaswag.jsonl
+     type: explainchoice:chat
+   - path: metaeval/ScienceQA_text_only
+     type: concisechoice:chat
+   - path: ehartford/WizardLM_alpaca_evol_instruct_70k_unfiltered
+     type: alpaca_chat
+   - path: ehartford/wizard_vicuna_70k_unfiltered
+     type: sharegpt:chat
+   - path: winglian/chatlogs-en-cleaned
+     data_files:
+       - sharegpt_cleaned.jsonl
+     type: sharegpt:chat
+   - path: teknium/GPT4-LLM-Cleaned
+     type: alpaca_chat
+   - path: teknium/GPTeacher-General-Instruct
+     data_files: gpt4-instruct-similarity-0.6-dataset.json
+     type: gpteacher:chat
+   - path: ewof/code-alpaca-instruct-unfiltered
+     type: alpaca_chat
+   - path: QingyiSi/Alpaca-CoT
+     data_files:
+       - Chain-of-Thought/formatted_cot_data/aqua_train.json
+       - Chain-of-Thought/formatted_cot_data/creak_train.json
+       - Chain-of-Thought/formatted_cot_data/ecqa_train.json
+       - Chain-of-Thought/formatted_cot_data/esnli_train.json
+       - Chain-of-Thought/formatted_cot_data/gsm8k_train.json
+       - Chain-of-Thought/formatted_cot_data/qasc_train.json
+       - Chain-of-Thought/formatted_cot_data/qed_train.json
+       - Chain-of-Thought/formatted_cot_data/sensemaking_train.json
+       - Chain-of-Thought/formatted_cot_data/strategyqa_train.json
+       - GPTeacher/Roleplay/formatted_roleplay-similarity_0.6-instruct-dataset.json
+     type: alpaca_chat
+ dataset_prepared_path: last_run_prepared
+ val_set_size: 0.01
+ adapter:
+ lora_model_dir:
+ sequence_len: 8192
+ max_packed_sequence_len: 8192
+ lora_r: 64
+ lora_alpha: 32
+ lora_dropout: 0.0
+ lora_target_modules:
+ lora_target_linear: true
+ lora_fan_in_fan_out: true # pythia/GPTNeoX lora specific
+ wandb_project: minotaur-7b
+ wandb_watch:
+ wandb_run_id:
+ wandb_log_model:
+ output_dir: ./minotaur-7b
+ gradient_accumulation_steps: 1
+ micro_batch_size: 4
+ num_epochs: 1
+ learning_rate: 0.00003
+ optimizer: adamw_bnb_8bit
+ lr_scheduler: cosine
+ warmup_steps: 100
+ save_steps: 84
+ eval_steps: 10
+ train_on_inputs: false
+ group_by_length: true
+ bf16: false
+ fp16: false
+ bfloat16: true
+ tf32: true
+ flash_optimum: true
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ gradient_checkpointing: true
+ fsdp:
+ fsdp_transformer_layer_cls_to_wrap:
+ collator_pad_to_longest: true
+ save_steps: 35
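For context, a minimal sketch (not part of the commit; it assumes PyYAML is installed) that loads the new axolotl config and checks that its packing lengths agree with the config.json change above. Note that save_steps appears twice (84, then 35); with PyYAML's default loader the later value takes effect.

import yaml

with open("configs/minotaur.yml") as f:
    cfg = yaml.safe_load(f)

# Sequence/packing lengths should match max_position_embeddings in config.json.
print(cfg["sequence_len"], cfg["max_packed_sequence_len"])  # 8192 8192
print(len(cfg["datasets"]))  # 13 dataset entries
print(cfg["save_steps"])     # 35 -- the later duplicate key wins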