Text Generation
Transformers
PyTorch
gpt_bigcode
code
Inference Endpoints
text-generation-inference
4 papers
winglian committed on
Commit
5af0f64
1 Parent(s): 80903f0

Create configs/config.yml

Browse files
Files changed (1) hide show
  1. configs/config.yml +140 -0
configs/config.yml ADDED
@@ -0,0 +1,140 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ base_model: bigcode/starcoderplus
2
+ base_model_config: bigcode/starcoderplus
3
+ load_in_8bit: false
4
+ load_in_4bit: true
5
+ gptq: false
6
+ strict: false
7
+ push_dataset_to_hub: winglian
8
+ hf_use_auth_token: true
9
+ datasets:
10
+ - path: winglian/evals
11
+   data_files:
12
+     - hf/ARC-Challenge.jsonl
13
+     - hf/ARC-Easy.jsonl
14
+     - hf/riddle_sense.jsonl
15
+     - hf/piqa.jsonl
16
+   type: explainchoice:chat
17
+ - path: winglian/evals
18
+   data_files:
19
+     - hf/gsm8k.jsonl
20
+     - hf/winogrande.jsonl
21
+   type: alpaca_chat.load_qa
22
+ - path: winglian/evals
23
+   data_files:
24
+     - custom/n_task.jsonl
25
+     - custom/misconceptions.jsonl
26
+     - custom/context_insensitivity.jsonl
27
+   type: alpaca_chat
28
+ - path: camel-ai/math
29
+   type: alpaca_chat.load_camel_ai
30
+ - path: camel-ai/biology
31
+   type: alpaca_chat.load_camel_ai
32
+ - path: camel-ai/physics
33
+   type: alpaca_chat.load_camel_ai
34
+ - path: camel-ai/chemistry
35
+   type: alpaca_chat.load_camel_ai
36
+ - path: winglian/evals
37
+   data_files:
38
+     - custom/in_context_qa.jsonl
39
+   type: context_qa
40
+ - path: winglian/evals
41
+   data_files:
42
+     - custom/in_context_qa.jsonl
43
+   type: context_qa.load_404
44
+ - path: winglian/evals
45
+   data_files:
46
+     - custom/jokes_explained_500up.jsonl
47
+   type: sharegpt_jokes
48
+ - path: winglian/evals
49
+   data_files:
50
+     - custom/classify-self-chat.sharegpt.jsonl
51
+     - custom/coding-self-chat.sharegpt.jsonl
52
+     - custom/prose-gpt4.sharegpt.jsonl
53
+     - custom/prose-rewrite-gpt4.sharegpt.jsonl
54
+   type: sharegpt_simple.load_role
55
+ - path: winglian/evals
56
+   data_files:
57
+     - openai/tldr.jsonl
58
+   type: summarizetldr:chat
59
+ - path: winglian/evals
60
+   data_files:
61
+     - hellaswag/hellaswag.jsonl
62
+   type: explainchoice:chat
63
+ - path: metaeval/ScienceQA_text_only
64
+   type: concisechoice:chat
65
+ - path: teknium/GPT4-LLM-Cleaned
66
+   type: alpaca_chat
67
+ - path: teknium/GPTeacher-General-Instruct
68
+   data_files: gpt4-instruct-similarity-0.6-dataset.json
69
+   type: gpteacher:chat
70
+ - path: QingyiSi/Alpaca-CoT
71
+   data_files:
72
+     - Chain-of-Thought/formatted_cot_data/aqua_train.json
73
+     - Chain-of-Thought/formatted_cot_data/creak_train.json
74
+     - Chain-of-Thought/formatted_cot_data/ecqa_train.json
75
+     - Chain-of-Thought/formatted_cot_data/esnli_train.json
76
+     - Chain-of-Thought/formatted_cot_data/qasc_train.json
77
+     - Chain-of-Thought/formatted_cot_data/qed_train.json
78
+     - Chain-of-Thought/formatted_cot_data/sensemaking_train.json
79
+     - Chain-of-Thought/formatted_cot_data/strategyqa_train.json
80
+     - GPTeacher/Roleplay/formatted_roleplay-similarity_0.6-instruct-dataset.json
81
+   type: alpaca_chat
82
+ - path: ehartford/WizardLM_alpaca_evol_instruct_70k_unfiltered
83
+   type: alpaca_chat
84
+ - path: ehartford/wizard_vicuna_70k_unfiltered
85
+   type: sharegpt:chat
86
+ dataset_prepared_path: last_run_prepared
87
+ val_set_size: 0.01
88
+ adapter: qlora
89
+ lora_model_dir:
90
+ sequence_len: 8192
91
+ max_packed_sequence_len: 8192
92
+ lora_r: 40
93
+ lora_alpha: 32
94
+ lora_dropout: 0.1
95
+ lora_target_modules:
96
+ - c_attn
97
+ - c_proj
98
+ - c_fc
99
+ lora_target_linear: true
100
+ lora_fan_in_fan_out:
101
+ wandb_project: minotaur-16b-8k
102
+ wandb_watch:
103
+ wandb_run_id:
104
+ wandb_log_model:
105
+ output_dir: ./minotaur-16b-8k
106
+ gradient_accumulation_steps: 1
107
+ micro_batch_size: 1
108
+ num_epochs: 3
109
+ optimizer: adamw_bnb_8bit
110
+ torchdistx_path:
111
+ lr_scheduler: cosine
112
+ learning_rate: 0.00013
113
+ train_on_inputs: false
114
+ group_by_length: true
115
+ bf16: true
116
+ fp16: false
117
+ tf32: true
118
+ gradient_checkpointing: true
119
+ early_stopping_patience:
120
+ resume_from_checkpoint:
121
+ local_rank:
122
+ logging_steps: 1
123
+ xformers_attention: false
124
+ flash_attention:
125
+ gptq_groupsize:
126
+ gptq_model_v1:
127
+ warmup_steps: 100
128
+ eval_steps: 20
129
+ save_steps: 51
130
+ load_best_model_at_end: false
131
+ debug:
132
+ deepspeed:
133
+ weight_decay: 0.01
134
+ fsdp:
135
+ fsdp_config:
136
+ special_tokens:
137
+   pad_token: "<|endoftext|>"
138
+   bos_token: "<|endoftext|>"
139
+   eos_token: "<|endoftext|>"
140
+   unk_token: "<|endoftext|>"