This model is a fine-tuned version of yahma/llama-7b-hf, produced by finetune.py from the GitHub repository tloen/alpaca-lora with the parameters listed below. The training dataset can be found at https://github.com/ddzipp/AutoAudit_LLM_Dataset.

```python
from typing import List

def train(
    # model/data params
    base_model: str = "yahma/llama-7b-hf",
    data_path: str = "",  # dataset: https://github.com/ddzipp/AutoAudit_LLM_Dataset/tree/v0.0.1
    output_dir: str = "./autoaudit_20230703_attempt1",
    # training hyperparams
    batch_size: int = 4,
    micro_batch_size: int = 1,
    num_epochs: int = 14,
    learning_rate: float = 3e-4,
    cutoff_len: int = 512,
    val_set_size: int = 400,
    # lora hyperparams
    lora_r: int = 16,
    lora_alpha: int = 16,
    lora_dropout: float = 0.05,
    lora_target_modules: List[str] = [
        "q_proj",
        "k_proj",
        "v_proj",
        "o_proj",
    ],
    # llm hyperparams
    train_on_inputs: bool = True,  # if False, masks out inputs in loss
    add_eos_token: bool = False,
    group_by_length: bool = False,  # faster, but produces an odd training loss curve
    # (remaining finetune.py arguments left at their defaults)
):
    ...
```
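
finetune.py dispatches these arguments through python-fire, so the run above can also be launched from the command line. The sketch below is a hypothetical reproduction: the data path is a placeholder for the dataset JSON from the AutoAudit_LLM_Dataset repository, and flags already at their finetune.py defaults (train_on_inputs, add_eos_token, group_by_length) are omitted.

```bash
# Hypothetical reproduction of this run; --data_path is a placeholder for the
# dataset JSON from https://github.com/ddzipp/AutoAudit_LLM_Dataset/tree/v0.0.1
python finetune.py \
    --base_model 'yahma/llama-7b-hf' \
    --data_path './AutoAudit_LLM_Dataset/data.json' \
    --output_dir './autoaudit_20230703_attempt1' \
    --batch_size 4 \
    --micro_batch_size 1 \
    --num_epochs 14 \
    --learning_rate 3e-4 \
    --cutoff_len 512 \
    --val_set_size 400 \
    --lora_r 16 \
    --lora_alpha 16 \
    --lora_dropout 0.05 \
    --lora_target_modules '["q_proj","k_proj","v_proj","o_proj"]'
```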
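
finetune.py saves only the LoRA adapter weights to output_dir, so inference requires re-attaching the adapter to the frozen base model. Below is a minimal inference sketch using transformers and peft, assuming this repository follows the standard PEFT adapter layout; the prompt string and generation settings are illustrative, not part of the original run.

```python
import torch
from peft import PeftModel
from transformers import LlamaForCausalLM, LlamaTokenizer

base_model = "yahma/llama-7b-hf"                # same base model used for finetuning
adapter_path = "./autoaudit_20230703_attempt1"  # output_dir from the run above

tokenizer = LlamaTokenizer.from_pretrained(base_model)
model = LlamaForCausalLM.from_pretrained(
    base_model,
    torch_dtype=torch.float16,
    device_map="auto",
)
# Attach the LoRA adapter produced by finetune.py on top of the frozen base weights.
model = PeftModel.from_pretrained(model, adapter_path)
model.eval()

# Illustrative Alpaca-style prompt; adjust to the template used during training.
prompt = (
    "### Instruction:\n"
    "Analyze the following statement for potential security issues.\n\n"
    "### Response:\n"
)
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
with torch.no_grad():
    output = model.generate(**inputs, max_new_tokens=256)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```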