vincentlinzhu
committed
Training in progress, epoch 1
- adapter_config.json +34 -0
- adapter_model.safetensors +3 -0
- dpo_training.yaml +62 -0
- runs/Sep12_06-52-21_a100/events.out.tfevents.1726123985.a100.1178374.0 +3 -0
- special_tokens_map.json +23 -0
- tokenizer.json +0 -0
- tokenizer_config.json +42 -0
- training_args.bin +3 -0
adapter_config.json
ADDED
@@ -0,0 +1,34 @@
+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "deepseek-ai/DeepSeek-Prover-V1",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 16,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "k_proj",
+    "gate_proj",
+    "up_proj",
+    "v_proj",
+    "o_proj",
+    "q_proj",
+    "down_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}
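
The file above is a PEFT LoRA adapter config rather than full model weights. A minimal sketch of how the adapter could be attached to the base model for inference, assuming the adapter files sit in the root of this repository checkout (the local path and loading settings are assumptions, not part of this commit):

# Sketch (assumed usage, not part of this commit): attach the LoRA adapter
# described by adapter_config.json / adapter_model.safetensors to the base model.
from peft import PeftConfig, PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

adapter_path = "."  # assumed: the root of this repository checkout
peft_config = PeftConfig.from_pretrained(adapter_path)

base_model = AutoModelForCausalLM.from_pretrained(
    peft_config.base_model_name_or_path,  # "deepseek-ai/DeepSeek-Prover-V1"
    torch_dtype="auto",
    device_map="auto",
)
model = PeftModel.from_pretrained(base_model, adapter_path)
tokenizer = AutoTokenizer.from_pretrained(adapter_path)
model.eval()
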
adapter_model.safetensors
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b5c305f542cef861b67db8b5a05e662ab84099a6b02ab1d05913b17ad5d2128b
+size 149969848
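
Note that the entry above is a Git LFS pointer; the ~150 MB adapter weights themselves live in LFS storage. A small sketch (assumed usage, not part of the commit) for inspecting the tensors once the real file has been fetched, e.g. with `git lfs pull`:

# Sketch (assumed usage): list a few LoRA tensors stored in adapter_model.safetensors.
# Requires the actual file, not the LFS pointer shown above.
from safetensors import safe_open

with safe_open("adapter_model.safetensors", framework="pt") as f:
    for name in list(f.keys())[:8]:  # print a few tensor names and shapes
        print(name, tuple(f.get_tensor(name).shape))
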
dpo_training.yaml
ADDED
@@ -0,0 +1,62 @@
+dpo:
+  data:
+    splits: "random" # {random, novel_premises}
+    train_size: 0.8
+    include_next_state: false
+
+    # paths are relative to the project root
+    raw_data: "data/time_filtered_v3.json"
+    formatted_dataset_dir: "data/straight_shot_proof_sample/filtered_negative_tactics_dataset.json"
+
+    expand_records: false
+    # processed_data: "data/straight_shot_proof_sample/dpo_expanded_dataset"
+    # processed_data: "data/straight_shot_proof_sample/dpo_single_entry_dataset"
+    processed_data: "data/straight_shot_proof_sample/dpo_flattened_dataset"
+
+    # prompt formatting {llemma, deepseek}
+    # model_prompt_template: "deepseek"
+    model_prompt_template: "llemma"
+
+    use_sts_format: false
+
+  model:
+    # TODO: figure this out
+    base_model_id: "deepseek-ai/DeepSeek-Prover-V1"
+    max_seq_length: 1024
+    packing: true # pack examples together for better efficiency
+
+  training_args:
+    # output_dir: "dspv1_dpo_dspfmt_medium" # directory to save and repository id (relative to project root)
+    output_dir: "dspv1_dpo_llemmafmt_medium" # directory to save and repository id (relative to project root)
+    num_train_epochs: 3 # number of training epochs
+    per_device_train_batch_size: 3 # batch size per device during training
+    gradient_accumulation_steps: 2 # number of steps before performing a backward/update pass
+    gradient_checkpointing: true # use gradient checkpointing to save memory
+    optim: "adamw_torch_fused" # use fused adamw optimizer
+    logging_steps: 10 # log every 10 steps
+    save_strategy: "epoch" # save checkpoint every epoch
+    learning_rate: 0.0002 # learning rate, based on QLoRA paper
+    bf16: true # use bfloat16 precision
+    tf32: true # use tf32 precision
+    max_grad_norm: 0.3 # max gradient norm based on QLoRA paper
+    warmup_ratio: 0.03 # warmup ratio based on QLoRA paper
+    lr_scheduler_type: "constant" # use constant learning rate scheduler
+    push_to_hub: true # push model to hub
+    report_to: "tensorboard" # report metrics to tensorboard
+    beta: 0.01 # TODO: tune this (beta for the loss function)
+
+  bnb:
+    _target_: transformers.BitsAndBytesConfig
+    load_in_4bit: true
+    bnb_4bit_use_double_quant: true
+    bnb_4bit_quant_type: nf4
+    bnb_4bit_compute_dtype: bfloat16
+
+  lora:
+    _target_: peft.LoraConfig
+    r: 16
+    lora_alpha: 32
+    lora_dropout: 0.05
+    bias: "none"
+    target_modules: "all-linear"
+    task_type: "CAUSAL_LM"
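
dpo_training.yaml bundles the data paths, 4-bit quantization, LoRA, and trainer arguments for the DPO run. A rough sketch of how a config like this could be wired into trl's DPOTrainer; the dataset format, the exact trl API surface, and the manual (non-hydra) construction of the bnb/lora objects are assumptions on top of what this commit contains:

# Rough sketch (assumptions: trl's DPOConfig/DPOTrainer API and a saved dataset with
# "prompt"/"chosen"/"rejected" columns at cfg["data"]["processed_data"]).
import torch
import yaml
from datasets import load_from_disk
from peft import LoraConfig
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from trl import DPOConfig, DPOTrainer

with open("dpo_training.yaml") as f:
    cfg = yaml.safe_load(f)["dpo"]

# 4-bit NF4 quantization, mirroring the bnb block above
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
model = AutoModelForCausalLM.from_pretrained(
    cfg["model"]["base_model_id"],
    quantization_config=bnb_config,
    device_map="auto",
)
tokenizer = AutoTokenizer.from_pretrained(cfg["model"]["base_model_id"])

# LoRA settings from the lora block above
peft_config = LoraConfig(
    r=cfg["lora"]["r"],
    lora_alpha=cfg["lora"]["lora_alpha"],
    lora_dropout=cfg["lora"]["lora_dropout"],
    bias=cfg["lora"]["bias"],
    target_modules=cfg["lora"]["target_modules"],
    task_type=cfg["lora"]["task_type"],
)

# DPOConfig extends TrainingArguments, so the training_args block (including beta)
# maps onto it directly; ref_model=None lets trl use the frozen base as reference.
args = DPOConfig(**cfg["training_args"])
trainer = DPOTrainer(
    model=model,
    ref_model=None,
    args=args,
    train_dataset=load_from_disk(cfg["data"]["processed_data"]),
    tokenizer=tokenizer,
    peft_config=peft_config,
)
trainer.train()
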
runs/Sep12_06-52-21_a100/events.out.tfevents.1726123985.a100.1178374.0
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:d62be177b72f1b7aabe1062d8812c6f968ba83a3e2ce266101446de85f24d212
+size 144477
special_tokens_map.json
ADDED
@@ -0,0 +1,23 @@
+{
+  "bos_token": {
+    "content": "<|begin▁of▁sentence|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "<|end▁of▁sentence|>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
tokenizer_config.json
ADDED
@@ -0,0 +1,42 @@
+{
+  "add_bos_token": true,
+  "add_eos_token": false,
+  "add_prefix_space": null,
+  "added_tokens_decoder": {
+    "100000": {
+      "content": "<|begin▁of▁sentence|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100001": {
+      "content": "<|end▁of▁sentence|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "100002": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<|begin▁of▁sentence|>",
+  "chat_template": "{%- set found_item = false -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set found_item = true -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if not found_item -%}\n{{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer.\\n'}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' %}\n{{ message['content'] }}\n {%- else %}\n {%- if message['role'] == 'user' %}\n{{'### Instruction:\\n' + message['content'] + '\\n'}}\n {%- else %}\n{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{{'### Response:\\n'}}\n",
+  "clean_up_tokenization_spaces": false,
+  "eos_token": "<|end▁of▁sentence|>",
+  "legacy": true,
+  "model_max_length": 16384,
+  "pad_token": "<pad>",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "LlamaTokenizer",
+  "unk_token": null,
+  "use_default_system_prompt": false
+}
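
A quick check (assumed usage, not part of the commit) that the tokenizer files above load as expected, with the DeepSeek BOS/EOS tokens and the <pad> token from special_tokens_map.json:

# Sketch (assumed usage): load the committed tokenizer files and verify the special tokens.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained(".")  # assumed: the root of this repository checkout
print(tok.bos_token)         # <|begin▁of▁sentence|>
print(tok.eos_token)         # <|end▁of▁sentence|>
print(tok.pad_token)         # <pad>
print(tok.model_max_length)  # 16384
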
training_args.bin
ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6c1f3ccaafa7fc9c3c0d4b033180dee71875e7ec283488dc8defdd9681dec9d5
+size 6008