vincentlinzhu committed on
Commit ed8a219 · verified · 1 Parent(s): 5eb607e

Training in progress, epoch 1
adapter_config.json ADDED
@@ -0,0 +1,34 @@
+ {
+   "alpha_pattern": {},
+   "auto_mapping": null,
+   "base_model_name_or_path": "deepseek-ai/DeepSeek-Prover-V1",
+   "bias": "none",
+   "fan_in_fan_out": false,
+   "inference_mode": true,
+   "init_lora_weights": true,
+   "layer_replication": null,
+   "layers_pattern": null,
+   "layers_to_transform": null,
+   "loftq_config": {},
+   "lora_alpha": 32,
+   "lora_dropout": 0.05,
+   "megatron_config": null,
+   "megatron_core": "megatron.core",
+   "modules_to_save": null,
+   "peft_type": "LORA",
+   "r": 16,
+   "rank_pattern": {},
+   "revision": null,
+   "target_modules": [
+     "k_proj",
+     "gate_proj",
+     "up_proj",
+     "v_proj",
+     "o_proj",
+     "q_proj",
+     "down_proj"
+   ],
+   "task_type": "CAUSAL_LM",
+   "use_dora": false,
+   "use_rslora": false
+ }
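For reference, a minimal sketch of how this adapter_config.json could be loaded onto its base model with PEFT. The adapter path below is a placeholder (substitute this repo's Hub id or a local checkout); it is not part of the commit itself.

```python
# Sketch: attach the LoRA adapter described in adapter_config.json to its base model.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

base_id = "deepseek-ai/DeepSeek-Prover-V1"        # from base_model_name_or_path
adapter_id = "path/or/hub-id/of-this-adapter"     # placeholder for this repository

tokenizer = AutoTokenizer.from_pretrained(base_id)
base_model = AutoModelForCausalLM.from_pretrained(
    base_id, torch_dtype=torch.bfloat16, device_map="auto"
)
# Wraps the base model with the r=16, alpha=32 LoRA weights over the
# q/k/v/o attention projections and the gate/up/down MLP projections.
model = PeftModel.from_pretrained(base_model, adapter_id)
model.eval()
```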
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:b5c305f542cef861b67db8b5a05e662ab84099a6b02ab1d05913b17ad5d2128b
+ size 149969848
dpo_training.yaml ADDED
@@ -0,0 +1,62 @@
+ dpo:
+   data:
+     splits: "random" # {random, novel_premises}
+     train_size: 0.8
+     include_next_state: false
+
+     # paths are relative to the project root
+     raw_data: "data/time_filtered_v3.json"
+     formatted_dataset_dir: "data/straight_shot_proof_sample/filtered_negative_tactics_dataset.json"
+
+     expand_records: false
+     # processed_data: "data/straight_shot_proof_sample/dpo_expanded_dataset"
+     # processed_data: "data/straight_shot_proof_sample/dpo_single_entry_dataset"
+     processed_data: "data/straight_shot_proof_sample/dpo_flattened_dataset"
+
+     # prompt formatting {llemma, deepseek}
+     # model_prompt_template: "deepseek"
+     model_prompt_template: "llemma"
+
+     use_sts_format: false
+
+   model:
+     # TODO: figure this out
+     base_model_id: "deepseek-ai/DeepSeek-Prover-V1"
+     max_seq_length: 1024
+     packing: true # pack examples together for better efficiency
+
+   training_args:
+     # output_dir: "dspv1_dpo_dspfmt_medium" # directory to save and repository id (relative to project root)
+     output_dir: "dspv1_dpo_llemmafmt_medium" # directory to save and repository id (relative to project root)
+     num_train_epochs: 3 # number of training epochs
+     per_device_train_batch_size: 3 # batch size per device during training
+     gradient_accumulation_steps: 2 # number of steps before performing a backward/update pass
+     gradient_checkpointing: true # use gradient checkpointing to save memory
+     optim: "adamw_torch_fused" # use fused adamw optimizer
+     logging_steps: 10 # log every 10 steps
+     save_strategy: "epoch" # save checkpoint every epoch
+     learning_rate: 0.0002 # learning rate, based on QLoRA paper
+     bf16: true # use bfloat16 precision
+     tf32: true # use tf32 precision
+     max_grad_norm: 0.3 # max gradient norm based on QLoRA paper
+     warmup_ratio: 0.03 # warmup ratio based on QLoRA paper
+     lr_scheduler_type: "constant" # use constant learning rate scheduler
+     push_to_hub: true # push model to hub
+     report_to: "tensorboard" # report metrics to tensorboard
+     beta: 0.01 # TODO: tune this (beta for the DPO loss)
+
+   bnb:
+     _target_: transformers.BitsAndBytesConfig
+     load_in_4bit: true
+     bnb_4bit_use_double_quant: true
+     bnb_4bit_quant_type: nf4
+     bnb_4bit_compute_dtype: bfloat16
+
+   lora:
+     _target_: peft.LoraConfig
+     r: 16
+     lora_alpha: 32
+     lora_dropout: 0.05
+     bias: "none"
+     target_modules: "all-linear"
+     task_type: "CAUSAL_LM"
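For reference, a minimal sketch of how the bnb, lora, and training_args sections of dpo_training.yaml could be wired into a TRL DPO run. The tiny inline dataset, variable names, and file path are illustrative assumptions; this is not the training script behind this commit.

```python
# Sketch: build BitsAndBytesConfig / LoraConfig / DPOConfig from dpo_training.yaml
# and hand them to trl's DPOTrainer. Dataset construction is a stand-in.
import torch
import yaml
from datasets import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
from peft import LoraConfig
from trl import DPOConfig, DPOTrainer

with open("dpo_training.yaml") as f:
    cfg = yaml.safe_load(f)["dpo"]

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)
peft_config = LoraConfig(
    r=16, lora_alpha=32, lora_dropout=0.05,
    bias="none", target_modules="all-linear", task_type="CAUSAL_LM",
)

base_id = cfg["model"]["base_model_id"]
tokenizer = AutoTokenizer.from_pretrained(base_id)
model = AutoModelForCausalLM.from_pretrained(
    base_id, quantization_config=bnb_config, device_map="auto"
)

# Placeholder preference data: each row needs prompt / chosen / rejected fields.
train_dataset = Dataset.from_list([
    {"prompt": "theorem foo : 1 + 1 = 2 := by", "chosen": " norm_num", "rejected": " sorry"},
])

args = DPOConfig(
    output_dir=cfg["training_args"]["output_dir"],
    num_train_epochs=3,
    per_device_train_batch_size=3,
    gradient_accumulation_steps=2,
    gradient_checkpointing=True,
    optim="adamw_torch_fused",
    learning_rate=2e-4,
    lr_scheduler_type="constant",
    warmup_ratio=0.03,
    max_grad_norm=0.3,
    bf16=True,
    logging_steps=10,
    save_strategy="epoch",
    beta=0.01,
)
trainer = DPOTrainer(
    model=model,
    args=args,
    train_dataset=train_dataset,
    tokenizer=tokenizer,   # newer TRL releases use processing_class= instead
    peft_config=peft_config,
)
trainer.train()
```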
runs/Sep12_06-52-21_a100/events.out.tfevents.1726123985.a100.1178374.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d62be177b72f1b7aabe1062d8812c6f968ba83a3e2ce266101446de85f24d212
+ size 144477
special_tokens_map.json ADDED
@@ -0,0 +1,23 @@
+ {
+   "bos_token": {
+     "content": "<|begin▁of▁sentence|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "eos_token": {
+     "content": "<|end▁of▁sentence|>",
+     "lstrip": false,
+     "normalized": true,
+     "rstrip": false,
+     "single_word": false
+   },
+   "pad_token": {
+     "content": "<pad>",
+     "lstrip": false,
+     "normalized": false,
+     "rstrip": false,
+     "single_word": false
+   }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,42 @@
+ {
+   "add_bos_token": true,
+   "add_eos_token": false,
+   "add_prefix_space": null,
+   "added_tokens_decoder": {
+     "100000": {
+       "content": "<|begin▁of▁sentence|>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "100001": {
+       "content": "<|end▁of▁sentence|>",
+       "lstrip": false,
+       "normalized": true,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     },
+     "100002": {
+       "content": "<pad>",
+       "lstrip": false,
+       "normalized": false,
+       "rstrip": false,
+       "single_word": false,
+       "special": true
+     }
+   },
+   "bos_token": "<|begin▁of▁sentence|>",
+   "chat_template": "{%- set found_item = false -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set found_item = true -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if not found_item -%}\n{{'You are an AI programming assistant, utilizing the Deepseek Coder model, developed by Deepseek Company, and you only answer questions related to computer science. For politically sensitive questions, security and privacy issues, and other non-computer science questions, you will refuse to answer.\\n'}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' %}\n{{ message['content'] }}\n {%- else %}\n {%- if message['role'] == 'user' %}\n{{'### Instruction:\\n' + message['content'] + '\\n'}}\n {%- else %}\n{{'### Response:\\n' + message['content'] + '\\n<|EOT|>\\n'}}\n {%- endif %}\n {%- endif %}\n{%- endfor %}\n{{'### Response:\\n'}}\n",
+   "clean_up_tokenization_spaces": false,
+   "eos_token": "<|end▁of▁sentence|>",
+   "legacy": true,
+   "model_max_length": 16384,
+   "pad_token": "<pad>",
+   "sp_model_kwargs": {},
+   "tokenizer_class": "LlamaTokenizer",
+   "unk_token": null,
+   "use_default_system_prompt": false
+ }
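For reference, a minimal sketch of loading the tokenizer files committed here and rendering the chat_template above. The repo path is a placeholder for this repository's Hub id or a local checkout.

```python
# Sketch: load the committed tokenizer and inspect its special tokens and chat template.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("path/to/this/repo")  # placeholder

# Special tokens come from special_tokens_map.json / tokenizer_config.json above.
print(tokenizer.bos_token, tokenizer.eos_token, tokenizer.pad_token)
# -> <|begin▁of▁sentence|> <|end▁of▁sentence|> <pad>

# The chat_template follows the DeepSeek-Coder "### Instruction / ### Response" layout.
messages = [{"role": "user", "content": "Prove that 1 + 1 = 2 in Lean 4."}]
prompt = tokenizer.apply_chat_template(messages, tokenize=False)
print(prompt)
```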
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6c1f3ccaafa7fc9c3c0d4b033180dee71875e7ec283488dc8defdd9681dec9d5
+ size 6008