| model: |
| name: bigcode/starcoder2-7b |
| tokenizer_name: bigcode/starcoder2-7b |
| max_length: 1024 |
| dtype: bfloat16 |
| trust_remote_code: true |
| use_fast_tokenizer: true |
| cache_dir: null |
| local_files_only: false |
| low_cpu_mem_usage: true |
| tie_word_embeddings: true |
| gradient_checkpointing: false |
| use_chat_template: false |
| dataset: |
| name: codedp-ase26/codedp-cpt |
| split: train |
| mode: cpt |
| text_column: text |
| validation_ratio: 0.05 |
| max_samples: -1 |
| lora: |
| enabled: true |
| r: 16 |
| alpha: 32 |
| dropout: 0.05 |
| target_modules: |
| - q_proj |
| - k_proj |
| - v_proj |
| - o_proj |
| modules_to_save: |
| - lm_head |
| bias: none |
| training: |
| seed: 42 |
| epochs: 2 |
| warmup_steps: null |
| warmup_ratio: 0.05 |
| mixed_precision: false |
| mixed_precision_dtype: bfloat16 |
| batch_size: 8 |
| eval_batch_size: 8 |
| eval_every_steps: 50 |
| eval_every_epochs: 1 |
| learning_rate: 0.0002 |
| optimizer: adamw |
| lr_scheduler: cosine |
| adam_beta1: 0.9 |
| adam_beta2: 0.999 |
| adam_epsilon: 1.0e-08 |
| sgd_momentum: 0.9 |
| weight_decay: 0.01 |
| max_grad_norm: 1.0 |
| log_every: 10 |
| gradient_accumulation_steps: 16 |
| num_workers: 4 |
| output_dir: runs/cpt/starcoder2-7b/dp3 |
| distributed: |
| strategy: dpddp |
| backend: nccl |
| devices: null |
| dp: |
| module_validator: auto |
| target_delta: 1.0e-05 |
| noise_multiplier: null |
| max_grad_norm: 1.0 |
| grad_sample_mode: hooks |
| clipping: flat |
| secure_mode: false |
| enabled: true |
| target_epsilon: 3.0 |
| audit: |
| enabled: true |
| run_every_epoch: true |
| epoch_device: cuda |
| q_canary: auto |
| num_canaries: 500 |
| prefix_length: 49 |
| num_digits: 12 |
| batch_size: 32 |
| delta: 1.0e-05 |
| p_values: |
| - 0.05 |
| - 0.01 |
| paper_guess_fraction: 0.2 |
| paper_guess_steps: 20 |
| enable_holdout_empirical_epsilon: false |
| holdout_seed: 42 |
| tie_seed: 42 |
| tracking: |
| enabled: true |
| tensorboard: true |
| wandb: false |
| wandb_project: codedp-finetune-h200-audit |
| wandb_run_name: starcoder2-7b-cpt-dp3 |
| wandb_mode: online |
| codecarbon: true |
| codecarbon_output_file: codecarbon.csv |
| codecarbon_measure_power_secs: 15 |
| codecarbon_country_iso_code: null |
| codecarbon_project_name: codedp-starcoder2-7b-cpt-dp3 |
|
|