model:
  task: text-generation
  # System prompt (Korean): "You are a chatbot that finds the Answer to a Question within the given
  # Context. Find the part of the Context that can serve as the Answer and copy it verbatim. The
  # Answer must be a short answer, not a free-form response."
  system_prompt: "너는 주어진 Context에서 Question에 대한 Answer를 찾는 챗봇이야. Context에서 Answer가 될 수 있는 부분을 찾아서 그대로 적어줘. 단, Answer는 주관식이 아니라 단답형으로 적어야 해."
  path: MLP-KTLim/llama-3-Korean-Bllossom-8B
  torch_dtype: auto
  device_map: auto
  attn_implementation: sdpa
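  # A minimal sketch of how this model block might be consumed (an assumption about the loader,
  # not this repo's actual code); the fields mirror transformers' from_pretrained kwargs:
  #   from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
  #
  #   tokenizer = AutoTokenizer.from_pretrained("MLP-KTLim/llama-3-Korean-Bllossom-8B")
  #   model = AutoModelForCausalLM.from_pretrained(
  #       "MLP-KTLim/llama-3-Korean-Bllossom-8B",
  #       torch_dtype="auto",             # model.torch_dtype
  #       device_map="auto",              # model.device_map
  #       attn_implementation="sdpa",     # model.attn_implementation
  #   )
  #   pipe = pipeline("text-generation", model=model, tokenizer=tokenizer)  # model.task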
dataset:
  path: jijihuny/economics_qa
  name: train
  shuffle: false
  test_size: null
  include_answer: true
metric:
  path: jijihuny/ecqa
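  # Hedged sketch of loading the dataset and metric; treating dataset.name as the split
  # is an assumption, and metric.path is assumed to be a community metric on the Hub:
  #   from datasets import load_dataset
  #   import evaluate
  #
  #   dataset = load_dataset("jijihuny/economics_qa", split="train")  # dataset.path / dataset.name
  #   metric = evaluate.load("jijihuny/ecqa")                         # metric.path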
generation:
  # Do not include the prompt in the returned text (false)
  return_full_text: false
  # Maximum number of new tokens to generate
  max_new_tokens: null
  # Whether to use stochastic (sampling-based) decoding; false = greedy decoding
  do_sample: false
  # Restrict sampling to the top-K most probable vocabulary tokens
  top_k: 1
  # Nucleus (top-p) sampling: smallest subset V' s.t. \sum_{v \in V'} p(v) \geq p
  top_p: 0.95
  # softmax(x / T)
  # T > 1      => smoother (uniform as T -> \infty)
  # 0 <= T < 1 => sharper (deterministic as T -> 0+)
  temperature: 1.0
  # Penalty applied to already-generated tokens; values > 1.0 discourage repetition
  repetition_penalty: null

  # Contrastive search (degeneration penalty):
  # x_i = argmax_{v} [ (1 - alpha) * p(v | x_{<i}) - alpha * max_{j < i} sim(h_v, h_{x_j}) ]
  penalty_alpha: null

  # DoLa decoding (Decoding by Contrasting Layers): https://arxiv.org/abs/2309.03883
  dola_layers: null
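  # Illustrative use of this block with the pipeline sketched above (pipe/prompt are assumed names).
  # Note: with do_sample=false decoding is greedy, so top_k, top_p and temperature have no effect.
  #   gen_kwargs = dict(
  #       return_full_text=False,
  #       max_new_tokens=None,
  #       do_sample=False,
  #       top_k=1,
  #       top_p=0.95,
  #       temperature=1.0,
  #   )
  #   outputs = pipe(prompt, **gen_kwargs)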

train:
  instruction_template: "<|start_header_id|>user<|end_header_id|>"
  response_template: "<|start_header_id|>assistant<|end_header_id|>"
  use_completion_only_data_collator: false
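  # If use_completion_only_data_collator were true, the two templates above would feed TRL's
  # DataCollatorForCompletionOnlyLM (a sketch, assuming a TRL version that still ships it):
  #   from trl import DataCollatorForCompletionOnlyLM
  #
  #   collator = DataCollatorForCompletionOnlyLM(
  #       response_template="<|start_header_id|>assistant<|end_header_id|>",
  #       instruction_template="<|start_header_id|>user<|end_header_id|>",
  #       tokenizer=tokenizer,  # tokenizer as loaded in the model sketch above
  #   )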
  quantization:
    load_in_4bit: true
    bnb_4bit_quant_type: nf4
    bnb_4bit_compute_dtype: bfloat16
    bnb_4bit_use_double_quant: true
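    # These fields map one-to-one onto transformers' BitsAndBytesConfig (sketch):
    #   import torch
    #   from transformers import BitsAndBytesConfig
    #
    #   bnb_config = BitsAndBytesConfig(
    #       load_in_4bit=True,
    #       bnb_4bit_quant_type="nf4",
    #       bnb_4bit_compute_dtype=torch.bfloat16,
    #       bnb_4bit_use_double_quant=True,
    #   )
    #   # then: AutoModelForCausalLM.from_pretrained(..., quantization_config=bnb_config)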
  lora:
    r: 16
    lora_alpha: 32
    lora_dropout: 0.05
    bias: none
    target_modules:
      - up_proj
      - down_proj
      - gate_proj
      - k_proj
      - q_proj
      - v_proj
      - o_proj
      # - lm_head
    task_type: CAUSAL_LM
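    # The lora block maps directly onto peft.LoraConfig (sketch):
    #   from peft import LoraConfig
    #
    #   lora_config = LoraConfig(
    #       r=16,
    #       lora_alpha=32,
    #       lora_dropout=0.05,
    #       bias="none",
    #       target_modules=["up_proj", "down_proj", "gate_proj",
    #                       "k_proj", "q_proj", "v_proj", "o_proj"],
    #       task_type="CAUSAL_LM",
    #   )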
  args:
    output_dir: llama3-qlora-r16-a32
    run_name: llama3-qlora-r16-a32
    report_to: wandb
    # dataloader_num_workers: 4
    torch_empty_cache_steps: 3

    # group_by_length: true
    max_seq_length: 2048
    eval_strategy: steps
    per_device_train_batch_size: 16
    per_device_eval_batch_size: 32
    gradient_accumulation_steps: 1
    eval_accumulation_steps: 1

    optim: paged_adamw_8bit
    bf16: true
    bf16_full_eval: true
    learning_rate: 0.0002
    weight_decay: 0.01
    num_train_epochs: 3
    warmup_ratio: 0.005
    max_grad_norm: 2.0
    
    eval_steps: 0.2
    eval_on_start: false
    save_steps: 0.2
    logging_steps: 1
    
    push_to_hub: true

    # torch_compile: true
seed: 42
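# End-to-end training sketch, assuming train.args feeds TRL's SFTConfig and reusing the objects from
# the sketches above (model, dataset, lora_config, collator); the repo's actual wiring may differ:
#   from trl import SFTConfig, SFTTrainer
#
#   args = SFTConfig(
#       output_dir="llama3-qlora-r16-a32",
#       run_name="llama3-qlora-r16-a32",
#       report_to="wandb",
#       max_seq_length=2048,
#       eval_strategy="steps",
#       per_device_train_batch_size=16,
#       per_device_eval_batch_size=32,
#       optim="paged_adamw_8bit",
#       bf16=True,
#       learning_rate=2e-4,
#       num_train_epochs=3,
#       eval_steps=0.2,      # floats < 1 are read as a fraction of total training steps
#       save_steps=0.2,
#       logging_steps=1,
#       push_to_hub=True,
#       seed=42,
#   )
#   trainer = SFTTrainer(model=model, args=args, train_dataset=dataset,
#                        peft_config=lora_config, data_collator=collator)
#   trainer.train()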