---
# Configuration document restored to block style.
# NOTE(review): the original was flattened onto a single line; the nesting
# below was reconstructed from the alphabetical key order at each level.
# Confirm it against the consuming tool's schema.

# Sections set to null carry no settings in this file.
checkpoints: null
data: null

general:
  benchmark_csv_path: null
  consumed_train_samples: null
  ignore_sanity_checks: false
  project: openbmb
  run: MiniCPM-2B-dpo-bf16
  seed: 42
  step: 0

lighteval: null
logging: null

model:
  ddp_bucket_cap_mb: 25
  dtype: bfloat16
  init_method:
    std: 0.025
  make_vocab_size_divisible_by: 1
  model_config:
    attn_pdrop: 0.0
    bos_token_id: 1
    dim_model_base: 256
    eos_token_id: 2
    hidden_act: silu
    hidden_size: 2304
    initializer_range: 0.1
    intermediate_size: 5760
    max_position_embeddings: 2048
    num_attention_heads: 36
    num_hidden_layers: 40
    # Same value as num_attention_heads in this file.
    num_key_value_heads: 36
    pad_token_id: null
    pretraining_tp: 1
    rms_norm_eps: 1.0e-05
    rope_theta: 10000.0
    scale_depth: 1.4
    scale_emb: 12
    tie_word_embeddings: true
    use_cache: true
    vocab_size: 122753

optimizer: null

parallelism:
  dp: 1
  pp: 1
  # Plain scalar starting with a digit; it still resolves to a string.
  pp_engine: 1f1b
  recompute_granularity: SELECTIVE
  tp: 1
  tp_linear_async_communication: true
  tp_mode: REDUCE_SCATTER

profiler: null

tokenizer:
  tokenizer_max_length: null
  # Same identifier string as general.run, prefixed with "openbmb/".
  tokenizer_name_or_path: openbmb/MiniCPM-2B-dpo-bf16
  tokenizer_revision: null

tokens: null