File size: 2,357 Bytes
f59a1a3 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 |
# Locations of this run's artifacts; every path sits under /work/models/exp500.
# NOTE(review): the "fold_0_42" in the file names appears to combine the fold
# index and the seed used elsewhere in this file — confirm against the code
# that builds these paths.
backbone_config_path: /work/models/exp500
best_model_path: /work/models/exp500/models/fold_0_42_best.pth
checkpoint_path: /work/models/exp500/chkp/fold_0_42_chkp.pth
config_path: /work/models/exp500/config.yaml
# Loss-function settings. `criterion_type` names the selected loss; the other
# entries are per-loss parameter groups.
# NOTE(review): there is no parameter group for BCEWithLogitsLoss, so the
# groups below are presumably unused defaults for this run — confirm against
# the training code.
criterion:
  criterion_type: BCEWithLogitsLoss
  mcrmse_loss:
    weights:
    - 0.5
    - 0.5
  mse_loss:
    reduction: mean
  rmse_loss:
    eps: 1.0e-09
    reduction: mean
  smooth_l1_loss:
    beta: 0.1
    reduction: mean
# Root data directory; the other *_dir data paths in this file are
# subdirectories of it.
data_dir: /work/data
# Dataset, batching and data-source settings.
dataset:
  # NOTE(review): `sampler_type` below selects StratifiedBatchSampler, so these
  # bucket-sampler parameters are presumably inactive for this run — confirm.
  bucket_batch_sampler:
    bucket_size: 400
    noise_factor: 0.2
  folds: true
  labels:
  - generated
  max_length: 384
  sampler_type: StratifiedBatchSampler
  train_batch_size: 36
  # Source names are identifiers and are kept verbatim, including the
  # "patially" spelling — presumably they must match the names used in the
  # data files; do not "correct" them here without checking.
  train_sources:
  - daigt
  - persuade
  - persuade_gpt
  - persuade_humanized_1
  - persuade_gpt_patially_rewritten
  - persuade_gpt_patially_rewritten_05
  - persuade_humanized_easy_1
  - daigt_gpt_patially_rewritten
  - llama-mistral-partially-r
  - moth
  - books
  - neural-chat-7b
  - nbroad
  valid_batch_size: 36
  # `none` is a plain string here, not YAML null (null is spelled `null`/`~`).
  valid_sources:
  - none
# Run-level switches, experiment name, fold index and data/log locations.
debug: false
exp_name: exp500_seed42
external_dir: /work/data/external
fold: 0
interim_dir: /work/data/interim
log_path: /work/models/exp500/logs/fold-0.log
# Logging settings: experiment-tracker metadata and print frequencies.
# NOTE(review): the frequencies are presumably counted in steps — confirm
# against the training loop.
logger:
  job_type: training
  project: DAIGT-AIE
  train_print_frequency: 100
  use_wandb: true
  valid_print_frequency: 100
# Model architecture, backbone and weight-loading settings.
model:
  architecture_type: CustomModel
  attention_dropout: 0.0
  backbone_type: microsoft/deberta-v3-large
  dropout: 0.0
  freeze_embeddings: false
  freeze_n_layers: 0
  # NOTE(review): `pooling_type` below is MeanPooling, so these GeM parameters
  # are presumably unused for this run — confirm.
  gem_pooling:
    eps: 1.0e-06
    p: 3
  gradient_checkpointing: false
  load_embeddings: true
  load_head: true
  load_n_layers: 24
  load_parts: true
  pooling_type: MeanPooling
  reinitialize_n_layers: 0
  # The plain scalar `None` loads as the string "None", not as YAML null
  # (null is spelled `null`, as on the `tokenizer` key in this file).
  # Kept verbatim; verify that the consumer compares against the string.
  state_from_model: None
# Parent directory of the per-experiment run directory (/work/models/exp500).
models_dir: /work/models
# Optimizer hyperparameters. Separate learning rates are given for the
# decoder, embeddings and encoder; all three are equal in this run.
# NOTE(review): the optimizer class itself is not named in this file; the
# beta1/beta2/eps/weight_decay keys suggest an Adam-style optimizer — confirm.
optimizer:
  beta1: 0.9
  beta2: 0.999
  decoder_lr: 2.0e-05
  embeddings_lr: 2.0e-05
  encoder_lr: 2.0e-05
  eps: 1.0e-06
  group_lr_multiplier: 1
  n_groups: 1
  weight_decay: 0.01
# Remaining data directories plus the run directory and run identifiers.
# `run_id` and `run_name` hold the same value in this file.
processed_dir: /work/data/processed
raw_dir: /work/data/raw
run_dir: /work/models/exp500
run_id: exp500_seed42_fold0
run_name: exp500_seed42_fold0
# Learning-rate scheduler settings. `type` names the schedule, and the
# sub-mapping with the same name holds its parameters.
scheduler:
  cosine_schedule_with_warmup:
    n_cycles: 0.5
    n_warmup_steps: 0
  type: cosine_schedule_with_warmup
# Random seed and tokenizer settings.
seed: 42
# `tokenizer` is an explicit YAML null here.
# NOTE(review): presumably it is filled in at runtime from `tokenizer_path` —
# confirm against the loading code.
tokenizer: null
tokenizer_path: /work/models/exp500/tokenizer
# Training-loop settings.
# NOTE(review): `apex` presumably toggles mixed-precision training — confirm
# against the trainer.
training:
  apex: true
  epochs: 3
  evaluate_n_times_per_epoch: 16
  gradient_accumulation_steps: 1
  max_grad_norm: 10