File size: 2,357 Bytes
f59a1a3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
# Artifact locations for this run; every path sits under /work/models/exp500.
# The `fold_0_42` stem in the .pth names presumably encodes `fold: 0` and
# `seed: 42` from this same file — confirm against the code that builds them.
backbone_config_path: /work/models/exp500
best_model_path: /work/models/exp500/models/fold_0_42_best.pth
checkpoint_path: /work/models/exp500/chkp/fold_0_42_chkp.pth
config_path: /work/models/exp500/config.yaml
# Loss configuration: `criterion_type` names the loss, and each `*_loss`
# sub-mapping carries settings keyed by a loss name.
# NOTE(review): no sub-mapping here is named for BCEWithLogitsLoss, so the
# blocks below are presumably unused by this run — confirm in the training code.
criterion:
  criterion_type: BCEWithLogitsLoss
  mcrmse_loss:
    # Two equal weights.
    weights:
    - 0.5
    - 0.5
  mse_loss:
    reduction: mean
  rmse_loss:
    # Keeps the decimal point and signed exponent: YAML 1.1 loaders such as
    # PyYAML need that form to resolve the scalar as a float, not a string.
    eps: 1.0e-09
    reduction: mean
  smooth_l1_loss:
    beta: 0.1
    reduction: mean
# Root data directory; `external_dir`, `interim_dir`, `processed_dir` and
# `raw_dir` in this file all point at subdirectories of it.
data_dir: /work/data
# Dataset, batching and data-source selection.
dataset:
  # NOTE(review): `sampler_type` below is StratifiedBatchSampler, so these
  # bucket-sampler settings are presumably unused in this run — confirm.
  bucket_batch_sampler:
    bucket_size: 400
    noise_factor: 0.2
  folds: true
  # Single label name.
  labels:
  - generated
  max_length: 384
  sampler_type: StratifiedBatchSampler
  train_batch_size: 36
  # Source identifiers, kept verbatim.
  # NOTE(review): several entries spell "patially" (not "partially"), and
  # `llama-mistral-partially-r` looks truncated. Presumably these match the
  # identifiers stored in the data, so do not "fix" the spelling here without
  # renaming at the source — verify against the dataset.
  train_sources:
  - daigt
  - persuade
  - persuade_gpt
  - persuade_humanized_1
  - persuade_gpt_patially_rewritten
  - persuade_gpt_patially_rewritten_05
  - persuade_humanized_easy_1
  - daigt_gpt_patially_rewritten
  - llama-mistral-partially-r
  - moth
  - books
  - neural-chat-7b
  - nbroad
  valid_batch_size: 36
  # `none` is a plain string here, not YAML null (null is spelled `null`/`~`).
  # Presumably a sentinel the loader checks for — confirm.
  valid_sources:
  - none
# Run identity and a few data/log locations.
debug: false
# Same `exp500_seed42` prefix as `run_id` / `run_name`, which append `_fold0`.
exp_name: exp500_seed42
external_dir: /work/data/external
# Fold index for this run; it also appears in `log_path` (fold-0.log).
fold: 0
interim_dir: /work/data/interim
log_path: /work/models/exp500/logs/fold-0.log
# Logging / experiment-tracking settings.
logger:
  job_type: training
  project: DAIGT-AIE
  # Print frequencies are both 100; the unit (presumably steps) is not stated
  # in this file — confirm against the training loop.
  train_print_frequency: 100
  # Presumably toggles Weights & Biases tracking — confirm.
  use_wandb: true
  valid_print_frequency: 100
# Model definition: backbone, pooling, and which parts to load or freeze.
model:
  architecture_type: CustomModel
  attention_dropout: 0.0
  backbone_type: microsoft/deberta-v3-large
  dropout: 0.0
  freeze_embeddings: false
  freeze_n_layers: 0
  # NOTE(review): `pooling_type` below is MeanPooling, so these GeM settings
  # are presumably unused in this run — confirm.
  gem_pooling:
    eps: 1.0e-06
    p: 3
  gradient_checkpointing: false
  load_embeddings: true
  load_head: true
  load_n_layers: 24
  load_parts: true
  pooling_type: MeanPooling
  reinitialize_n_layers: 0
  # NOTE(review): plain `None` loads as the string "None", not YAML null
  # (compare `tokenizer: null` in this file). Left unchanged because the
  # consumer may compare against the string — confirm before switching to null.
  state_from_model: None
# Parent directory of `run_dir` (/work/models/exp500).
models_dir: /work/models
# Optimizer hyperparameters. The optimizer class itself is not named in this
# file; beta1/beta2/eps/weight_decay presumably feed an Adam-style optimizer —
# confirm against the training code.
optimizer:
  beta1: 0.9
  beta2: 0.999
  # Decoder, embeddings and encoder learning rates are all set to the same
  # value (2.0e-05) in this run.
  decoder_lr: 2.0e-05
  embeddings_lr: 2.0e-05
  encoder_lr: 2.0e-05
  # Floats keep the decimal point and signed exponent: YAML 1.1 loaders such
  # as PyYAML need that form to resolve a float rather than a string.
  eps: 1.0e-06
  group_lr_multiplier: 1
  n_groups: 1
  weight_decay: 0.01
# Remaining data directories plus the run's directory and identifiers.
processed_dir: /work/data/processed
raw_dir: /work/data/raw
# Same directory as `backbone_config_path`.
run_dir: /work/models/exp500
# `run_id` and `run_name` hold the same value: `exp_name` followed by `_fold0`.
run_id: exp500_seed42_fold0
run_name: exp500_seed42_fold0
# Learning-rate schedule. `type` carries the same name as the sub-mapping
# that holds its parameters.
scheduler:
  cosine_schedule_with_warmup:
    n_cycles: 0.5
    # Zero warmup steps configured.
    n_warmup_steps: 0
  type: cosine_schedule_with_warmup
# Random seed; the same value appears in `exp_name` (exp500_seed42).
seed: 42
# Explicit YAML null. Presumably filled in at runtime (e.g. from
# `tokenizer_path`) — confirm against the loading code.
tokenizer: null
tokenizer_path: /work/models/exp500/tokenizer
training:
  apex: true
  epochs: 3
  evaluate_n_times_per_epoch: 16
  gradient_accumulation_steps: 1
  max_grad_norm: 10