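# P-tuning (prompt learning) config for a Megatron T5 model on the SQuAD task.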
name: p_tuning_squad_t5

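# PyTorch Lightning trainer settings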
trainer:
  devices: 1
  accelerator: gpu
  num_nodes: 1
  precision: 16
  logger: False
  enable_checkpointing: False
  replace_sampler_ddp: False
  max_epochs: 10
  max_steps: -1
  log_every_n_steps: 10
  val_check_interval: 1.0
  gradient_clip_val: 1.0
  resume_from_checkpoint: null

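# Experiment manager: logging, checkpointing, and early stopping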
exp_manager:
  explicit_log_dir: null
  exp_dir: null
  name: ${name}
  create_wandb_logger: False
  wandb_logger_kwargs:
    project: PromptLearning-T5
    name: ${name}
  resume_if_exists: True
  resume_ignore_no_checkpoint: True
  create_checkpoint_callback: True
  checkpoint_callback_params:
    monitor: val_loss
    save_top_k: 2
    mode: min
    save_nemo_on_train_end: False
    filename: "megatron_t5_prompt_tune--{${exp_manager.checkpoint_callback_params.monitor}:.3f}-{step}"
    model_parallel_size: ${model.tensor_model_parallel_size}
    save_best_model: True
  create_early_stopping_callback: True
  early_stopping_callback_params:
    monitor: "val_loss"
    mode: "min"
    min_delta: 0.001
    patience: 10
    verbose: True

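# Prompt-learning model configuration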
model:
  seed: 1234
  nemo_path: ${name}.nemo
  virtual_prompt_style: "p-tuning"
  tensor_model_parallel_size: 1
  pipeline_model_parallel_size: 1
  global_batch_size: 8
  micro_batch_size: 8
  validation_global_batch_size: ${model.global_batch_size}
  validation_micro_batch_size: ${model.micro_batch_size}
  validation_drop_last: False
  report_validation_accuracy: False

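  # Model paths and task lists (language_model_path is required and must be set by the user)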
  restore_path: null
  language_model_path: ???
  save_nemo_on_validation_end: True
  existing_tasks: []
  new_tasks: ["squad"]

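  # Task templates: prompt format and virtual-token layout for each task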
  task_templates:
    - taskname: "squad"
      prompt_template: "<|VIRTUAL_PROMPT_0|> {context} {question} {answer}"
      total_virtual_tokens: 100
      virtual_token_splits: [100]
      truncate_field: context
      answer_field: answer

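  # Prompt-encoder settings used when virtual_prompt_style is "p-tuning"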
  p_tuning:
    encoder_type: "mlp"
    num_layers: 2
    dropout: 0.0

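  # Prompt-tuning initialization settings (apply when virtual_prompt_style is "prompt-tuning" rather than "p-tuning")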
  prompt_tuning:
    new_prompt_init_methods: ['text']
    new_prompt_init_text: ['some init text goes here']

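  # Training and validation data (.jsonl files, one example per line)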
  data:
    train_ds: ["data/squad_train.jsonl"]
    validation_ds: ["data/squad_val.jsonl"]
    add_eos: true
    add_bos: false
    decoder_starts_with_pad: False
    add_eos_to_decoder_output: True
    add_sentinel_to_input: True
    ul2_prompt_token: null
    shuffle: true
    num_workers: 4
    pin_memory: true

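  # Optimizer and learning rate schedule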
  optim:
    name: fused_adam
    lr: 1e-4
    weight_decay: 0.01
    betas:
      - 0.9
      - 0.98
    sched:
      name: CosineAnnealing
      warmup_steps: 50
      constant_steps: 0
      min_lr: 0.0
      monitor: val_loss
      reduce_on_plateau: false