# NeMo/examples/nlp/language_modeling/conf/megatron_t5_prompt_learning.yaml
name: p_tuning_squad_t5
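
# A typical Hydra-style launch for this config (a sketch, assuming the companion
# training script megatron_t5_prompt_learning.py in the parent examples directory;
# the .nemo path is a placeholder):
#   python megatron_t5_prompt_learning.py \
#     --config-name=megatron_t5_prompt_learning \
#     model.language_model_path=/path/to/megatron_t5.nemo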

trainer:
  devices: 1
  accelerator: gpu
  num_nodes: 1
  precision: 16
  logger: False
  enable_checkpointing: False
  replace_sampler_ddp: False
  max_epochs: 10
  max_steps: -1
  log_every_n_steps: 10
  val_check_interval: 1.0
  gradient_clip_val: 1.0
  resume_from_checkpoint: null
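
# Trainer settings follow PyTorch Lightning; precision: 16 above enables fp16 mixed
# precision. Multi-GPU runs can be requested from the command line, for example
# (a sketch; bf16 assumes Ampere-or-newer GPUs):
#   trainer.devices=2 trainer.precision=bf16 model.tensor_model_parallel_size=2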

exp_manager:
  explicit_log_dir: null
  exp_dir: null
  name: ${name}
  create_wandb_logger: False
  wandb_logger_kwargs:
    project: PromptLearning-T5
    name: ${name}
  resume_if_exists: True
  resume_ignore_no_checkpoint: True
  create_checkpoint_callback: True
  checkpoint_callback_params:
    monitor: val_loss
    save_top_k: 2
    mode: min
    save_nemo_on_train_end: False # Should be false, correct prompt learning model file is saved at model.nemo_path set below
    filename: "megatron_t5_prompt_tune--{${exp_manager.checkpoint_callback_params.monitor}:.3f}-{step}"
    model_parallel_size: ${model.tensor_model_parallel_size}
    save_best_model: True
  create_early_stopping_callback: True
  early_stopping_callback_params:
    monitor: "val_loss"
    mode: "min"
    min_delta: 0.001
    patience: 10
    verbose: True
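
# To log a run to Weights & Biases, flip the exp_manager flags at launch
# (a sketch; the project name is just this config's default):
#   exp_manager.create_wandb_logger=True \
#   exp_manager.wandb_logger_kwargs.project=PromptLearning-T5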

model:
  seed: 1234
  nemo_path: ${name}.nemo # .nemo filename/absolute path to where the virtual prompt model parameters will be saved
  virtual_prompt_style: "p-tuning" # one of 'prompt-tuning', 'p-tuning', or 'inference'
  tensor_model_parallel_size: 1
  pipeline_model_parallel_size: 1
  global_batch_size: 8
  micro_batch_size: 8 # micro batch size should equal global batch size when pipeline parallel = 1
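
  # Batch-size bookkeeping (a worked example for this config, assuming NeMo's usual
  # relation global_batch_size = micro_batch_size * data_parallel_size * accumulate_grad_batches):
  # with trainer.devices=1 and tensor/pipeline parallel sizes of 1, data_parallel_size is 1,
  # so global_batch_size 8 = micro_batch_size 8 * 1 * 1 (no gradient accumulation).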
  validation_global_batch_size: ${model.global_batch_size}
  validation_micro_batch_size: ${model.micro_batch_size}
  validation_drop_last: False
  report_validation_accuracy: False
  restore_path: null # Path to an existing p-tuned/prompt-tuned .nemo model you wish to add new tasks to or run inference with
  language_model_path: ??? # Path to the pretrained T5 language model .nemo file, always required
  save_nemo_on_validation_end: True # Saves an inference-ready .nemo file every time a checkpoint is saved during training.
  existing_tasks: []
  new_tasks: ["squad"]

  task_templates:
  - taskname: "squad"
    prompt_template: "<|VIRTUAL_PROMPT_0|> {context} {question} {answer}"
    total_virtual_tokens: 100
    virtual_token_splits: [100]
    truncate_field: context
    answer_field: answer
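
  # Each training example is a JSON line whose keys match the fields referenced in
  # prompt_template above; a hypothetical data/squad_train.jsonl record might look like:
  #   {"taskname": "squad", "context": "Super Bowl 50 was played at Levi's Stadium.",
  #    "question": "Where was Super Bowl 50 played?", "answer": "Levi's Stadium"}
  # The 100 virtual tokens are inserted where <|VIRTUAL_PROMPT_0|> appears, and the
  # context field is truncated first when a sequence is too long.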

  p_tuning: # P-tuning specific params
    encoder_type: "mlp" # Either "mlp" or "lstm", mlp is default
    num_layers: 2 # 2 recommended for MLP, 1 recommended for LSTM, must be at least 2 for mlp
    dropout: 0.0

  prompt_tuning: # Prompt-tuning specific params
    new_prompt_init_methods: ['text'] # List of 'text' or 'random', should correspond to tasks listed in new_tasks
    new_prompt_init_text: ['some init text goes here'] # some init text if init method is text, or None if init method is random
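
  # Only one of the two blocks above is used, selected by model.virtual_prompt_style.
  # To switch from p-tuning to prompt tuning, for example, override at launch
  # (a sketch; the init text is just a placeholder):
  #   model.virtual_prompt_style="prompt-tuning" \
  #   'model.prompt_tuning.new_prompt_init_methods=["text"]' \
  #   'model.prompt_tuning.new_prompt_init_text=["answer the question given the context"]'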

  data:
    train_ds: ["data/squad_train.jsonl"]
    validation_ds: ["data/squad_val.jsonl"]
    add_eos: true
    add_bos: false
    decoder_starts_with_pad: False
    add_eos_to_decoder_output: True
    add_sentinel_to_input: True
    ul2_prompt_token: null # <extra_id_s>, <extra_id_r>, <extra_id_x>
    shuffle: true
    num_workers: 4
    pin_memory: true
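
  # train_ds / validation_ds take lists of JSONL files, so several files per split can
  # be mixed; to point at your own data, override the paths at launch (placeholders):
  #   'model.data.train_ds=["/path/to/my_train.jsonl","/path/to/extra_train.jsonl"]' \
  #   'model.data.validation_ds=["/path/to/my_val.jsonl"]'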

  optim:
    name: fused_adam
    lr: 1e-4
    weight_decay: 0.01
    betas:
    - 0.9
    - 0.98
    sched:
      name: CosineAnnealing
      warmup_steps: 50
      constant_steps: 0
      min_lr: 0.0
      monitor: val_loss
      reduce_on_plateau: false
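
# To continue from an already p-tuned .nemo file and train additional tasks, point
# model.restore_path at it and move the finished task names into existing_tasks
# (a sketch with placeholder task names and paths; each new task also needs its own
# entry under model.task_templates):
#   model.restore_path=p_tuning_squad_t5.nemo \
#   'model.existing_tasks=["squad"]' \
#   'model.new_tasks=["my_new_task"]' \
#   model.language_model_path=/path/to/megatron_t5.nemo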