# NeMo/examples/nlp/language_modeling/conf/megatron_t5_prompt_learning.yaml
name: p_tuning_squad_t5
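
# A typical Hydra-style launch for this config (a sketch, assuming the companion
# training script megatron_t5_prompt_learning.py in the parent examples directory;
# the .nemo path is a placeholder):
#   python megatron_t5_prompt_learning.py \
#     --config-name=megatron_t5_prompt_learning \
#     model.language_model_path=/path/to/megatron_t5.nemo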

trainer:
  devices: 1
  accelerator: gpu
  num_nodes: 1
  precision: 16
  logger: False
  enable_checkpointing: False
  replace_sampler_ddp: False
  max_epochs: 10
  max_steps: -1
  log_every_n_steps: 10
  val_check_interval: 1.0
  gradient_clip_val: 1.0
  resume_from_checkpoint: null
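
# Trainer settings follow PyTorch Lightning; precision: 16 above enables fp16 mixed
# precision. Multi-GPU runs can be requested from the command line, for example
# (a sketch; bf16 assumes Ampere-or-newer GPUs):
#   trainer.devices=2 trainer.precision=bf16 model.tensor_model_parallel_size=2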

exp_manager:
  explicit_log_dir: null
  exp_dir: null
  name: ${name}
  create_wandb_logger: False
  wandb_logger_kwargs:
    project: PromptLearning-T5
    name: ${name}
  resume_if_exists: True
  resume_ignore_no_checkpoint: True
  create_checkpoint_callback: True
  checkpoint_callback_params:
    monitor: val_loss
    save_top_k: 2
    mode: min
    save_nemo_on_train_end: False # Should be false, correct prompt learning model file is saved at model.nemo_path set below
    filename: "megatron_t5_prompt_tune--{${exp_manager.checkpoint_callback_params.monitor}:.3f}-{step}"
    model_parallel_size: ${model.tensor_model_parallel_size}
    save_best_model: True
  create_early_stopping_callback: True
  early_stopping_callback_params:
    monitor: "val_loss"
    mode: "min"
    min_delta: 0.001
    patience: 10
    verbose: True
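
# To log a run to Weights & Biases, flip the exp_manager flags at launch
# (a sketch; the project name is just this config's default):
#   exp_manager.create_wandb_logger=True \
#   exp_manager.wandb_logger_kwargs.project=PromptLearning-T5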

model:
  seed: 1234
  nemo_path: ${name}.nemo # .nemo filename/absolute path to where the virtual prompt model parameters will be saved
  virtual_prompt_style: "p-tuning" # one of 'prompt-tuning', 'p-tuning', or 'inference'
  tensor_model_parallel_size: 1
  pipeline_model_parallel_size: 1
  global_batch_size: 8
  micro_batch_size: 8 # micro batch size should equal global batch size when pipeline parallel = 1
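
  # Batch-size bookkeeping (a worked example for this config, assuming NeMo's usual
  # relation global_batch_size = micro_batch_size * data_parallel_size * accumulate_grad_batches):
  # with trainer.devices=1 and tensor/pipeline parallel sizes of 1, data_parallel_size is 1,
  # so global_batch_size 8 = micro_batch_size 8 * 1 * 1 (no gradient accumulation).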
  validation_global_batch_size: ${model.global_batch_size}
  validation_micro_batch_size: ${model.micro_batch_size}
  validation_drop_last: False
  report_validation_accuracy: False
  restore_path: null # Path to an existing p-tuned/prompt-tuned .nemo model you wish to add new tasks to or run inference with
  language_model_path: ??? # Path to the pretrained T5 language model .nemo file, always required
  save_nemo_on_validation_end: True # Saves an inference-ready .nemo file every time a checkpoint is saved during training.
  existing_tasks: []
  new_tasks: ["squad"]

  task_templates:
  - taskname: "squad"
    prompt_template: "<|VIRTUAL_PROMPT_0|> {context} {question} {answer}"
    total_virtual_tokens: 100
    virtual_token_splits: [100]
    truncate_field: context
    answer_field: answer
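
  # Each training example is a JSON line whose keys match the fields referenced in
  # prompt_template above; a hypothetical data/squad_train.jsonl record might look like:
  #   {"taskname": "squad", "context": "Super Bowl 50 was played at Levi's Stadium.",
  #    "question": "Where was Super Bowl 50 played?", "answer": "Levi's Stadium"}
  # The 100 virtual tokens are inserted where <|VIRTUAL_PROMPT_0|> appears, and the
  # context field is truncated first when a sequence is too long.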

  p_tuning: # P-tuning specific params
    encoder_type: "mlp" # Either "mlp" or "lstm", mlp is default
    num_layers: 2 # 2 recommended for MLP, 1 recommended for LSTM, must be at least 2 for mlp
    dropout: 0.0

  prompt_tuning: # Prompt-tuning specific params
    new_prompt_init_methods: ['text'] # List of 'text' or 'random', should correspond to tasks listed in new_tasks
    new_prompt_init_text: ['some init text goes here'] # some init text if init method is text, or None if init method is random
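
  # Only one of the two blocks above is used, selected by model.virtual_prompt_style.
  # To switch from p-tuning to prompt tuning, for example, override at launch
  # (a sketch; the init text is just a placeholder):
  #   model.virtual_prompt_style="prompt-tuning" \
  #   'model.prompt_tuning.new_prompt_init_methods=["text"]' \
  #   'model.prompt_tuning.new_prompt_init_text=["answer the question given the context"]'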

  data:
    train_ds: ["data/squad_train.jsonl"]
    validation_ds: ["data/squad_val.jsonl"]
    add_eos: true
    add_bos: false
    decoder_starts_with_pad: False
    add_eos_to_decoder_output: True
    add_sentinel_to_input: True
    ul2_prompt_token: null # <extra_id_s>, <extra_id_r>, <extra_id_x>
    shuffle: true
    num_workers: 4
    pin_memory: true
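
  # train_ds / validation_ds take lists of JSONL files, so several files per split can
  # be mixed; to point at your own data, override the paths at launch (placeholders):
  #   'model.data.train_ds=["/path/to/my_train.jsonl","/path/to/extra_train.jsonl"]' \
  #   'model.data.validation_ds=["/path/to/my_val.jsonl"]'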

  optim:
    name: fused_adam
    lr: 1e-4
    weight_decay: 0.01
    betas:
    - 0.9
    - 0.98
    sched:
      name: CosineAnnealing
      warmup_steps: 50
      constant_steps: 0
      min_lr: 0.0
      monitor: val_loss
      reduce_on_plateau: false
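
# To continue from an already p-tuned .nemo file and train additional tasks, point
# model.restore_path at it and move the finished task names into existing_tasks
# (a sketch with placeholder task names and paths; each new task also needs its own
# entry under model.task_templates):
#   model.restore_path=p_tuning_squad_t5.nemo \
#   'model.existing_tasks=["squad"]' \
#   'model.new_tasks=["my_new_task"]' \
#   model.language_model_path=/path/to/megatron_t5.nemo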