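# Page-header residue from the source listing, kept as comments so it is not
# read as YAML content:
# deanna-emery's picture
# updates
# 93528c6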
# Task section: model classification head plus the training and validation
# input configurations. Nesting restored; the flattened form produced
# duplicate keys between train_data and validation_data.
task:
  # Empty string: no checkpoint path is set in this file.
  init_checkpoint: ''
  model:
    # NOTE(review): a `next_sentence` head is declared here while both data
    # configs set `use_next_sentence_label: false` — confirm this is intended.
    cls_heads:
      - activation: tanh
        cls_token_idx: 0
        dropout_rate: 0.1
        inner_dim: 768
        name: next_sentence
        num_classes: 2
  train_data:
    drop_remainder: true
    global_batch_size: 512
    is_training: true
    max_predictions_per_seq: 76
    seq_length: 512
    use_next_sentence_label: false
    use_whole_word_masking: true
    tfds_name: wikipedia/20201201.en
    tfds_split: train
    # Placeholder text, not a real path; must be replaced before use.
    vocab_file_path: 'Please provide the vocab file path.'
  validation_data:
    drop_remainder: true
    global_batch_size: 32
    is_training: false
    max_predictions_per_seq: 76
    seq_length: 512
    use_next_sentence_label: false
    use_whole_word_masking: true
    tfds_name: wikipedia/20201201.en
    # NOTE(review): validation reads the same dataset and split (`train`) as
    # train_data — confirm a held-out split is not expected here.
    tfds_split: train
    # Placeholder text, not a real path; must be replaced before use.
    vocab_file_path: 'Please provide the vocab file path.'
# Trainer section: checkpointing, optimizer/schedule, and loop intervals.
# Nesting restored; the flattened form duplicated `polynomial`, `type`, and
# `power` between the learning-rate schedule and the warmup schedule.
trainer:
  checkpoint_interval: 20000
  max_to_keep: 5
  optimizer_config:
    learning_rate:
      # `type` selects which sibling sub-mapping applies (here: polynomial).
      polynomial:
        cycle: false
        # Same value as train_steps below, so the decay spans the whole run.
        decay_steps: 1000000
        end_learning_rate: 0.0
        initial_learning_rate: 0.0001
        power: 1.0
      type: polynomial
    optimizer:
      type: adamw
    warmup:
      polynomial:
        power: 1
        warmup_steps: 10000
      type: polynomial
  steps_per_loop: 1000
  summary_interval: 1000
  train_steps: 1000000
  validation_interval: 1000
  validation_steps: 64