|
## Overview |
|
T5-Base v1.1 model trained to generate hypotheses given a premise and a label. Below are the settings used to train it: |
|
|
|
```yaml |
|
|
|
Experiment configurations |
|
├── datasets |
|
│ └── mnli_train: |
|
│ dataset_name: multi_nli |
|
│ dataset_config_name: null |
|
│ cache_dir: null |
|
│ input_fields: |
|
│ - premise |
|
│ - hypothesis |
|
│ target_field: label |
|
│ train_subset_names: null |
|
│ val_subset_names: validation_matched |
|
│ test_subset_names: none |
|
│ train_val_split: null |
|
│ limit_train_samples: null |
|
│ limit_val_samples: null |
|
│ limit_test_samples: null |
|
│ sampling_kwargs: |
|
│ sampling_strategy: random |
|
│ seed: 42 |
|
│ replace: false |
|
│ align_labels_with_mapping: null |
|
│ avoid_consistency_check: false |
|
│ predict_label_mapping: null |
|
│ mnli: |
|
│ dataset_name: multi_nli |
|
│ dataset_config_name: null |
|
│ cache_dir: null |
|
│ input_fields: |
|
│ - premise |
|
│ - hypothesis |
|
│ target_field: label |
|
│ train_subset_names: none |
|
│ val_subset_names: none |
|
│ test_subset_names: validation_mismatched |
|
│ train_val_split: null |
|
│ limit_train_samples: null |
|
│ limit_val_samples: null |
|
│ limit_test_samples: null |
|
│ sampling_kwargs: |
|
│ sampling_strategy: random |
|
│ seed: 42 |
|
│ replace: false |
|
│ align_labels_with_mapping: null |
|
│ avoid_consistency_check: false |
|
│ predict_label_mapping: null |
|
│ |
|
├── data |
|
│ └── _target_: src.task.nli.data.NLIGenerationData.from_config |
|
│ main_dataset_name: null |
|
│ use_additional_as_test: null |
|
│ dataloader: |
|
│ batch_size: 64 |
|
│ eval_batch_size: 100 |
|
│ num_workers: 16 |
|
│ pin_memory: true |
|
│ drop_last: false |
|
│ persistent_workers: false |
|
│ shuffle: true |
|
│ seed_dataloader: 42 |
|
│ replacement: false |
|
│ processing: |
|
│ preprocessing_num_workers: 16 |
|
│ preprocessing_batch_size: 1000 |
|
│ load_from_cache_file: true |
|
│ padding: longest |
|
│ truncation: longest_first |
|
│ max_source_length: 128 |
|
│ max_target_length: 128 |
|
│ template: 'premise: $premise $label hypothesis: ' |
|
│ tokenizer: |
|
│ _target_: transformers.AutoTokenizer.from_pretrained |
|
│ pretrained_model_name_or_path: google/t5-v1_1-base |
|
│ use_fast: true |
|
│ |
|
├── task |
|
│ └── optimizer: |
|
│ name: Adafactor |
|
│ lr: 0.001 |
|
│ weight_decay: 0.0 |
|
│ no_decay: |
|
│ - bias |
|
│ - LayerNorm.weight |
|
│ decay_rate: -0.8 |
|
│ clip_threshold: 1.0 |
|
│ relative_step: false |
|
│ scale_parameter: false |
|
│ warmup_init: false |
|
│ scheduler: |
|
│ name: constant_schedule |
|
│ model: |
|
│ model_name_or_path: google/t5-v1_1-base |
|
│ checkpoint_path: null |
|
│ freeze: false |
|
│ seed_init_weight: 42 |
|
│ _target_: src.task.nli.NLIGenerationTask.from_config |
|
│ generation: |
|
│ max_length: 128 |
|
│ min_length: 3 |
|
│ do_sample: true |
|
│ early_stopping: false |
|
│ num_beams: 1 |
|
│ temperature: 1.0 |
|
│ top_k: 50 |
|
│ top_p: 0.95 |
|
│ repetition_penalty: null |
|
│ length_penalty: null |
|
│ no_repeat_ngram_size: null |
|
│ encoder_no_repeat_ngram_size: null |
|
│ num_return_sequences: 1 |
|
│ max_time: null |
|
│ max_new_tokens: null |
|
│ decoder_start_token_id: null |
|
│ use_cache: null |
|
│ num_beam_groups: null |
|
│ diversity_penalty: null |
|
│ |
|
├── trainer |
|
│ └── _target_: pytorch_lightning.Trainer |
|
│ callbacks: |
|
│ lr_monitor: |
|
│ _target_: pytorch_lightning.callbacks.LearningRateMonitor |
|
│ logging_interval: step |
|
│ log_momentum: false |
|
│ model_checkpoint: |
|
│ _target_: pytorch_lightning.callbacks.ModelCheckpoint |
|
│ dirpath: ./checkpoints/ |
|
│ filename: nli_generator_mnli-epoch={epoch:02d}-val_loss={val/aggregated_loss:.2f} |
|
│ monitor: val/aggregated_loss |
|
│ mode: min |
|
│ verbose: false |
|
│ save_last: true |
|
│ save_top_k: 1 |
|
│ auto_insert_metric_name: false |
|
│ save_on_train_epoch_end: false |
|
│ rich_model_summary: |
|
│ _target_: pytorch_lightning.callbacks.RichModelSummary |
|
│ max_depth: 1 |
|
│ log_grad_norm: |
|
│ _target_: src.core.callbacks.LogGradNorm |
|
│ norm_type: 2 |
|
│ group_separator: / |
|
│ only_total: true |
|
│ on_step: true |
|
│ on_epoch: false |
|
│ prog_bar: true |
|
│ log_generated_text: |
|
│ _target_: src.core.callbacks.GenerateAndLogText |
|
│ dirpath: ./generated_text |
|
│ type: generated_text |
|
│ pop_keys_after_logging: true |
|
│ on_train: false |
|
│ on_validation: false |
|
│ on_test: true |
|
│ log_to_wandb: true |
|
│ wandb_log_dataset_sizes: |
|
│ _target_: src.core.callbacks.WandbLogDatasetSizes |
|
│ logger: |
|
│ wandb: |
|
│ _target_: pytorch_lightning.loggers.WandbLogger |
|
│ project: nli_debiasing |
|
│ entity: team_brushino |
|
│ name: nli_generator_mnli |
|
│ save_dir: ./ |
|
│ offline: false |
|
│ log_model: false |
|
│ group: mnli |
|
│ job_type: generator |
|
│ tags: |
|
│ - nli_generator_mnli |
|
│ - seed=42 |
|
│ - seed_dataloader=42 |
|
│ notes: nli_generator_mnli_time=02-24-53 |
|
│ enable_checkpointing: true |
|
│ enable_progress_bar: true |
|
│ enable_model_summary: true |
|
│ gradient_clip_val: 0.0 |
|
│ gradient_clip_algorithm: null |
|
│ accelerator: gpu |
|
│ devices: auto |
|
│ gpus: null |
|
│ auto_select_gpus: true |
|
│ accumulate_grad_batches: 1 |
|
│ max_epochs: 3 |
|
│ min_epochs: 1 |
|
│ max_steps: -1 |
|
│ min_steps: null |
|
│ max_time: null |
|
│ num_sanity_val_steps: 2 |
|
│ overfit_batches: 0.0 |
|
│ fast_dev_run: false |
|
│ limit_train_batches: 1.0 |
|
│ limit_val_batches: 1.0 |
|
│ limit_test_batches: 1.0 |
|
│ profiler: null |
|
│ detect_anomaly: false |
|
│ deterministic: false |
|
│ check_val_every_n_epoch: 1 |
|
│ val_check_interval: 0.1 |
|
│ log_every_n_steps: 10 |
|
│ move_metrics_to_cpu: false |
|
│ |
|
└── training |
|
└── run_val_before_fit: false |
|
run_val_after_fit: false |
|
run_test_before_fit: false |
|
run_test_after_fit: true |
|
lr: 0.001 |
|
seed: 42 |
|
show_batch: false |
|
batch_size: 64 |
|
eval_batch_size: 100 |
|
num_workers: 16 |
|
pin_memory: true |
|
drop_last: false |
|
persistent_workers: false |
|
shuffle: true |
|
seed_dataloader: 42 |
|
ignore_warnings: true |
|
experiment_name: nli_generator_mnli |
|
|
|
``` |