# t5v1_1-base-mnli
## Overview
T5-Base v1.1 model fine-tuned on MultiNLI (`multi_nli`) to generate a hypothesis given a premise and a label. Below are the settings used to train it; a usage sketch follows the configuration.
```yaml
Experiment configurations
├── datasets
│   └── mnli_train:
│         dataset_name: multi_nli
│         dataset_config_name: null
│         cache_dir: null
│         input_fields:
│         - premise
│         - hypothesis
│         target_field: label
│         train_subset_names: null
│         val_subset_names: validation_matched
│         test_subset_names: none
│         train_val_split: null
│         limit_train_samples: null
│         limit_val_samples: null
│         limit_test_samples: null
│         sampling_kwargs:
│           sampling_strategy: random
│           seed: 42
│           replace: false
│         align_labels_with_mapping: null
│         avoid_consistency_check: false
│         predict_label_mapping: null
│       mnli:
│         dataset_name: multi_nli
│         dataset_config_name: null
│         cache_dir: null
│         input_fields:
│         - premise
│         - hypothesis
│         target_field: label
│         train_subset_names: none
│         val_subset_names: none
│         test_subset_names: validation_mismatched
│         train_val_split: null
│         limit_train_samples: null
│         limit_val_samples: null
│         limit_test_samples: null
│         sampling_kwargs:
│           sampling_strategy: random
│           seed: 42
│           replace: false
│         align_labels_with_mapping: null
│         avoid_consistency_check: false
│         predict_label_mapping: null
│
├── data
│   └── _target_: src.task.nli.data.NLIGenerationData.from_config
│       main_dataset_name: null
│       use_additional_as_test: null
│       dataloader:
│         batch_size: 64
│         eval_batch_size: 100
│         num_workers: 16
│         pin_memory: true
│         drop_last: false
│         persistent_workers: false
│         shuffle: true
│         seed_dataloader: 42
│         replacement: false
│       processing:
│         preprocessing_num_workers: 16
│         preprocessing_batch_size: 1000
│         load_from_cache_file: true
│         padding: longest
│         truncation: longest_first
│         max_source_length: 128
│         max_target_length: 128
│         template: 'premise: $premise $label hypothesis: '
│       tokenizer:
│         _target_: transformers.AutoTokenizer.from_pretrained
│         pretrained_model_name_or_path: google/t5-v1_1-base
│         use_fast: true
│
├── task
│   └── optimizer:
│         name: Adafactor
│         lr: 0.001
│         weight_decay: 0.0
│         no_decay:
│         - bias
│         - LayerNorm.weight
│         decay_rate: -0.8
│         clip_threshold: 1.0
│         relative_step: false
│         scale_parameter: false
│         warmup_init: false
│       scheduler:
│         name: constant_schedule
│       model:
│         model_name_or_path: google/t5-v1_1-base
│         checkpoint_path: null
│         freeze: false
│         seed_init_weight: 42
│       _target_: src.task.nli.NLIGenerationTask.from_config
│       generation:
│         max_length: 128
│         min_length: 3
│         do_sample: true
│         early_stopping: false
│         num_beams: 1
│         temperature: 1.0
│         top_k: 50
│         top_p: 0.95
│         repetition_penalty: null
│         length_penalty: null
│         no_repeat_ngram_size: null
│         encoder_no_repeat_ngram_size: null
│         num_return_sequences: 1
│         max_time: null
│         max_new_tokens: null
│         decoder_start_token_id: null
│         use_cache: null
│         num_beam_groups: null
│         diversity_penalty: null
│
├── trainer
│   └── _target_: pytorch_lightning.Trainer
│       callbacks:
│         lr_monitor:
│           _target_: pytorch_lightning.callbacks.LearningRateMonitor
│           logging_interval: step
│           log_momentum: false
│         model_checkpoint:
│           _target_: pytorch_lightning.callbacks.ModelCheckpoint
│           dirpath: ./checkpoints/
│           filename: nli_generator_mnli-epoch={epoch:02d}-val_loss={val/aggregated_loss:.2f}
│           monitor: val/aggregated_loss
│           mode: min
│           verbose: false
│           save_last: true
│           save_top_k: 1
│           auto_insert_metric_name: false
│           save_on_train_epoch_end: false
│         rich_model_summary:
│           _target_: pytorch_lightning.callbacks.RichModelSummary
│           max_depth: 1
│         log_grad_norm:
│           _target_: src.core.callbacks.LogGradNorm
│           norm_type: 2
│           group_separator: /
│           only_total: true
│           on_step: true
│           on_epoch: false
│           prog_bar: true
│         log_generated_text:
│           _target_: src.core.callbacks.GenerateAndLogText
│           dirpath: ./generated_text
│           type: generated_text
│           pop_keys_after_logging: true
│           on_train: false
│           on_validation: false
│           on_test: true
│           log_to_wandb: true
│         wandb_log_dataset_sizes:
│           _target_: src.core.callbacks.WandbLogDatasetSizes
│       logger:
│         wandb:
│           _target_: pytorch_lightning.loggers.WandbLogger
│           project: nli_debiasing
│           entity: team_brushino
│           name: nli_generator_mnli
│           save_dir: ./
│           offline: false
│           log_model: false
│           group: mnli
│           job_type: generator
│           tags:
│           - nli_generator_mnli
│           - seed=42
│           - seed_dataloader=42
│           notes: nli_generator_mnli_time=02-24-53
│       enable_checkpointing: true
│       enable_progress_bar: true
│       enable_model_summary: true
│       gradient_clip_val: 0.0
│       gradient_clip_algorithm: null
│       accelerator: gpu
│       devices: auto
│       gpus: null
│       auto_select_gpus: true
│       accumulate_grad_batches: 1
│       max_epochs: 3
│       min_epochs: 1
│       max_steps: -1
│       min_steps: null
│       max_time: null
│       num_sanity_val_steps: 2
│       overfit_batches: 0.0
│       fast_dev_run: false
│       limit_train_batches: 1.0
│       limit_val_batches: 1.0
│       limit_test_batches: 1.0
│       profiler: null
│       detect_anomaly: false
│       deterministic: false
│       check_val_every_n_epoch: 1
│       val_check_interval: 0.1
│       log_every_n_steps: 10
│       move_metrics_to_cpu: false
│
└── training
    └── run_val_before_fit: false
        run_val_after_fit: false
        run_test_before_fit: false
        run_test_after_fit: true
        lr: 0.001
        seed: 42
        show_batch: false
        batch_size: 64
        eval_batch_size: 100
        num_workers: 16
        pin_memory: true
        drop_last: false
        persistent_workers: false
        shuffle: true
        seed_dataloader: 42
        ignore_warnings: true
        experiment_name: nli_generator_mnli
```
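
## Usage

A minimal usage sketch follows. It fills the training-time template (`premise: $premise $label hypothesis: `) for one example and samples a hypothesis with the same decoding settings as the `generation` block above (`do_sample` with `top_k=50`, `top_p=0.95`, `temperature=1.0`). Two assumptions are mine, not stated in this card: the hub id `pietrolesci/t5v1_1-base-mnli`, and that `$label` is filled with the MNLI class name (e.g. `entailment`).

```python
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Assumed hub id; adjust if the checkpoint lives elsewhere.
model_id = "pietrolesci/t5v1_1-base-mnli"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

# Fill the training template: 'premise: $premise $label hypothesis: '
premise = "A soccer game with multiple males playing."
label = "entailment"  # assumed verbalization of the MNLI label
prompt = f"premise: {premise} {label} hypothesis: "

inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=128)

# Decoding mirrors the `generation` block in the config above.
output_ids = model.generate(
    **inputs,
    do_sample=True,
    top_k=50,
    top_p=0.95,
    temperature=1.0,
    max_length=128,
    min_length=3,
    num_return_sequences=1,
)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```

Since decoding is stochastic, repeated calls produce different hypotheses for the same premise-label pair; pass a larger `num_return_sequences` to sample several at once.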