# t5v1_1-base-mnli
## Overview
T5-Base v1.1 model fine-tuned on MultiNLI (`multi_nli`) to generate a hypothesis given a premise and a label. Below are the settings used to train it; a usage sketch follows the configuration.
```yaml
Experiment configurations
├── datasets
│   └── mnli_train:
│         dataset_name: multi_nli
│         dataset_config_name: null
│         cache_dir: null
│         input_fields:
│         - premise
│         - hypothesis
│         target_field: label
│         train_subset_names: null
│         val_subset_names: validation_matched
│         test_subset_names: none
│         train_val_split: null
│         limit_train_samples: null
│         limit_val_samples: null
│         limit_test_samples: null
│         sampling_kwargs:
│           sampling_strategy: random
│           seed: 42
│           replace: false
│         align_labels_with_mapping: null
│         avoid_consistency_check: false
│         predict_label_mapping: null
│       mnli:
│         dataset_name: multi_nli
│         dataset_config_name: null
│         cache_dir: null
│         input_fields:
│         - premise
│         - hypothesis
│         target_field: label
│         train_subset_names: none
│         val_subset_names: none
│         test_subset_names: validation_mismatched
│         train_val_split: null
│         limit_train_samples: null
│         limit_val_samples: null
│         limit_test_samples: null
│         sampling_kwargs:
│           sampling_strategy: random
│           seed: 42
│           replace: false
│         align_labels_with_mapping: null
│         avoid_consistency_check: false
│         predict_label_mapping: null
│
├── data
│   └── _target_: src.task.nli.data.NLIGenerationData.from_config
│       main_dataset_name: null
│       use_additional_as_test: null
│       dataloader:
│         batch_size: 64
│         eval_batch_size: 100
│         num_workers: 16
│         pin_memory: true
│         drop_last: false
│         persistent_workers: false
│         shuffle: true
│         seed_dataloader: 42
│         replacement: false
│       processing:
│         preprocessing_num_workers: 16
│         preprocessing_batch_size: 1000
│         load_from_cache_file: true
│         padding: longest
│         truncation: longest_first
│         max_source_length: 128
│         max_target_length: 128
│         template: 'premise: $premise $label hypothesis: '
│       tokenizer:
│         _target_: transformers.AutoTokenizer.from_pretrained
│         pretrained_model_name_or_path: google/t5-v1_1-base
│         use_fast: true
│
├── task
│   └── optimizer:
│         name: Adafactor
│         lr: 0.001
│         weight_decay: 0.0
│         no_decay:
│         - bias
│         - LayerNorm.weight
│         decay_rate: -0.8
│         clip_threshold: 1.0
│         relative_step: false
│         scale_parameter: false
│         warmup_init: false
│       scheduler:
│         name: constant_schedule
│       model:
│         model_name_or_path: google/t5-v1_1-base
│         checkpoint_path: null
│         freeze: false
│         seed_init_weight: 42
│       _target_: src.task.nli.NLIGenerationTask.from_config
│       generation:
│         max_length: 128
│         min_length: 3
│         do_sample: true
│         early_stopping: false
│         num_beams: 1
│         temperature: 1.0
│         top_k: 50
│         top_p: 0.95
│         repetition_penalty: null
│         length_penalty: null
│         no_repeat_ngram_size: null
│         encoder_no_repeat_ngram_size: null
│         num_return_sequences: 1
│         max_time: null
│         max_new_tokens: null
│         decoder_start_token_id: null
│         use_cache: null
│         num_beam_groups: null
│         diversity_penalty: null
│
├── trainer
│   └── _target_: pytorch_lightning.Trainer
│       callbacks:
│         lr_monitor:
│           _target_: pytorch_lightning.callbacks.LearningRateMonitor
│           logging_interval: step
│           log_momentum: false
│         model_checkpoint:
│           _target_: pytorch_lightning.callbacks.ModelCheckpoint
│           dirpath: ./checkpoints/
│           filename: nli_generator_mnli-epoch={epoch:02d}-val_loss={val/aggregated_loss:.2f}
│           monitor: val/aggregated_loss
│           mode: min
│           verbose: false
│           save_last: true
│           save_top_k: 1
│           auto_insert_metric_name: false
│           save_on_train_epoch_end: false
│         rich_model_summary:
│           _target_: pytorch_lightning.callbacks.RichModelSummary
│           max_depth: 1
│         log_grad_norm:
│           _target_: src.core.callbacks.LogGradNorm
│           norm_type: 2
│           group_separator: /
│           only_total: true
│           on_step: true
│           on_epoch: false
│           prog_bar: true
│         log_generated_text:
│           _target_: src.core.callbacks.GenerateAndLogText
│           dirpath: ./generated_text
│           type: generated_text
│           pop_keys_after_logging: true
│           on_train: false
│           on_validation: false
│           on_test: true
│           log_to_wandb: true
│         wandb_log_dataset_sizes:
│           _target_: src.core.callbacks.WandbLogDatasetSizes
│       logger:
│         wandb:
│           _target_: pytorch_lightning.loggers.WandbLogger
│           project: nli_debiasing
│           entity: team_brushino
│           name: nli_generator_mnli
│           save_dir: ./
│           offline: false
│           log_model: false
│           group: mnli
│           job_type: generator
│           tags:
│           - nli_generator_mnli
│           - seed=42
│           - seed_dataloader=42
│           notes: nli_generator_mnli_time=02-24-53
│       enable_checkpointing: true
│       enable_progress_bar: true
│       enable_model_summary: true
│       gradient_clip_val: 0.0
│       gradient_clip_algorithm: null
│       accelerator: gpu
│       devices: auto
│       gpus: null
│       auto_select_gpus: true
│       accumulate_grad_batches: 1
│       max_epochs: 3
│       min_epochs: 1
│       max_steps: -1
│       min_steps: null
│       max_time: null
│       num_sanity_val_steps: 2
│       overfit_batches: 0.0
│       fast_dev_run: false
│       limit_train_batches: 1.0
│       limit_val_batches: 1.0
│       limit_test_batches: 1.0
│       profiler: null
│       detect_anomaly: false
│       deterministic: false
│       check_val_every_n_epoch: 1
│       val_check_interval: 0.1
│       log_every_n_steps: 10
│       move_metrics_to_cpu: false
│
└── training
    └── run_val_before_fit: false
        run_val_after_fit: false
        run_test_before_fit: false
        run_test_after_fit: true
        lr: 0.001
        seed: 42
        show_batch: false
        batch_size: 64
        eval_batch_size: 100
        num_workers: 16
        pin_memory: true
        drop_last: false
        persistent_workers: false
        shuffle: true
        seed_dataloader: 42
        ignore_warnings: true
        experiment_name: nli_generator_mnli
```
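
## Usage

A minimal usage sketch follows. It fills the training-time template (`premise: $premise $label hypothesis: `) for one example and samples a hypothesis with the same decoding settings as the `generation` block above (`do_sample` with `top_k=50`, `top_p=0.95`, `temperature=1.0`). Two assumptions are mine, not stated in this card: the hub id `pietrolesci/t5v1_1-base-mnli`, and that `$label` is filled with the MNLI class name (e.g. `entailment`).

```python
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# Assumed hub id; adjust if the checkpoint lives elsewhere.
model_id = "pietrolesci/t5v1_1-base-mnli"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForSeq2SeqLM.from_pretrained(model_id)

# Fill the training template: 'premise: $premise $label hypothesis: '
premise = "A soccer game with multiple males playing."
label = "entailment"  # assumed verbalization of the MNLI label
prompt = f"premise: {premise} {label} hypothesis: "

inputs = tokenizer(prompt, return_tensors="pt", truncation=True, max_length=128)

# Decoding mirrors the `generation` block in the config above.
output_ids = model.generate(
    **inputs,
    do_sample=True,
    top_k=50,
    top_p=0.95,
    temperature=1.0,
    max_length=128,
    min_length=3,
    num_return_sequences=1,
)
print(tokenizer.decode(output_ids[0], skip_special_tokens=True))
```

Since decoding is stochastic, repeated calls produce different hypotheses for the same premise-label pair; pass a larger `num_return_sequences` to sample several at once.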