bartdoc / config.json

Upload BartForConditionalGeneration

0967d0a verified 8 months ago

4.2 kB

	{
	"_name_or_path": "facebook/bart-base",
	"accelerator": null,
	"accumulate_grad_batches": 2,
	"activation_dropout": 0.1,
	"activation_function": "gelu",
	"add_bias_logits": false,
	"add_context": false,
	"add_final_layer_norm": false,
	"amp_backend": null,
	"amp_level": null,
	"architectures": [
	"BartForConditionalGeneration"
	],
	"attention_dropout": 0.1,
	"auto_lr_find": false,
	"auto_scale_batch_size": false,
	"auto_select_gpus": null,
	"batch_size": 8,
	"benchmark": null,
	"bos_token_id": 0,
	"check_val_every_n_epoch": 1,
	"checkpoint": null,
	"class_labels": {
	"delete": 3,
	"ignore": 0,
	"rephrase": 1,
	"split": 2
	},
	"classif_dropout": 0.1,
	"classifier_dropout": 0.0,
	"context_dir": null,
	"context_doc_id": null,
	"context_window": 13,
	"d_model": 768,
	"decoder_attention_heads": 12,
	"decoder_ffn_dim": 3072,
	"decoder_layerdrop": 0.0,
	"decoder_layers": 6,
	"decoder_start_token_id": 2,
	"default_root_dir": null,
	"detect_anomaly": false,
	"devices": "2",
	"dropout": 0.1,
	"early_stopping": true,
	"enable_checkpointing": true,
	"enable_model_summary": true,
	"enable_progress_bar": true,
	"encoder_attention_heads": 12,
	"encoder_ffn_dim": 3072,
	"encoder_layerdrop": 0.0,
	"encoder_layers": 6,
	"eos_token_id": 2,
	"eval_beams": null,
	"eval_max_length": null,
	"fast_dev_run": false,
	"forced_bos_token_id": 0,
	"forced_eos_token_id": 2,
	"gpus": null,
	"gradient_checkpointing": false,
	"gradient_clip_algorithm": null,
	"gradient_clip_val": null,
	"id2label": {
	"0": "LABEL_0",
	"1": "LABEL_1",
	"2": "LABEL_2"
	},
	"inference_mode": true,
	"init_std": 0.02,
	"ipus": null,
	"is_encoder_decoder": true,
	"label2id": {
	"LABEL_0": 0,
	"LABEL_1": 1,
	"LABEL_2": 2
	},
	"limit_predict_batches": null,
	"limit_test_batches": null,
	"limit_train_batches": null,
	"limit_val_batches": null,
	"log_every_n_steps": 50,
	"logger": true,
	"longformer": false,
	"lr": 2e-05,
	"lr_scheduler": false,
	"max_epochs": null,
	"max_length": 1024,
	"max_position_embeddings": 1024,
	"max_samples": -1,
	"max_steps": -1,
	"max_time": null,
	"min_epochs": null,
	"min_steps": null,
	"model_type": "bart",
	"move_metrics_to_cpu": false,
	"multiple_trainloader_mode": "max_size_cycle",
	"name": "bart-sent",
	"no_repeat_ngram_size": 3,
	"normalize_before": false,
	"normalize_embedding": true,
	"num_beams": 4,
	"num_hidden_layers": 6,
	"num_nodes": 1,
	"num_processes": null,
	"num_sanity_val_steps": 2,
	"op_col": null,
	"op_tokens": [
	"<COPY>",
	"<REPHRASE>",
	"<SPLIT>",
	"<DELETE>"
	],
	"overfit_batches": 0.0,
	"pad_token_id": 1,
	"plan_col": "labels",
	"plan_loss": null,
	"plan_prefix": false,
	"plan_sep": false,
	"plugins": null,
	"precision": 32,
	"prefix_only": false,
	"profiler": null,
	"project": "simplification_models",
	"reading_lvl": null,
	"reload_dataloaders_every_n_epochs": 0,
	"replace_sampler_ddp": true,
	"resume_from_checkpoint": null,
	"save_dir": null,
	"scale_embedding": false,
	"sent_level": false,
	"simple_context_dir": null,
	"simple_context_doc_id": null,
	"skip_val_gen": true,
	"strategy": null,
	"sync_batchnorm": false,
	"task_specific_params": {
	"summarization": {
	"length_penalty": 1.0,
	"max_length": 128,
	"min_length": 12,
	"num_beams": 4
	},
	"summarization_cnn": {
	"length_penalty": 2.0,
	"max_length": 142,
	"min_length": 56,
	"num_beams": 4
	},
	"summarization_xsum": {
	"length_penalty": 1.0,
	"max_length": 62,
	"min_length": 11,
	"num_beams": 6
	}
	},
	"torch_dtype": "float32",
	"tpu_cores": null,
	"track_grad_norm": -1,
	"train_check_interval": 0.01,
	"train_data_dir": null,
	"train_file": "data/wikiauto_docs_train.csv",
	"train_split": 0.9,
	"train_workers": 32,
	"transformers_version": "4.29.1",
	"use_cache": true,
	"val_check_interval": null,
	"val_file": "data/wikiauto_docs_valid.csv",
	"val_metric": "bleu",
	"val_split": 0.05,
	"val_workers": 8,
	"valid_data_dir": null,
	"vocab_size": 50274,
	"wandb_id": null,
	"x_col": "complex",
	"y_col": "simple"
	}