diff --git "a/training_logs" "b/training_logs" new file mode 100644--- /dev/null +++ "b/training_logs" @@ -0,0 +1,268 @@ +Namespace(tokenizer='pretrained_models/trimmed_longmbart', save_dir='finetuned_checkpoints/trimmed_longmbart_35k_apa', save_prefix='trial_1', resume_ckpt=None, pretrained_ckpt=None, from_pretrained='pretrained_models/trimmed_longmbart', num_sanity_val_steps=0, model_type='mbart', train_source='datasets/prepared_data/deplain_APA_train.compl', train_target='datasets/prepared_data/deplain_APA_train.simpl', dev_source='datasets/prepared_data/deplain_APA_dev.compl', dev_target='datasets/prepared_data/deplain_APA_dev.simpl', test_source='datasets/prepared_data/deplain_APA_test.compl', test_target='datasets/prepared_data/deplain_APA_test.simpl', train_jsons=None, dev_jsons=None, test_jsons=None, remove_xml_in_json=False, remove_linebreaks_in_json=False, src_lang='de_DE', tgt_lang='de_SI', tgt_tags_included=False, src_tags_included=False, remove_special_tokens_containing=None, max_output_len=1024, max_input_len=2048, batch_size=1, num_workers=20, grad_accum=1, accelerator='gpu', devices=[0], seed=222, attention_dropout=0.1, dropout=0.3, activation_dropout=0.0, label_smoothing=0.2, min_delta=0.0005, attention_mode='sliding_chunks', attention_window=512, global_attention_indices=[-1], lr=3e-05, check_val_every_n_epoch=5, val_check_interval=None, val_percent_check=1.0, train_percent_check=1.0, max_epochs=100, max_steps=-1, early_stopping_metric='rougeL', patience=10, lr_reduce_patience=8, lr_reduce_factor=0.5, lr_cooldown=0, disable_checkpointing=False, save_top_k=5, save_every_n_val_epochs=0, grad_ckpt=True, beam_size=4, test_percent_check=1.0, progress_bar_refresh_rate=10, disable_validation_bar=True, fp32=False, print_params=False, wandb=None, wandb_entity=None) + Training: 0it [00:00, ?it/s] Training: 0%| | 0/387 [00:00