jordyvl's picture
Saving best model to hub
7f559cd
{
"EE_config": {
"alpha": 0.5,
"apply_ocr": true,
"batch_size": 8,
"benchmark_OCR": false,
"checkpoint": "",
"data_parallel": false,
"dataset": "maveriq/tobacco3482",
"device": "cuda",
"downsampling": 0,
"encoder_layer_strategy": "ramp",
"epochs": 100,
"eval_batch_size": 1,
"eval_start": false,
"exit_head_num_layers": 2,
"exit_threshold": -1,
"exits": [
"text_avg",
"vision_avg",
1,
4,
8
],
"forward_signature": [
"pixel_values",
"head_mask",
"labels",
"output_attentions",
"output_hidden_states",
"return_dict"
],
"gamma": 0,
"get_raw_ocr_data": false,
"global_threshold": 1.000001,
"gradient_accumulation_steps": 1,
"inference_strategy": "max_confidence",
"lowercase": false,
"lr": 2e-05,
"model": "dit",
"model_weights": "microsoft/dit-base",
"optimizer": "AdamW",
"plot_exits": false,
"print_freq": 50,
"seed": 42,
"temperature": 1,
"test_dataset": "jordyvl/rvl_cdip_100_examples_per_class",
"training_strategy": "joint_weighted_avg",
"use_images": true,
"warmup_ratio": 0,
"weight_decay": 0
},
"_name_or_path": "microsoft/dit-base",
"architectures": [
"BeitForImageClassification"
],
"attention_probs_dropout_prob": 0.0,
"auxiliary_channels": 256,
"auxiliary_concat_input": false,
"auxiliary_loss_weight": 0.4,
"auxiliary_num_convs": 1,
"drop_path_rate": 0.1,
"hidden_act": "gelu",
"hidden_dropout_prob": 0.0,
"hidden_size": 768,
"id2label": {
"0": "ADVE",
"1": "Email",
"2": "Form",
"3": "Letter",
"4": "Memo",
"5": "News",
"6": "Note",
"7": "Report",
"8": "Resume",
"9": "Scientific"
},
"image_size": 224,
"initializer_range": 0.02,
"intermediate_size": 3072,
"label2id": {
"ADVE": 0,
"Email": 1,
"Form": 2,
"Letter": 3,
"Memo": 4,
"News": 5,
"Note": 6,
"Report": 7,
"Resume": 8,
"Scientific": 9
},
"layer_norm_eps": 1e-12,
"layer_scale_init_value": 0.1,
"model_type": "beit",
"num_attention_heads": 12,
"num_channels": 3,
"num_hidden_layers": 12,
"out_indices": [
3,
5,
7,
11
],
"patch_size": 16,
"pool_scales": [
1,
2,
3,
6
],
"problem_type": "single_label_classification",
"semantic_loss_ignore_index": 255,
"torch_dtype": "float32",
"transformers_version": "4.26.1",
"use_absolute_position_embeddings": true,
"use_auxiliary_head": true,
"use_mask_token": true,
"use_mean_pooling": true,
"use_relative_position_bias": false,
"use_shared_relative_position_bias": false,
"vocab_size": 8192
}