to-be committed on
Commit
404dbc5
1 Parent(s): 45b4d9e

Training in progress, epoch 0

Browse files
Files changed (3) hide show
  1. config.json +61 -75
  2. generation_config.json +5 -7
  3. pytorch_model.bin +2 -2
config.json CHANGED
@@ -1,83 +1,85 @@
1
  {
2
- "_commit_hash": "a959cf33c20e09215873e338299c900f57047c61",
3
- "_name_or_path": "naver-clova-ix/donut-base",
4
  "architectures": [
5
- "VisionEncoderDecoderModel"
6
  ],
7
- "decoder": {
 
 
 
 
 
 
 
 
8
  "_name_or_path": "",
9
- "activation_dropout": 0.0,
10
- "activation_function": "gelu",
11
- "add_cross_attention": true,
12
- "add_final_layer_norm": true,
13
  "architectures": null,
14
- "attention_dropout": 0.0,
15
  "bad_words_ids": null,
16
  "begin_suppress_tokens": null,
17
- "bos_token_id": 0,
18
  "chunk_size_feed_forward": 0,
19
- "classifier_dropout": 0.0,
20
  "cross_attention_hidden_size": null,
21
- "d_model": 1024,
22
- "decoder_attention_heads": 16,
23
- "decoder_ffn_dim": 4096,
24
- "decoder_layerdrop": 0.0,
25
- "decoder_layers": 4,
26
- "decoder_start_token_id": null,
27
  "diversity_penalty": 0.0,
28
  "do_sample": false,
29
- "dropout": 0.1,
30
  "early_stopping": false,
31
- "encoder_attention_heads": 16,
32
- "encoder_ffn_dim": 4096,
33
- "encoder_layerdrop": 0.0,
34
- "encoder_layers": 12,
35
  "encoder_no_repeat_ngram_size": 0,
36
- "eos_token_id": 2,
37
  "exponential_decay_length_penalty": null,
38
  "finetuning_task": null,
39
  "forced_bos_token_id": null,
40
- "forced_eos_token_id": 2,
 
41
  "id2label": {
42
  "0": "LABEL_0",
43
  "1": "LABEL_1"
44
  },
45
- "init_std": 0.02,
 
46
  "is_decoder": true,
47
  "is_encoder_decoder": false,
48
  "label2id": {
49
  "LABEL_0": 0,
50
  "LABEL_1": 1
51
  },
 
52
  "length_penalty": 1.0,
53
- "max_length": 768,
54
- "max_position_embeddings": 1536,
55
  "min_length": 0,
56
- "model_type": "mbart",
57
  "no_repeat_ngram_size": 0,
58
  "num_beam_groups": 1,
59
  "num_beams": 1,
60
- "num_hidden_layers": 12,
 
61
  "num_return_sequences": 1,
62
  "output_attentions": false,
63
  "output_hidden_states": false,
64
  "output_scores": false,
65
- "pad_token_id": 1,
66
  "prefix": null,
67
  "problem_type": null,
68
  "pruned_heads": {},
 
 
69
  "remove_invalid_values": false,
70
  "repetition_penalty": 1.0,
71
  "return_dict": true,
72
  "return_dict_in_generate": false,
73
- "scale_embedding": true,
74
  "sep_token_id": null,
75
  "suppress_tokens": null,
76
  "task_specific_params": null,
77
  "temperature": 1.0,
78
  "tf_legacy_loss": false,
79
  "tie_encoder_decoder": false,
80
- "tie_word_embeddings": true,
81
  "tokenizer_class": null,
82
  "top_k": 50,
83
  "top_p": 1.0,
@@ -86,49 +88,43 @@
86
  "transformers_version": "4.31.0.dev0",
87
  "typical_p": 1.0,
88
  "use_bfloat16": false,
89
- "use_cache": true,
90
- "vocab_size": 57544
91
  },
92
- "decoder_start_token_id": 57543,
93
- "encoder": {
 
 
94
  "_name_or_path": "",
95
  "add_cross_attention": false,
96
  "architectures": null,
97
- "attention_probs_dropout_prob": 0.0,
98
  "bad_words_ids": null,
99
  "begin_suppress_tokens": null,
100
  "bos_token_id": null,
101
  "chunk_size_feed_forward": 0,
102
  "cross_attention_hidden_size": null,
 
 
103
  "decoder_start_token_id": null,
104
- "depths": [
105
- 2,
106
- 2,
107
- 14,
108
- 2
109
- ],
110
  "diversity_penalty": 0.0,
111
  "do_sample": false,
112
- "drop_path_rate": 0.1,
113
  "early_stopping": false,
114
- "embed_dim": 128,
115
  "encoder_no_repeat_ngram_size": 0,
116
  "eos_token_id": null,
117
  "exponential_decay_length_penalty": null,
118
  "finetuning_task": null,
119
  "forced_bos_token_id": null,
120
  "forced_eos_token_id": null,
121
- "hidden_act": "gelu",
122
- "hidden_dropout_prob": 0.0,
123
- "hidden_size": 1024,
124
  "id2label": {
125
  "0": "LABEL_0",
126
  "1": "LABEL_1"
127
  },
128
- "image_size": [
129
- 1920,
130
- 1280
131
- ],
132
  "initializer_range": 0.02,
133
  "is_decoder": false,
134
  "is_encoder_decoder": false,
@@ -136,39 +132,37 @@
136
  "LABEL_0": 0,
137
  "LABEL_1": 1
138
  },
139
- "layer_norm_eps": 1e-05,
 
140
  "length_penalty": 1.0,
141
  "max_length": 20,
142
  "min_length": 0,
143
- "mlp_ratio": 4.0,
144
- "model_type": "donut-swin",
145
  "no_repeat_ngram_size": 0,
 
146
  "num_beam_groups": 1,
147
  "num_beams": 1,
148
  "num_channels": 3,
149
- "num_heads": [
150
- 4,
151
- 8,
152
- 16,
153
- 32
154
- ],
155
- "num_layers": 4,
156
  "num_return_sequences": 1,
157
  "output_attentions": false,
158
  "output_hidden_states": false,
159
  "output_scores": false,
160
  "pad_token_id": null,
161
- "patch_size": 4,
162
- "path_norm": true,
163
  "prefix": null,
164
  "problem_type": null,
 
165
  "pruned_heads": {},
166
- "qkv_bias": true,
 
167
  "remove_invalid_values": false,
168
  "repetition_penalty": 1.0,
169
  "return_dict": true,
170
  "return_dict_in_generate": false,
171
  "sep_token_id": null,
 
172
  "suppress_tokens": null,
173
  "task_specific_params": null,
174
  "temperature": 1.0,
@@ -182,14 +176,6 @@
182
  "torchscript": false,
183
  "transformers_version": "4.31.0.dev0",
184
  "typical_p": 1.0,
185
- "use_absolute_embeddings": false,
186
- "use_bfloat16": false,
187
- "window_size": 10
188
- },
189
- "is_encoder_decoder": true,
190
- "model_type": "vision-encoder-decoder",
191
- "pad_token_id": 1,
192
- "tie_word_embeddings": false,
193
- "torch_dtype": "float32",
194
- "transformers_version": null
195
  }
 
1
  {
2
+ "_commit_hash": "956e2761626d0c37672c7741204a105ecd389245",
3
+ "_name_or_path": "google/pix2struct-base",
4
  "architectures": [
5
+ "Pix2StructForConditionalGeneration"
6
  ],
7
+ "decoder_start_token_id": 0,
8
+ "eos_token_id": 1,
9
+ "initializer_factor": 1.0,
10
+ "initializer_range": 0.02,
11
+ "is_encoder_decoder": true,
12
+ "is_vqa": false,
13
+ "model_type": "pix2struct",
14
+ "pad_token_id": 0,
15
+ "text_config": {
16
  "_name_or_path": "",
17
+ "add_cross_attention": false,
 
 
 
18
  "architectures": null,
 
19
  "bad_words_ids": null,
20
  "begin_suppress_tokens": null,
21
+ "bos_token_id": null,
22
  "chunk_size_feed_forward": 0,
 
23
  "cross_attention_hidden_size": null,
24
+ "d_ff": 2048,
25
+ "d_kv": 64,
26
+ "decoder_start_token_id": 0,
27
+ "dense_act_fn": "gelu_new",
 
 
28
  "diversity_penalty": 0.0,
29
  "do_sample": false,
30
+ "dropout_rate": 0.2,
31
  "early_stopping": false,
32
+ "encoder_hidden_size": 768,
 
 
 
33
  "encoder_no_repeat_ngram_size": 0,
34
+ "eos_token_id": 1,
35
  "exponential_decay_length_penalty": null,
36
  "finetuning_task": null,
37
  "forced_bos_token_id": null,
38
+ "forced_eos_token_id": null,
39
+ "hidden_size": 768,
40
  "id2label": {
41
  "0": "LABEL_0",
42
  "1": "LABEL_1"
43
  },
44
+ "initializer_factor": 1.0,
45
+ "initializer_range": 0.02,
46
  "is_decoder": true,
47
  "is_encoder_decoder": false,
48
  "label2id": {
49
  "LABEL_0": 0,
50
  "LABEL_1": 1
51
  },
52
+ "layer_norm_epsilon": 1e-06,
53
  "length_penalty": 1.0,
54
+ "max_length": 20,
 
55
  "min_length": 0,
56
+ "model_type": "pix2struct_text_model",
57
  "no_repeat_ngram_size": 0,
58
  "num_beam_groups": 1,
59
  "num_beams": 1,
60
+ "num_heads": 12,
61
+ "num_layers": 12,
62
  "num_return_sequences": 1,
63
  "output_attentions": false,
64
  "output_hidden_states": false,
65
  "output_scores": false,
66
+ "pad_token_id": 0,
67
  "prefix": null,
68
  "problem_type": null,
69
  "pruned_heads": {},
70
+ "relative_attention_max_distance": 128,
71
+ "relative_attention_num_buckets": 32,
72
  "remove_invalid_values": false,
73
  "repetition_penalty": 1.0,
74
  "return_dict": true,
75
  "return_dict_in_generate": false,
 
76
  "sep_token_id": null,
77
  "suppress_tokens": null,
78
  "task_specific_params": null,
79
  "temperature": 1.0,
80
  "tf_legacy_loss": false,
81
  "tie_encoder_decoder": false,
82
+ "tie_word_embeddings": false,
83
  "tokenizer_class": null,
84
  "top_k": 50,
85
  "top_p": 1.0,
 
88
  "transformers_version": "4.31.0.dev0",
89
  "typical_p": 1.0,
90
  "use_bfloat16": false,
91
+ "use_cache": false,
92
+ "vocab_size": 50362
93
  },
94
+ "tie_word_embeddings": false,
95
+ "torch_dtype": "float32",
96
+ "transformers_version": null,
97
+ "vision_config": {
98
  "_name_or_path": "",
99
  "add_cross_attention": false,
100
  "architectures": null,
101
+ "attention_dropout": 0.2,
102
  "bad_words_ids": null,
103
  "begin_suppress_tokens": null,
104
  "bos_token_id": null,
105
  "chunk_size_feed_forward": 0,
106
  "cross_attention_hidden_size": null,
107
+ "d_ff": 2048,
108
+ "d_kv": 64,
109
  "decoder_start_token_id": null,
110
+ "dense_act_fn": "gelu_new",
 
 
 
 
 
111
  "diversity_penalty": 0.0,
112
  "do_sample": false,
113
+ "dropout_rate": 0.2,
114
  "early_stopping": false,
 
115
  "encoder_no_repeat_ngram_size": 0,
116
  "eos_token_id": null,
117
  "exponential_decay_length_penalty": null,
118
  "finetuning_task": null,
119
  "forced_bos_token_id": null,
120
  "forced_eos_token_id": null,
121
+ "hidden_dropout_prob": 0.2,
122
+ "hidden_size": 768,
 
123
  "id2label": {
124
  "0": "LABEL_0",
125
  "1": "LABEL_1"
126
  },
127
+ "initializer_factor": 1.0,
 
 
 
128
  "initializer_range": 0.02,
129
  "is_decoder": false,
130
  "is_encoder_decoder": false,
 
132
  "LABEL_0": 0,
133
  "LABEL_1": 1
134
  },
135
+ "layer_norm_bias": false,
136
+ "layer_norm_eps": 1e-06,
137
  "length_penalty": 1.0,
138
  "max_length": 20,
139
  "min_length": 0,
140
+ "model_type": "pix2struct_vision_model",
 
141
  "no_repeat_ngram_size": 0,
142
+ "num_attention_heads": 12,
143
  "num_beam_groups": 1,
144
  "num_beams": 1,
145
  "num_channels": 3,
146
+ "num_hidden_layers": 12,
 
 
 
 
 
 
147
  "num_return_sequences": 1,
148
  "output_attentions": false,
149
  "output_hidden_states": false,
150
  "output_scores": false,
151
  "pad_token_id": null,
152
+ "patch_embed_hidden_size": 768,
153
+ "patch_size": 16,
154
  "prefix": null,
155
  "problem_type": null,
156
+ "projection_dim": 768,
157
  "pruned_heads": {},
158
+ "relative_attention_max_distance": 128,
159
+ "relative_attention_num_buckets": 32,
160
  "remove_invalid_values": false,
161
  "repetition_penalty": 1.0,
162
  "return_dict": true,
163
  "return_dict_in_generate": false,
164
  "sep_token_id": null,
165
+ "seq_len": 4096,
166
  "suppress_tokens": null,
167
  "task_specific_params": null,
168
  "temperature": 1.0,
 
176
  "torchscript": false,
177
  "transformers_version": "4.31.0.dev0",
178
  "typical_p": 1.0,
179
+ "use_bfloat16": false
180
+ }
 
 
 
 
 
 
 
 
181
  }
generation_config.json CHANGED
@@ -1,10 +1,8 @@
1
  {
2
  "_from_model_config": true,
3
- "bos_token_id": 0,
4
- "decoder_start_token_id": 57543,
5
- "eos_token_id": 2,
6
- "forced_eos_token_id": 2,
7
- "max_length": 768,
8
- "pad_token_id": 1,
9
- "transformers_version": "4.31.0.dev0"
10
  }
 
1
  {
2
  "_from_model_config": true,
3
+ "decoder_start_token_id": 0,
4
+ "eos_token_id": 1,
5
+ "pad_token_id": 0,
6
+ "transformers_version": "4.31.0.dev0",
7
+ "use_cache": false
 
 
8
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:33a1ab266baa2e9888d655b955b32842a6184cb8fbd168d922e15f398d4c515d
3
- size 809256729
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8f1f4bd06e273460063a7aceb776a61db630fe051dcc8c64c41f19b0d028fdef
3
+ size 1129967041