larabe committed on
Commit
2272d40
1 Parent(s): 0f67176

End of training

Browse files
README.md ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: mit
3
+ tags:
4
+ - generated_from_trainer
5
+ model-index:
6
+ - name: combo1
7
+ results: []
8
+ ---
9
+
10
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
11
+ should probably proofread and complete it, then remove this comment. -->
12
+
13
+ # combo1
14
+
15
+ This model is a fine-tuned version of [naver-clova-ix/donut-base](https://huggingface.co/naver-clova-ix/donut-base) on an unspecified dataset.
16
+
17
+ ## Model description
18
+
19
+ More information needed
20
+
21
+ ## Intended uses & limitations
22
+
23
+ More information needed
24
+
25
+ ## Training and evaluation data
26
+
27
+ More information needed
28
+
29
+ ## Training procedure
30
+
31
+ ### Training hyperparameters
32
+
33
+ The following hyperparameters were used during training:
34
+ - learning_rate: 0.0002
35
+ - train_batch_size: 4
36
+ - eval_batch_size: 8
37
+ - seed: 42
38
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
39
+ - lr_scheduler_type: linear
40
+ - num_epochs: 15
41
+
42
+ ### Training results
43
+
44
+
45
+
46
+ ### Framework versions
47
+
48
+ - Transformers 4.30.1
49
+ - Pytorch 2.0.1+cu117
50
+ - Datasets 2.14.5
51
+ - Tokenizers 0.13.3
added_tokens.json ADDED
@@ -0,0 +1,123 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "</s_None>": 57634,
3
+ "</s_address>": 57642,
4
+ "</s_client>": 57632,
5
+ "</s_client_tax_id>": 57630,
6
+ "</s_company>": 57640,
7
+ "</s_date>": 57638,
8
+ "</s_h_bic>": 57582,
9
+ "</s_h_contactperson>": 57580,
10
+ "</s_h_creditorcity>": 57564,
11
+ "</s_h_creditorcountry>": 57562,
12
+ "</s_h_creditoriban>": 57560,
13
+ "</s_h_creditorname>": 57558,
14
+ "</s_h_creditorpobox>": 57594,
15
+ "</s_h_creditorpostcode>": 57572,
16
+ "</s_h_creditorsalestaxid>": 57556,
17
+ "</s_h_creditorsearch>": 57590,
18
+ "</s_h_creditorstreet>": 57570,
19
+ "</s_h_creditortaxid>": 57554,
20
+ "</s_h_currency1>": 57552,
21
+ "</s_h_deldate>": 57550,
22
+ "</s_h_deliverynotenumber>": 57586,
23
+ "</s_h_discount>": 57578,
24
+ "</s_h_documentdate>": 57548,
25
+ "</s_h_documenttype>": 57546,
26
+ "</s_h_extracharges>": 57544,
27
+ "</s_h_grossamount>": 57542,
28
+ "</s_h_invoicenumber>": 57540,
29
+ "</s_h_invoicerecipient>": 57538,
30
+ "</s_h_netamount1>": 57536,
31
+ "</s_h_ordernumber>": 57568,
32
+ "</s_h_paymentterms>": 57576,
33
+ "</s_h_receiptdate>": 57534,
34
+ "</s_h_recipientsalestaxid>": 57584,
35
+ "</s_h_taxamount1>": 57532,
36
+ "</s_h_taxamount2>": 57530,
37
+ "</s_h_taxbasisamount1>": 57566,
38
+ "</s_h_taxbasisamount2>": 57588,
39
+ "</s_h_taxbasisamount3>": 57592,
40
+ "</s_h_taxrate1>": 57528,
41
+ "</s_h_taxrate2>": 57526,
42
+ "</s_h_web>": 57574,
43
+ "</s_header>": 57618,
44
+ "</s_iban>": 57628,
45
+ "</s_invoice_date>": 57626,
46
+ "</s_invoice_no>": 57624,
47
+ "</s_item_desc>": 57616,
48
+ "</s_item_gross_worth>": 57614,
49
+ "</s_item_net_price>": 57612,
50
+ "</s_item_net_worth>": 57610,
51
+ "</s_item_qty>": 57608,
52
+ "</s_item_vat>": 57606,
53
+ "</s_items>": 57604,
54
+ "</s_seller>": 57622,
55
+ "</s_seller_tax_id>": 57620,
56
+ "</s_summary>": 57596,
57
+ "</s_total>": 57636,
58
+ "</s_total_gross_worth>": 57602,
59
+ "</s_total_net_worth>": 57600,
60
+ "</s_total_vat>": 57598,
61
+ "<s_None>": 57633,
62
+ "<s_address>": 57641,
63
+ "<s_client>": 57631,
64
+ "<s_client_tax_id>": 57629,
65
+ "<s_company>": 57639,
66
+ "<s_date>": 57637,
67
+ "<s_h_bic>": 57581,
68
+ "<s_h_contactperson>": 57579,
69
+ "<s_h_creditorcity>": 57563,
70
+ "<s_h_creditorcountry>": 57561,
71
+ "<s_h_creditoriban>": 57559,
72
+ "<s_h_creditorname>": 57557,
73
+ "<s_h_creditorpobox>": 57593,
74
+ "<s_h_creditorpostcode>": 57571,
75
+ "<s_h_creditorsalestaxid>": 57555,
76
+ "<s_h_creditorsearch>": 57589,
77
+ "<s_h_creditorstreet>": 57569,
78
+ "<s_h_creditortaxid>": 57553,
79
+ "<s_h_currency1>": 57551,
80
+ "<s_h_deldate>": 57549,
81
+ "<s_h_deliverynotenumber>": 57585,
82
+ "<s_h_discount>": 57577,
83
+ "<s_h_documentdate>": 57547,
84
+ "<s_h_documenttype>": 57545,
85
+ "<s_h_extracharges>": 57543,
86
+ "<s_h_grossamount>": 57541,
87
+ "<s_h_invoicenumber>": 57539,
88
+ "<s_h_invoicerecipient>": 57537,
89
+ "<s_h_netamount1>": 57535,
90
+ "<s_h_ordernumber>": 57567,
91
+ "<s_h_paymentterms>": 57575,
92
+ "<s_h_receiptdate>": 57533,
93
+ "<s_h_recipientsalestaxid>": 57583,
94
+ "<s_h_taxamount1>": 57531,
95
+ "<s_h_taxamount2>": 57529,
96
+ "<s_h_taxbasisamount1>": 57565,
97
+ "<s_h_taxbasisamount2>": 57587,
98
+ "<s_h_taxbasisamount3>": 57591,
99
+ "<s_h_taxrate1>": 57527,
100
+ "<s_h_taxrate2>": 57525,
101
+ "<s_h_web>": 57573,
102
+ "<s_header>": 57617,
103
+ "<s_iban>": 57627,
104
+ "<s_iitcdip>": 57523,
105
+ "<s_invoice_date>": 57625,
106
+ "<s_invoice_no>": 57623,
107
+ "<s_item_desc>": 57615,
108
+ "<s_item_gross_worth>": 57613,
109
+ "<s_item_net_price>": 57611,
110
+ "<s_item_net_worth>": 57609,
111
+ "<s_item_qty>": 57607,
112
+ "<s_item_vat>": 57605,
113
+ "<s_items>": 57603,
114
+ "<s_seller>": 57621,
115
+ "<s_seller_tax_id>": 57619,
116
+ "<s_summary>": 57595,
117
+ "<s_synthdog>": 57524,
118
+ "<s_total>": 57635,
119
+ "<s_total_gross_worth>": 57601,
120
+ "<s_total_net_worth>": 57599,
121
+ "<s_total_vat>": 57597,
122
+ "<sep/>": 57522
123
+ }
generation_config.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 0,
4
+ "eos_token_id": 2,
5
+ "forced_eos_token_id": 2,
6
+ "pad_token_id": 1,
7
+ "transformers_version": "4.30.1"
8
+ }
preprocessor_config.json ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "do_align_long_axis": false,
3
+ "do_normalize": true,
4
+ "do_pad": true,
5
+ "do_rescale": true,
6
+ "do_resize": true,
7
+ "do_thumbnail": true,
8
+ "image_mean": [
9
+ 0.5,
10
+ 0.5,
11
+ 0.5
12
+ ],
13
+ "image_processor_type": "DonutImageProcessor",
14
+ "image_std": [
15
+ 0.5,
16
+ 0.5,
17
+ 0.5
18
+ ],
19
+ "processor_class": "DonutProcessor",
20
+ "resample": 2,
21
+ "rescale_factor": 0.00392156862745098,
22
+ "size": [
23
+ 720,
24
+ 960
25
+ ]
26
+ }
runs/Sep20_14-29-19_ip-172-31-42-230.eu-central-1.compute.internal/events.out.tfevents.1695220165.ip-172-31-42-230.eu-central-1.compute.internal.16990.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2785a592ea0b1442cff437690ce2eefba326b3d99d57eee066dea7ea5108c644
3
- size 10432
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ad48f9ff959dae5dbec4b3d75faf70d0a73d3155a48d7a0af8b5b453c3575019
3
+ size 10786
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cb9e3dce4c326195d08fc3dd0f7e2eee1da8595c847bf4c1a9c78b7a82d47e2d
3
+ size 1296245
special_tokens_map.json ADDED
@@ -0,0 +1,137 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<s_h_taxrate2>",
4
+ "</s_h_taxrate2>",
5
+ "<s_h_taxrate1>",
6
+ "</s_h_taxrate1>",
7
+ "<s_h_taxamount2>",
8
+ "</s_h_taxamount2>",
9
+ "<s_h_taxamount1>",
10
+ "</s_h_taxamount1>",
11
+ "<s_h_receiptdate>",
12
+ "</s_h_receiptdate>",
13
+ "<s_h_netamount1>",
14
+ "</s_h_netamount1>",
15
+ "<s_h_invoicerecipient>",
16
+ "</s_h_invoicerecipient>",
17
+ "<s_h_invoicenumber>",
18
+ "</s_h_invoicenumber>",
19
+ "<s_h_grossamount>",
20
+ "</s_h_grossamount>",
21
+ "<s_h_extracharges>",
22
+ "</s_h_extracharges>",
23
+ "<s_h_documenttype>",
24
+ "</s_h_documenttype>",
25
+ "<s_h_documentdate>",
26
+ "</s_h_documentdate>",
27
+ "<s_h_deldate>",
28
+ "</s_h_deldate>",
29
+ "<s_h_currency1>",
30
+ "</s_h_currency1>",
31
+ "<s_h_creditortaxid>",
32
+ "</s_h_creditortaxid>",
33
+ "<s_h_creditorsalestaxid>",
34
+ "</s_h_creditorsalestaxid>",
35
+ "<s_h_creditorname>",
36
+ "</s_h_creditorname>",
37
+ "<s_h_creditoriban>",
38
+ "</s_h_creditoriban>",
39
+ "<s_h_creditorcountry>",
40
+ "</s_h_creditorcountry>",
41
+ "<s_h_creditorcity>",
42
+ "</s_h_creditorcity>",
43
+ "<s_h_taxbasisamount1>",
44
+ "</s_h_taxbasisamount1>",
45
+ "<s_h_ordernumber>",
46
+ "</s_h_ordernumber>",
47
+ "<s_h_creditorstreet>",
48
+ "</s_h_creditorstreet>",
49
+ "<s_h_creditorpostcode>",
50
+ "</s_h_creditorpostcode>",
51
+ "<s_h_web>",
52
+ "</s_h_web>",
53
+ "<s_h_paymentterms>",
54
+ "</s_h_paymentterms>",
55
+ "<s_h_discount>",
56
+ "</s_h_discount>",
57
+ "<s_h_contactperson>",
58
+ "</s_h_contactperson>",
59
+ "<s_h_bic>",
60
+ "</s_h_bic>",
61
+ "<s_h_recipientsalestaxid>",
62
+ "</s_h_recipientsalestaxid>",
63
+ "<s_h_deliverynotenumber>",
64
+ "</s_h_deliverynotenumber>",
65
+ "<s_h_taxbasisamount2>",
66
+ "</s_h_taxbasisamount2>",
67
+ "<s_h_creditorsearch>",
68
+ "</s_h_creditorsearch>",
69
+ "<s_h_taxbasisamount3>",
70
+ "</s_h_taxbasisamount3>",
71
+ "<s_h_creditorpobox>",
72
+ "</s_h_creditorpobox>",
73
+ "<s_summary>",
74
+ "</s_summary>",
75
+ "<s_total_vat>",
76
+ "</s_total_vat>",
77
+ "<s_total_net_worth>",
78
+ "</s_total_net_worth>",
79
+ "<s_total_gross_worth>",
80
+ "</s_total_gross_worth>",
81
+ "<s_items>",
82
+ "</s_items>",
83
+ "<s_item_vat>",
84
+ "</s_item_vat>",
85
+ "<s_item_qty>",
86
+ "</s_item_qty>",
87
+ "<s_item_net_worth>",
88
+ "</s_item_net_worth>",
89
+ "<s_item_net_price>",
90
+ "</s_item_net_price>",
91
+ "<s_item_gross_worth>",
92
+ "</s_item_gross_worth>",
93
+ "<s_item_desc>",
94
+ "</s_item_desc>",
95
+ "<s_header>",
96
+ "</s_header>",
97
+ "<s_seller_tax_id>",
98
+ "</s_seller_tax_id>",
99
+ "<s_seller>",
100
+ "</s_seller>",
101
+ "<s_invoice_no>",
102
+ "</s_invoice_no>",
103
+ "<s_invoice_date>",
104
+ "</s_invoice_date>",
105
+ "<s_iban>",
106
+ "</s_iban>",
107
+ "<s_client_tax_id>",
108
+ "</s_client_tax_id>",
109
+ "<s_client>",
110
+ "</s_client>",
111
+ "<s_None>",
112
+ "</s_None>",
113
+ "<s_total>",
114
+ "</s_total>",
115
+ "<s_date>",
116
+ "</s_date>",
117
+ "<s_company>",
118
+ "</s_company>",
119
+ "<s_address>",
120
+ "</s_address>",
121
+ "<s>",
122
+ "</s>"
123
+ ],
124
+ "bos_token": "<s>",
125
+ "cls_token": "<s>",
126
+ "eos_token": "</s>",
127
+ "mask_token": {
128
+ "content": "<mask>",
129
+ "lstrip": true,
130
+ "normalized": true,
131
+ "rstrip": false,
132
+ "single_word": false
133
+ },
134
+ "pad_token": "<pad>",
135
+ "sep_token": "</s>",
136
+ "unk_token": "<unk>"
137
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": "<s>",
3
+ "clean_up_tokenization_spaces": true,
4
+ "cls_token": "<s>",
5
+ "eos_token": "</s>",
6
+ "mask_token": {
7
+ "__type": "AddedToken",
8
+ "content": "<mask>",
9
+ "lstrip": true,
10
+ "normalized": true,
11
+ "rstrip": false,
12
+ "single_word": false
13
+ },
14
+ "model_max_length": 1000000000000000019884624838656,
15
+ "pad_token": "<pad>",
16
+ "processor_class": "DonutProcessor",
17
+ "sep_token": "</s>",
18
+ "sp_model_kwargs": {},
19
+ "tokenizer_class": "XLMRobertaTokenizer",
20
+ "unk_token": "<unk>"
21
+ }