Jacques2207 committed on
Commit
aa8f6da
1 Parent(s): 3dec1d4

End of training

Browse files
README.md ADDED
@@ -0,0 +1,70 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: cc-by-nc-sa-4.0
3
+ tags:
4
+ - generated_from_trainer
5
+ model-index:
6
+ - name: layoutlmv3-base-ner
7
+ results: []
8
+ ---
9
+
10
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
11
+ should probably proofread and complete it, then remove this comment. -->
12
+
13
+ # layoutlmv3-base-ner
14
+
15
+ This model is a fine-tuned version of [microsoft/layoutlmv3-base](https://huggingface.co/microsoft/layoutlmv3-base) on an unknown dataset.
16
+ It achieves the following results on the evaluation set:
17
+ - Loss: nan
18
+ - Footer: {'precision': 0.9749447310243183, 'recall': 0.9792746113989638, 'f1': 0.9771048744460857, 'number': 1351}
19
+ - Header: {'precision': 0.927519818799547, 'recall': 0.9578947368421052, 'f1': 0.9424626006904488, 'number': 855}
20
+ - Table: {'precision': 0.7589285714285714, 'recall': 0.8531994981179423, 'f1': 0.8033077377436504, 'number': 797}
21
+ - Caption: {'precision': 0.6352785145888594, 'recall': 0.7496087636932708, 'f1': 0.687724335965542, 'number': 639}
22
+ - Text: {'precision': 0.6819444444444445, 'recall': 0.7897064736630478, 'f1': 0.7318800074529532, 'number': 2487}
23
+ - Picture: {'precision': 0.772196261682243, 'recall': 0.8283208020050126, 'f1': 0.7992744860943168, 'number': 798}
24
+ - Title: {'precision': 0.4519230769230769, 'recall': 0.415929203539823, 'f1': 0.43317972350230416, 'number': 113}
25
+ - Footnote: {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 55}
26
+ - Formula: {'precision': 0.38578680203045684, 'recall': 0.7307692307692307, 'f1': 0.5049833887043189, 'number': 104}
27
+ - Overall Precision: 0.7631
28
+ - Overall Recall: 0.8403
29
+ - Overall F1: 0.7998
30
+ - Overall Accuracy: 0.9572
31
+
32
+ ## Model description
33
+
34
+ More information needed
35
+
36
+ ## Intended uses & limitations
37
+
38
+ More information needed
39
+
40
+ ## Training and evaluation data
41
+
42
+ More information needed
43
+
44
+ ## Training procedure
45
+
46
+ ### Training hyperparameters
47
+
48
+ The following hyperparameters were used during training:
49
+ - learning_rate: 3e-05
50
+ - train_batch_size: 1
51
+ - eval_batch_size: 1
52
+ - seed: 42
53
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
54
+ - lr_scheduler_type: linear
55
+ - num_epochs: 2
56
+
57
+ ### Training results
58
+
59
+ | Training Loss | Epoch | Step | Validation Loss | Footer | Header | Table | Caption | Text | Picture | Title | Footnote | Formula | Overall Precision | Overall Recall | Overall F1 | Overall Accuracy |
60
+ |:-------------:|:-----:|:----:|:---------------:|:---------------------------------------------------------------------------------------------------------:|:-------------------------------------------------------------------------------------------------------:|:--------------------------------------------------------------------------------------------------------:|:--------------------------------------------------------------------------------------------------------:|:---------------------------------------------------------------------------------------------------------:|:--------------------------------------------------------------------------------------------------------:|:-----------------------------------------------------------------------------------------------------------:|:----------------------------------------------------------:|:----------------------------------------------------------------------------------------------------------:|:-----------------:|:--------------:|:----------:|:----------------:|
61
+ | 0.6151 | 1.0 | 4900 | nan | {'precision': 0.9154334038054969, 'recall': 0.9615099925980755, 'f1': 0.9379061371841154, 'number': 1351} | {'precision': 0.8517316017316018, 'recall': 0.92046783625731, 'f1': 0.8847667228780213, 'number': 855} | {'precision': 0.5285592497868713, 'recall': 0.7779171894604768, 'f1': 0.6294416243654822, 'number': 797} | {'precision': 0.3216326530612245, 'recall': 0.6165884194053208, 'f1': 0.4227467811158798, 'number': 639} | {'precision': 0.4335355763927192, 'recall': 0.632086851628468, 'f1': 0.5143137575658433, 'number': 2487} | {'precision': 0.5630585898709036, 'recall': 0.7105263157894737, 'f1': 0.6282548476454293, 'number': 798} | {'precision': 0.06504065040650407, 'recall': 0.21238938053097345, 'f1': 0.09958506224066391, 'number': 113} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 55} | {'precision': 0.07069408740359898, 'recall': 0.5288461538461539, 'f1': 0.12471655328798187, 'number': 104} | 0.5055 | 0.7387 | 0.6002 | 0.9093 |
62
+ | 0.2733 | 2.0 | 9800 | nan | {'precision': 0.9749447310243183, 'recall': 0.9792746113989638, 'f1': 0.9771048744460857, 'number': 1351} | {'precision': 0.927519818799547, 'recall': 0.9578947368421052, 'f1': 0.9424626006904488, 'number': 855} | {'precision': 0.7589285714285714, 'recall': 0.8531994981179423, 'f1': 0.8033077377436504, 'number': 797} | {'precision': 0.6352785145888594, 'recall': 0.7496087636932708, 'f1': 0.687724335965542, 'number': 639} | {'precision': 0.6819444444444445, 'recall': 0.7897064736630478, 'f1': 0.7318800074529532, 'number': 2487} | {'precision': 0.772196261682243, 'recall': 0.8283208020050126, 'f1': 0.7992744860943168, 'number': 798} | {'precision': 0.4519230769230769, 'recall': 0.415929203539823, 'f1': 0.43317972350230416, 'number': 113} | {'precision': 0.0, 'recall': 0.0, 'f1': 0.0, 'number': 55} | {'precision': 0.38578680203045684, 'recall': 0.7307692307692307, 'f1': 0.5049833887043189, 'number': 104} | 0.7631 | 0.8403 | 0.7998 | 0.9572 |
63
+
64
+
65
+ ### Framework versions
66
+
67
+ - Transformers 4.26.0
68
+ - Pytorch 1.12.1
69
+ - Datasets 2.9.0
70
+ - Tokenizers 0.13.2
logs/events.out.tfevents.1678893268.138-2-233-57.233500.0 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:68c136722783a0f2198a81fa2360399f7936175a5bb4cc9dc33693cec13a4451
3
- size 5936
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31fb49d247d99608e9aacb4558100b298cc9a232fa09345c8d656f2fe82dc1c4
3
+ size 6290
merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
preprocessor_config.json ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "apply_ocr": false,
3
+ "do_normalize": true,
4
+ "do_rescale": true,
5
+ "do_resize": true,
6
+ "feature_extractor_type": "LayoutLMv3FeatureExtractor",
7
+ "image_mean": [
8
+ 0.5,
9
+ 0.5,
10
+ 0.5
11
+ ],
12
+ "image_processor_type": "LayoutLMv3ImageProcessor",
13
+ "image_std": [
14
+ 0.5,
15
+ 0.5,
16
+ 0.5
17
+ ],
18
+ "ocr_lang": null,
19
+ "processor_class": "LayoutLMv3Processor",
20
+ "resample": 2,
21
+ "rescale_factor": 0.00392156862745098,
22
+ "size": {
23
+ "height": 224,
24
+ "width": 224
25
+ },
26
+ "tesseract_config": ""
27
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,87 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": true,
3
+ "apply_ocr": false,
4
+ "bos_token": {
5
+ "__type": "AddedToken",
6
+ "content": "<s>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": false
11
+ },
12
+ "cls_token": {
13
+ "__type": "AddedToken",
14
+ "content": "<s>",
15
+ "lstrip": false,
16
+ "normalized": true,
17
+ "rstrip": false,
18
+ "single_word": false
19
+ },
20
+ "cls_token_box": [
21
+ 0,
22
+ 0,
23
+ 0,
24
+ 0
25
+ ],
26
+ "eos_token": {
27
+ "__type": "AddedToken",
28
+ "content": "</s>",
29
+ "lstrip": false,
30
+ "normalized": true,
31
+ "rstrip": false,
32
+ "single_word": false
33
+ },
34
+ "errors": "replace",
35
+ "mask_token": {
36
+ "__type": "AddedToken",
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": true,
40
+ "rstrip": false,
41
+ "single_word": false
42
+ },
43
+ "model_max_length": 512,
44
+ "name_or_path": "microsoft/layoutlmv3-base",
45
+ "only_label_first_subword": true,
46
+ "pad_token": {
47
+ "__type": "AddedToken",
48
+ "content": "<pad>",
49
+ "lstrip": false,
50
+ "normalized": true,
51
+ "rstrip": false,
52
+ "single_word": false
53
+ },
54
+ "pad_token_box": [
55
+ 0,
56
+ 0,
57
+ 0,
58
+ 0
59
+ ],
60
+ "pad_token_label": -100,
61
+ "processor_class": "LayoutLMv3Processor",
62
+ "sep_token": {
63
+ "__type": "AddedToken",
64
+ "content": "</s>",
65
+ "lstrip": false,
66
+ "normalized": true,
67
+ "rstrip": false,
68
+ "single_word": false
69
+ },
70
+ "sep_token_box": [
71
+ 0,
72
+ 0,
73
+ 0,
74
+ 0
75
+ ],
76
+ "special_tokens_map_file": null,
77
+ "tokenizer_class": "LayoutLMv3Tokenizer",
78
+ "trim_offsets": true,
79
+ "unk_token": {
80
+ "__type": "AddedToken",
81
+ "content": "<unk>",
82
+ "lstrip": false,
83
+ "normalized": true,
84
+ "rstrip": false,
85
+ "single_word": false
86
+ }
87
+ }
vocab.json ADDED
The diff for this file is too large to render. See raw diff