mciancone committed on
Commit
d6043d9
·
verified ·
1 Parent(s): bd27421

Upload model artifacts and classifier scripts

Browse files
README.md CHANGED
@@ -23,12 +23,15 @@
23
 
24
  ## Classes
25
 
26
-
 
 
 
27
 
28
  Pages matching any of the following classes should trigger image embedding:
29
 
30
- - `Visual Essential`
31
- - `Complex Table`
32
 
33
  Default threshold: `0.5`
34
 
 
23
 
24
  ## Classes
25
 
26
+ - `Complex Table`
27
+ - `Simple Table`
28
+ - `Visual - Essential`
29
+ - `Visual - Supportive`
30
 
31
  Pages matching any of the following classes should trigger image embedding:
32
 
33
+ - `Complex Table`
34
+ - `Visual - Essential`
35
 
36
  Default threshold: `0.5`
37
 
config.json CHANGED
@@ -1,128 +1,27 @@
1
  {
2
- "model": {
3
- "name": "efficientnet_lite0",
4
- "pretrained": true,
5
- "dropout": 0,
6
- "use_spatial_pooling": false
7
- },
8
- "classes": [
9
- "Visual - Essential",
10
- "Simple Table",
11
- "Chart/Graph",
12
- "Visual - Supportive",
13
- "Annotated figure",
14
- "No Specific Feature",
15
- "Diagram/Flowchart",
16
- "Visual - Decorative",
17
  "Complex Table",
18
- "Infographic",
19
- "Form",
20
- "Text to OCR"
21
  ],
22
- "class_mapping": {
23
- "Form": null,
24
- "No Specific Feature": null,
25
- "Text to OCR": null,
26
- "Visual - Decorative": null,
27
- "Infographic": null,
28
- "Chart/Graph": null,
29
- "Annotated figure": null,
30
- "Diagram/Flowchart": null
31
- },
32
  "image_required_classes": [
33
- "Visual Essential",
34
- "Complex Table"
35
- ],
36
- "data": {
37
- "train_split": 0.8,
38
- "val_split": 0.1,
39
- "test_split": 0.1,
40
- "image_size": 224,
41
- "batch_size": 32,
42
- "num_workers": 4,
43
- "seed": 42
44
- },
45
- "augmentation": {
46
- "center_crop_shortest": true,
47
- "whiteout_header": false,
48
- "whiteout_fraction": 0.15,
49
- "train": {
50
- "horizontal_flip": 0.5,
51
- "rotation_degrees": 5,
52
- "color_jitter": {
53
- "brightness": 0.2,
54
- "contrast": 0.2,
55
- "saturation": 0.1,
56
- "hue": 0.05
57
- },
58
- "random_erasing": 0.1
59
- },
60
- "val": {
61
- "enabled": false
62
- }
63
- },
64
- "training": {
65
- "epochs": 40,
66
- "learning_rate": 0.0001,
67
- "weight_decay": 0.0001,
68
- "optimizer": "adamw",
69
- "scheduler": "cosine",
70
- "warmup_epochs": 5,
71
- "label_smoothing": 0.0,
72
- "gradient_clip_norm": 1.0,
73
- "pos_weight": [
74
- 3.6715595722198486,
75
- 6.668674468994141,
76
- 2.3281044960021973,
77
- 6.0722222328186035
78
- ]
79
- },
80
- "monitoring": {
81
- "metric": "val_f1",
82
- "mode": "max"
83
- },
84
- "early_stopping": {
85
- "enabled": true,
86
- "patience": 20
87
- },
88
- "evaluation": {
89
- "threshold": 0.5,
90
- "save_confusion_matrix": true,
91
- "save_per_class_metrics": true
92
- },
93
- "checkpointing": {
94
- "save_best_only": true,
95
- "save_last": true
96
- },
97
- "paths": {
98
- "data_dir": "data",
99
- "output_dir": "outputs",
100
- "checkpoint_dir": "checkpoints",
101
- "logs_dir": "logs"
102
- },
103
- "logging": {
104
- "use_tensorboard": false,
105
- "use_wandb": true,
106
- "wandb_project": "pdf-page-classifier",
107
- "log_interval": 10,
108
- "wandb_run_name": "silver-line-69"
109
- },
110
- "qat": {
111
- "enabled": true,
112
- "epochs": 5,
113
- "learning_rate": "1e-5",
114
- "preset": "mixed",
115
- "num_init_samples": 300
116
- },
117
- "onnx": {
118
- "opset_version": 14,
119
- "dynamic_axes": true,
120
- "simplify": true,
121
- "input_names": [
122
- "image"
123
- ],
124
- "output_names": [
125
- "probabilities"
126
- ]
127
- }
128
  }
 
1
  {
2
+ "image_size": 224,
3
+ "mean": [
4
+ 0.485,
5
+ 0.456,
6
+ 0.406
7
+ ],
8
+ "std": [
9
+ 0.229,
10
+ 0.224,
11
+ 0.225
12
+ ],
13
+ "center_crop_shortest": true,
14
+ "whiteout_header": false,
15
+ "whiteout_fraction": 0.15,
16
+ "class_names": [
17
  "Complex Table",
18
+ "Simple Table",
19
+ "Visual - Essential",
20
+ "Visual - Supportive"
21
  ],
22
+ "threshold": 0.5,
 
 
 
 
 
 
 
 
 
23
  "image_required_classes": [
24
+ "Complex Table",
25
+ "Visual - Essential"
26
+ ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
27
  }
model.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:049ad13d69105d029d2b03c26863f6c9f6338ae303679967a51e7a4d6af21271
3
+ size 13461047
model_int8.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c6cfd45f2c34606f96166056019a991509267f754d7b37785ac0fa974cb0b0be
3
+ size 3812747
openvino_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:32751981326b62f992d5d5de7db6a05bd9bf8def264bde1ba6bab8b61568b1d4
3
+ size 6710280
openvino_model.xml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:595d8436a54945623bb3f21f9457b253d07a0bef3b5de4255183cb6ba03c3c68
3
+ size 197760
openvino_model_int8.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:433c7fcc0fece5d3f14df1ce8a83f3856e642bb3904567a65c88c3048b629a12
3
+ size 3507576
openvino_model_int8.xml ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3ca09dfb6c67f71def7da80fc655bd07b20df2dac98e1c4459e5a3624c38f8f6
3
+ size 387908