Commit 8a5956b
Alexander Slessor committed
1 Parent(s): 947072e

added more files
.gitignore CHANGED
@@ -1,7 +1,10 @@
 __pycache__
 .mypy_cache
 *.pdf
+*.png
 
 main.py
 setup.md
+invoice_example.png
 initial_files
+test_*
handler.py CHANGED
@@ -3,9 +3,14 @@ from transformers import LayoutLMForTokenClassification, LayoutLMv2Processor
 import torch
 from subprocess import run
 
-# install tesseract-ocr and pytesseract
 run("apt install -y tesseract-ocr", shell=True, check=True)
-run("pip install pytesseract", shell=True, check=True)
+
+class HugEndpointException(Exception):
+    def __init__(self, e):
+        self.e = e
+
+    def __str__(self):
+        return f'Custom Endpoint Exception: {self.e}'
 
 # helper function to unnormalize bboxes for drawing onto the image
 def unnormalize_box(bbox, width, height):
@@ -37,28 +42,31 @@ class EndpointHandler:
         # process image
         encoding = self.processor(image, return_tensors="pt")
 
-        # run prediction
-        with torch.inference_mode():
-            outputs = self.model(
-                input_ids=encoding.input_ids.to(device),
-                bbox=encoding.bbox.to(device),
-                attention_mask=encoding.attention_mask.to(device),
-                token_type_ids=encoding.token_type_ids.to(device),
-            )
-            predictions = outputs.logits.softmax(-1)
+        try:
+            # run prediction
+            with torch.inference_mode():
+                outputs = self.model(
+                    input_ids=encoding.input_ids.to(device),
+                    bbox=encoding.bbox.to(device),
+                    attention_mask=encoding.attention_mask.to(device),
+                    token_type_ids=encoding.token_type_ids.to(device),
+                )
+                predictions = outputs.logits.softmax(-1)
 
-        # post process output
-        result = []
-        for item, inp_ids, bbox in zip(
-            predictions.squeeze(0).cpu(),
-            encoding.input_ids.squeeze(0).cpu(),
-            encoding.bbox.squeeze(0).cpu()
-        ):
-            label = self.model.config.id2label[int(item.argmax().cpu())]
-            if label == "O":
-                continue
-            score = item.max().item()
-            text = self.processor.tokenizer.decode(inp_ids)
-            bbox = unnormalize_box(bbox.tolist(), image.width, image.height)
-            result.append({"label": label, "score": score, "text": text, "bbox": bbox})
-        return {"predictions": result}
+            # post process output
+            result = []
+            for item, inp_ids, bbox in zip(
+                predictions.squeeze(0).cpu(),
+                encoding.input_ids.squeeze(0).cpu(),
+                encoding.bbox.squeeze(0).cpu()
+            ):
+                label = self.model.config.id2label[int(item.argmax().cpu())]
+                if label == "O":
+                    continue
+                score = item.max().item()
+                text = self.processor.tokenizer.decode(inp_ids)
+                bbox = unnormalize_box(bbox.tolist(), image.width, image.height)
+                result.append({"label": label, "score": score, "text": text, "bbox": bbox})
+            return {"predictions": result}
+        except Exception as e:
+            raise HugEndpointException(e)
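
Note: the commit wraps inference and post-processing in a try/except that re-raises any failure as HugEndpointException, so endpoint errors surface with the "Custom Endpoint Exception" prefix. A minimal local smoke-test sketch follows; the constructor argument and the payload key "inputs" are assumptions based on the usual Hugging Face custom-handler convention, since the rest of the handler is not shown in this diff.

# Local smoke test (sketch): payload shape and constructor argument are assumptions,
# not part of this diff.
from PIL import Image
from handler import EndpointHandler, HugEndpointException

handler = EndpointHandler(path=".")  # assumes the handler loads model/processor from the repo root
try:
    result = handler({"inputs": Image.open("invoice_example.png").convert("RGB")})
    for pred in result["predictions"]:
        print(pred["label"], round(pred["score"], 3), pred["text"])
except HugEndpointException as err:
    print(err)  # "Custom Endpoint Exception: ..."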
preprocessor_config.json ADDED
@@ -0,0 +1,9 @@
+{
+  "apply_ocr": true,
+  "do_resize": true,
+  "feature_extractor_type": "LayoutLMv2FeatureExtractor",
+  "ocr_lang": null,
+  "processor_class": "LayoutLMv2Processor",
+  "resample": 2,
+  "size": 224
+}
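
Note: with this config committed, the repo can be loaded as a full LayoutLMv2 processor, and because "apply_ocr" is true the feature extractor runs Tesseract (via pytesseract) on the incoming image itself. A minimal loading sketch, assuming the placeholder repo id "your-username/your-endpoint-repo" and a local invoice_example.png:

# Sketch only; the repo id below is a placeholder, not the actual repository name.
from transformers import LayoutLMv2Processor
from PIL import Image

processor = LayoutLMv2Processor.from_pretrained("your-username/your-endpoint-repo")
image = Image.open("invoice_example.png").convert("RGB")
# apply_ocr=true: only the image is passed in; words and boxes come from Tesseract.
encoding = processor(image, return_tensors="pt")
print(encoding.keys())  # input_ids, token_type_ids, attention_mask, bbox, image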
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4f31380262cd4f276be211189196f190c0268e9cece977d500886a4e4c16fc07
+size 450606565
requirements.txt ADDED
@@ -0,0 +1 @@
+pytesseract
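
Note: declaring pytesseract here replaces the runtime `pip install pytesseract` that was removed from handler.py; the Tesseract binary itself is still installed with apt at startup. A small sanity-check sketch, assuming both pieces are installed:

# Sketch: verify the OCR stack that apply_ocr depends on is actually available.
import pytesseract
print(pytesseract.get_tesseract_version())  # raises TesseractNotFoundError if the apt package is missing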
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
+{
+  "cls_token": "[CLS]",
+  "mask_token": "[MASK]",
+  "pad_token": "[PAD]",
+  "sep_token": "[SEP]",
+  "unk_token": "[UNK]"
+}
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,39 @@
+{
+  "additional_special_tokens": null,
+  "apply_ocr": false,
+  "cls_token": "[CLS]",
+  "cls_token_box": [
+    0,
+    0,
+    0,
+    0
+  ],
+  "do_basic_tokenize": true,
+  "do_lower_case": true,
+  "mask_token": "[MASK]",
+  "model_max_length": 512,
+  "name_or_path": "microsoft/layoutlmv2-base-uncased",
+  "never_split": null,
+  "only_label_first_subword": true,
+  "pad_token": "[PAD]",
+  "pad_token_box": [
+    0,
+    0,
+    0,
+    0
+  ],
+  "pad_token_label": -100,
+  "processor_class": "LayoutLMv2Processor",
+  "sep_token": "[SEP]",
+  "sep_token_box": [
+    1000,
+    1000,
+    1000,
+    1000
+  ],
+  "special_tokens_map_file": null,
+  "strip_accents": null,
+  "tokenize_chinese_chars": true,
+  "tokenizer_class": "LayoutLMv2Tokenizer",
+  "unk_token": "[UNK]"
+}
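
Note: this tokenizer config works on the 0-1000 normalized coordinate grid and pins the special-token boxes ([CLS] and [PAD] at [0, 0, 0, 0], [SEP] at [1000, 1000, 1000, 1000]), which is why handler.py's unnormalize_box scales predicted boxes back to pixel space. A short sketch; the words and boxes below are made up for illustration:

# Sketch with made-up words/boxes, already normalized to the 0-1000 grid this tokenizer expects.
from transformers import LayoutLMv2Tokenizer

tokenizer = LayoutLMv2Tokenizer.from_pretrained("microsoft/layoutlmv2-base-uncased")
words = ["Invoice", "Total:", "$120.00"]
boxes = [[80, 40, 220, 70], [600, 900, 700, 930], [710, 900, 820, 930]]
enc = tokenizer(words, boxes=boxes, return_tensors="pt")
# First and last rows of enc["bbox"] are the fixed cls_token_box / sep_token_box from this config.
print(enc["bbox"][0, 0].tolist(), enc["bbox"][0, -1].tolist())  # [0, 0, 0, 0] [1000, 1000, 1000, 1000]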
training_args.bin ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c56fc4a68a8102016f0d13df85e3cef173b08bfd50400f2f88c520a325d11676
+size 3375
vocab.txt ADDED
The diff for this file is too large to render. See raw diff