jinhybr committed on
Commit
1a5ca8a
1 Parent(s): 2c80e1e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -24
app.py CHANGED
@@ -102,33 +102,20 @@ def draw_boxes(image, boxes, predictions):
102
  def process_image(image):
103
  width, height = image.size
104
 
105
- # encode
106
- encoding = processor(
107
- image, truncation=True, return_offsets_mapping=True, return_tensors="pt"
108
- )
109
- offset_mapping = encoding.pop("offset_mapping")
110
-
111
- # forward pass
112
  outputs = model(**encoding)
113
-
114
- # get predictions
115
  predictions = outputs.logits.argmax(-1).squeeze().tolist()
116
- token_boxes = encoding.bbox.squeeze().tolist()
117
-
118
- # only keep non-subword predictions
119
- is_subword = np.array(offset_mapping.squeeze().tolist())[:, 0] != 0
120
- true_predictions = [
121
- id2label[pred] for idx, pred in enumerate(predictions) if not is_subword[idx]
122
- ]
123
- true_boxes = [
124
- unnormalize_box(box, width, height)
125
- for idx, box in enumerate(token_boxes)
126
- if not is_subword[idx]
127
- ]
128
-
129
- draw_boxes(image, true_boxes, true_predictions)
130
 
131
- return image
132
 
133
 
134
  title = "OCR Document Parser : Information Extraction - Fine Tuned LiLT Language-independent Layout Transformer Model"
 
102
  def process_image(image):
103
  width, height = image.size
104
 
105
+ # create model input
106
+ encoding = processor(image, return_tensors="pt")
107
+ del encoding["pixel_values"]
108
+ # run inference
 
 
 
109
  outputs = model(**encoding)
 
 
110
  predictions = outputs.logits.argmax(-1).squeeze().tolist()
111
+ # get labels
112
+ labels = [model.config.id2label[prediction] for prediction in predictions]
113
+ if output_image:
114
+ return draw_boxes(image, encoding["bbox"][0], labels)
115
+ else:
116
+ return labels
 
 
 
 
 
 
 
 
117
 
118
+
119
 
120
 
121
  title = "OCR Document Parser : Information Extraction - Fine Tuned LiLT Language-independent Layout Transformer Model"