phamvi856 committed on
Commit fd20fcc · 1 Parent(s): 9255af8

Update app.py

Files changed (1)
  1. app.py +5 -17
app.py CHANGED
@@ -57,15 +57,13 @@ def process_image(image):
     width, height = image.size
 
     # Encode image
-    encoding = processor(image, truncation=True, return_offsets_mapping=True, return_tensors="pt")
+    encoding = processor(image, truncation=True, padding="max_length", max_length=512, return_tensors="pt")
     input_ids = encoding.input_ids.to(device)
     attention_mask = encoding.attention_mask.to(device)
     bbox = encoding.bbox.to(device)
 
-    # Predict token labels
-    with torch.no_grad():
-        outputs = model(input_ids=input_ids, bbox=bbox, attention_mask=attention_mask)
-
+    # Inference
+    outputs = model(input_ids=input_ids, bbox=bbox, attention_mask=attention_mask)
     predicted_labels = outputs.logits.argmax(dim=2).squeeze().tolist()
 
     # Extract content from boxes
@@ -74,12 +72,7 @@ def process_image(image):
         predicted_label = id2label[predicted_labels[idx]]
         box_width = np.array(box)[2] - np.array(box)[0]
         box_height = np.array(box)[3] - np.array(box)[1]
-        normalized_box = [
-            box[0] * width / 1000,
-            box[1] * height / 1000,
-            box_width * width / 1000,
-            box_height * height / 1000,
-        ]
+        normalized_box = unnormalize_box(box, width, height)
         extracted_content[predicted_label] = image.crop(normalized_box).copy()
 
     # Draw predictions over the image
@@ -89,12 +82,7 @@ def process_image(image):
         predicted_label = iob_to_label(id2label[prediction])
         box_width = np.array(box)[2] - np.array(box)[0]
         box_height = np.array(box)[3] - np.array(box)[1]
-        normalized_box = [
-            box[0] * width / 1000,
-            box[1] * height / 1000,
-            box_width * width / 1000,
-            box_height * height / 1000,
-        ]
+        normalized_box = unnormalize_box(box, width, height)
         draw.rectangle(normalized_box, outline=label2color[predicted_label])
         draw.text((normalized_box[0] + 10, normalized_box[1] - 10), text=predicted_label, fill=label2color[predicted_label], font=font)
 
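
Both call sites now rely on an unnormalize_box helper whose definition is not part of this diff. Judging from the inline arithmetic it replaces, it is most likely the usual LayoutLM-style utility that maps the processor's 0-1000 normalized bounding boxes back to pixel coordinates; a minimal sketch under that assumption:

# Hypothetical sketch of the helper assumed by this commit; its actual
# definition lives elsewhere in app.py and is not shown in the diff.
def unnormalize_box(box, width, height):
    # The processor emits boxes on a 0-1000 grid; scale each coordinate
    # back to the image's pixel dimensions.
    return [
        width * (box[0] / 1000),
        height * (box[1] / 1000),
        width * (box[2] / 1000),
        height * (box[3] / 1000),
    ]

If the helper does follow this convention, it returns the right and bottom edges rather than the width and height values the removed inline lists computed, which is the (left, top, right, bottom) box format that image.crop() and draw.rectangle() expect.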