nielsr (HF staff) committed
Commit: 11608ea
Parent(s): 6a04dd1

Update app.py

Files changed (1)
  1. app.py +34 -5
app.py CHANGED
@@ -7,19 +7,46 @@ import numpy as np
 import csv
 import pandas as pd
 
-from transformers import TableTransformerImageProcessor, AutoModelForObjectDetection
+from torchvision import transforms
+
+from transformers import AutoModelForObjectDetection
 import torch
 
 import easyocr
 
 import gradio as gr
 
+
+class MaxResize(object):
+    def __init__(self, max_size=800):
+        self.max_size = max_size
+
+    def __call__(self, image):
+        width, height = image.size
+        current_max_size = max(width, height)
+        scale = self.max_size / current_max_size
+        resized_image = image.resize((int(round(scale*width)), int(round(scale*height))))
+
+        return resized_image
+
+detection_transform = transforms.Compose([
+    MaxResize(800),
+    transforms.ToTensor(),
+    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
+])
+
+structure_transform = transforms.Compose([
+    MaxResize(1000),
+    transforms.ToTensor(),
+    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
+])
+
 # load table detection model
-processor = TableTransformerImageProcessor(max_size=800)
+# processor = TableTransformerImageProcessor(max_size=800)
 model = AutoModelForObjectDetection.from_pretrained("microsoft/table-transformer-detection", revision="no_timm")
 
 # load table structure recognition model
-structure_processor = TableTransformerImageProcessor(max_size=1000)
+# structure_processor = TableTransformerImageProcessor(max_size=1000)
 structure_model = AutoModelForObjectDetection.from_pretrained("microsoft/table-transformer-structure-recognition-v1.1-all")
 
 # load EasyOCR reader
@@ -117,7 +144,8 @@ def visualize_detected_tables(img, det_tables):
 
 def detect_and_crop_table(image):
     # prepare image for the model
-    pixel_values = processor(image, return_tensors="pt").pixel_values
+    # pixel_values = processor(image, return_tensors="pt").pixel_values
+    pixel_values = detection_transform(image).unsqueeze(0)
 
     # forward pass
     with torch.no_grad():
@@ -140,7 +168,8 @@ def detect_and_crop_table(image):
 
 def recognize_table(image):
     # prepare image for the model
-    pixel_values = structure_processor(images=image, return_tensors="pt").pixel_values
+    # pixel_values = structure_processor(images=image, return_tensors="pt").pixel_values
+    pixel_values = structure_transform(image).unsqueeze(0)
 
     # forward pass
     with torch.no_grad():
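
Note on the change: the new torchvision pipeline resizes the longer image side to at most 800 px for detection (1000 px for structure recognition), converts to a tensor, normalizes with ImageNet mean/std, and adds a batch dimension with unsqueeze(0), so the tensor can be passed to the model directly instead of the pixel_values produced by TableTransformerImageProcessor. Below is a minimal sketch of the detection path, assuming a hypothetical local image table.png and CPU inference; the class, transform, and checkpoint names are copied from the diff above.

from PIL import Image
import torch
from torchvision import transforms
from transformers import AutoModelForObjectDetection


class MaxResize(object):
    # scale the image so its longer side equals max_size, keeping the aspect ratio
    def __init__(self, max_size=800):
        self.max_size = max_size

    def __call__(self, image):
        width, height = image.size
        scale = self.max_size / max(width, height)
        return image.resize((int(round(scale * width)), int(round(scale * height))))


detection_transform = transforms.Compose([
    MaxResize(800),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225]),
])

model = AutoModelForObjectDetection.from_pretrained(
    "microsoft/table-transformer-detection", revision="no_timm"
)

image = Image.open("table.png").convert("RGB")           # hypothetical test image
pixel_values = detection_transform(image).unsqueeze(0)   # shape (1, 3, H, W)

with torch.no_grad():
    outputs = model(pixel_values)

print(outputs.logits.shape)  # (batch_size, num_queries, num_labels + 1)

The recognize_table path is the same except that it uses structure_transform (MaxResize(1000)) and the microsoft/table-transformer-structure-recognition-v1.1-all checkpoint.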