File size: 1,676 Bytes
0572c8f
 
 
 
6fa980c
0572c8f
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from matplotlib.patches import Patch
import io
from PIL import Image, ImageDraw
import numpy as np
import csv
import pandas as pd

from torchvision import transforms

from transformers import AutoModelForObjectDetection
import torch

import easyocr

import gradio as gr


# Device string for torch: "cuda" when torch reports CUDA as available,
# otherwise fall back to "cpu".
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

def process_pdf(image=None):
    """Placeholder handler for the Gradio interface.

    The real table-extraction pipeline is not wired in yet; this stub only
    logs that it was called and returns empty placeholders.

    Args:
        image: Value of the interface's single input component. Gradio calls
            the handler with one positional argument per input component, so
            the handler has to accept it even though the stub ignores it.
            Defaults to ``None`` so zero-argument calls keep working.

    Returns:
        A 3-tuple of empty lists, one placeholder per output component.
    """
    print('process_pdf')
    # TODO: intended pipeline, to be enabled once these helpers exist:
    # cropped_table = detect_and_crop_table(image)
    # image, cells = recognize_table(cropped_table)

    # cell_coordinates = get_cell_coordinates_by_row(cells)
    # df, data = apply_ocr(cell_coordinates, image)

    # return image, df, data

    # NOTE(review): an empty list may not be an accepted value for the image
    # output component -- confirm, and consider returning None for that slot.
    return [], [], []

# Text passed to gr.Interface as the page title.
title = "Sheriff's Demo: Table Detection & Recognition with Table Transformer (TATR)."
# Text passed to gr.Interface as the description. It names the two Table
# Transformer checkpoints the demo refers to (detection, then structure
# recognition) and mentions per-cell OCR. The line breaks and the leading
# spaces on the last line are part of the string value.
description = """A demo by M Sheriff for table extraction with the Table Transformer.
First, table detection is performed on the input image using https://huggingface.co/microsoft/table-transformer-detection,
after which the detected table is extracted and https://huggingface.co/microsoft/table-transformer-structure-recognition-v1.1-all recognizes the
    individual rows, columns and cells. OCR is then performed per cell, row by row."""
# Example inputs passed to gr.Interface: one inner list per example, each
# holding a single value for the single image input.
# NOTE(review): these are relative paths; presumably the files sit next to
# this script -- confirm they are shipped with the app.
examples = [['image.png'], ['mistral_paper.png']]

# Assemble the Gradio UI: one PIL image goes in; the handler's three return
# values are rendered as an image, a dataframe and a JSON view, in that order.
app = gr.Interface(
    fn=process_pdf,
    inputs=gr.Image(type="pil"),
    outputs=[
        gr.Image(type="pil", label="Detected table"),
        gr.Dataframe(label="Table as CSV"),
        gr.JSON(label="Data as JSON"),
    ],
    title=title,
    description=description,
    examples=examples,
)

# Enable the request queue, then launch the app with debug mode switched on.
app.queue()
app.launch(debug=True)