Spaces:

iakarshu
/

lilt

Runtime error

App Files Files Community

iakarshu commited on Oct 23, 2022

Commit

847d138

1 Parent(s): 3f82d7f

Upload app.py

Browse files

Files changed (1) hide show

app.py +159 -0

app.py ADDED Viewed

	@@ -0,0 +1,159 @@

+# -*- coding: utf-8 -*-
+"""LiLT For Deployment
+Automatically generated by Colaboratory.
+Original file is located at
+    https://colab.research.google.com/drive/1ol6RWyff15SF6ZJPf47X5380hBTEDiUH
+"""
+# ## Installing the dependencies (might take some time)
+# !pip install -q pytesseract
+# !sudo apt install  -q tesseract-ocr
+# !pip install  -q transformers
+# !pip install  -q pytorch-lightning
+# !pip install  -q einops
+# !pip install  -q tqdm
+# !pip install -q gradio
+# !pip install -q Pillow==7.1.2
+# !pip install -q wandb
+# !pip install -q gdown
+# !pip install -q torchmetrics
+## Requirements.txt
+import os
+os.system('pip install pyyaml==5.1')
+## install PyTesseract
+os.system('pip install -q pytesseract')
+os.environ["TOKENIZERS_PARALLELISM"] = "false"
+import pandas as pd
+import os
+from PIL import Image
+from transformers import RobertaTokenizer
+import torch
+from torch.utils.data import Dataset, DataLoader
+import torch.nn as nn
+import pytorch_lightning as pl
+from dataset import create_features
+from modeling import LiLT
+from utils import LiLTPL
+import gdown
+import gradio as gr
+seed = 42
+## One can change this configuration and try out new combination
+config = {
+  "hidden_dropout_prob": 0.1,
+  "hidden_size_t": 768,
+  "hidden_size" : 768,
+  "hidden_size_l": 768 // 6,
+  "intermediate_ff_size_factor": 4,
+  "max_2d_position_embeddings": 1001,
+  "max_seq_len_l": 512,
+  "max_seq_len_t" : 512,
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  'dim_head' : 64,
+  "shape_size": 96,
+  "vocab_size": 50265,
+  "eps": 1e-12,
+  "fine_tune" : True
+}
+id2label = ['scientific_report',
+ 'resume',
+ 'memo',
+ 'file_folder',
+ 'specification',
+ 'news_article',
+ 'letter',
+ 'form',
+ 'budget',
+ 'handwritten',
+ 'email',
+ 'invoice',
+ 'presentation',
+ 'scientific_publication',
+ 'questionnaire',
+ 'advertisement']
+## Defining tokenizer
+tokenizer = RobertaTokenizer.from_pretrained('roberta-base')
+url = 'https://drive.google.com/uc?id=1eRV4fS_LFwI5MHqcRwLUNQZgewxI6Se_'
+output = 'lilt_ckpt.ckpt'
+gdown.download(url, output, quiet=False)
+class RVLCDIPData(Dataset):
+    def __init__(self, image_list, label_list, tokenizer, max_len = 512, size = 1000):
+        self.image_list = image_list
+        self.label_list = label_list
+        self.tokenizer = tokenizer
+        self.max_seq_length = max_len
+        self.size = size
+    def __len__(self):
+        return len(self.image_list)
+    def __getitem__(self, idx):
+        img_path = self.image_list[idx]
+        label = self.label_list[idx]
+        boxes, words, normal_box = create_features(
+                                img_path = img_path,
+                                tokenizer = self.tokenizer,
+                                max_seq_length = self.max_seq_length,
+                                size = self.size,
+                                use_ocr  = True,
+                               )
+        final_encoding = {'input_boxes': boxes, 'input_words': words}
+        final_encoding['label'] = torch.as_tensor(label).long()
+        return final_encoding
+lilt = LiLTPL(config)
+# path_to_weights = 'drive/MyDrive/docformer_rvl_checkpoint/docformer_v1.ckpt'
+lilt.load_from_checkpoint('lilt_ckpt.ckpt')
+## Taken from LayoutLMV2 space
+image = gr.inputs.Image(type="pil")
+label = gr.outputs.Label(num_top_classes=5)
+examples = [['00093726.png'], ['00866042.png']]
+title = "Interactive demo: LiLT for Image Classification"
+description = "Demo for classifying document images with LiLT model. To use it, \
+simply upload an image or use the example images below and click 'submit' to let the model predict the 5 most probable Document classes. \
+Results will show up in a few seconds."
+def classify_image(image):
+  image.save('sample_img.png')
+  boxes, words, normal_box = create_features(
+                                img_path = 'sample_img.png',
+                                tokenizer = tokenizer,
+                                max_seq_length = 512,
+                                size = 1000,
+                                use_ocr  = True,
+                               )
+  final_encoding = {'input_boxes': boxes.unsqueeze(0), 'input_words': words.unsqueeze(0)}
+  output = lilt.forward(final_encoding)
+  output = output[0].softmax(axis = -1)
+  final_pred = {}
+  for i, score in enumerate(output):
+      score = output[i]
+      final_pred[id2label[i]] = score.detach().cpu().tolist()
+  return final_pred
+gr.Interface(fn=classify_image, inputs=image, outputs=label, title=title, description=description, examples=examples, enable_queue=True).launch(debug=True)