AuditEdge committed
Commit 81e13bb · 1 Parent(s): cd8989c

initial commit

Files changed (39)
  1. .gitattributes +0 -35
  2. .gitignore +2 -0
  3. Dockerfile +16 -0
  4. __pycache__/app.cpython-312.pyc +0 -0
  5. app.py +226 -0
  6. layoutlmv3FineTuning/Copy of annotate_image.py +51 -0
  7. layoutlmv3FineTuning/Copy of run_inference.py +32 -0
  8. layoutlmv3FineTuning/Layoutlm_inference/__init__.py +0 -0
  9. layoutlmv3FineTuning/Layoutlm_inference/__pycache__/__init__.cpython-312.pyc +0 -0
  10. layoutlmv3FineTuning/Layoutlm_inference/__pycache__/__init__.cpython-39.pyc +0 -0
  11. layoutlmv3FineTuning/Layoutlm_inference/__pycache__/annotate_image.cpython-312.pyc +0 -0
  12. layoutlmv3FineTuning/Layoutlm_inference/__pycache__/annotate_image.cpython-39.pyc +0 -0
  13. layoutlmv3FineTuning/Layoutlm_inference/__pycache__/inference_handler.cpython-312.pyc +0 -0
  14. layoutlmv3FineTuning/Layoutlm_inference/__pycache__/inference_handler.cpython-39.pyc +0 -0
  15. layoutlmv3FineTuning/Layoutlm_inference/__pycache__/model_base_path.cpython-312.pyc +0 -0
  16. layoutlmv3FineTuning/Layoutlm_inference/__pycache__/model_base_path.cpython-39.pyc +0 -0
  17. layoutlmv3FineTuning/Layoutlm_inference/__pycache__/ocr.cpython-312.pyc +0 -0
  18. layoutlmv3FineTuning/Layoutlm_inference/__pycache__/ocr.cpython-39.pyc +0 -0
  19. layoutlmv3FineTuning/Layoutlm_inference/__pycache__/utils.cpython-312.pyc +0 -0
  20. layoutlmv3FineTuning/Layoutlm_inference/__pycache__/utils.cpython-39.pyc +0 -0
  21. layoutlmv3FineTuning/Layoutlm_inference/annotate_image.py +54 -0
  22. layoutlmv3FineTuning/Layoutlm_inference/inference_handler.py +268 -0
  23. layoutlmv3FineTuning/Layoutlm_inference/model_base_path.py +2 -0
  24. layoutlmv3FineTuning/Layoutlm_inference/ocr.py +144 -0
  25. layoutlmv3FineTuning/Layoutlm_inference/utils.py +68 -0
  26. layoutlmv3FineTuning/README.md +3 -0
  27. layoutlmv3FineTuning/inference_handler_modified.py +213 -0
  28. layoutlmv3FineTuning/preprocess.py +163 -0
  29. layoutlmv3FineTuning/run_inference.py +31 -0
  30. layoutlmv3FineTuning/run_inferenceM.py +32 -0
  31. multiple_request.py +56 -0
  32. requirements.txt +9 -0
  33. sample.py +22 -0
  34. titanium-scope-436311-t3-966373f5aa2f.json +13 -0
  35. uploads/aadhar/test_one.jpg +0 -0
  36. uploads/aadhar/test_two.jpg +0 -0
  37. uploads/cheque/0f81678a.jpeg +0 -0
  38. uploads/gst/0a52fbcb_page3_image_0.jpg +0 -0
  39. uploads/pan/6ea33087.jpeg +0 -0
.gitattributes DELETED
@@ -1,35 +0,0 @@
- *.7z filter=lfs diff=lfs merge=lfs -text
- *.arrow filter=lfs diff=lfs merge=lfs -text
- *.bin filter=lfs diff=lfs merge=lfs -text
- *.bz2 filter=lfs diff=lfs merge=lfs -text
- *.ckpt filter=lfs diff=lfs merge=lfs -text
- *.ftz filter=lfs diff=lfs merge=lfs -text
- *.gz filter=lfs diff=lfs merge=lfs -text
- *.h5 filter=lfs diff=lfs merge=lfs -text
- *.joblib filter=lfs diff=lfs merge=lfs -text
- *.lfs.* filter=lfs diff=lfs merge=lfs -text
- *.mlmodel filter=lfs diff=lfs merge=lfs -text
- *.model filter=lfs diff=lfs merge=lfs -text
- *.msgpack filter=lfs diff=lfs merge=lfs -text
- *.npy filter=lfs diff=lfs merge=lfs -text
- *.npz filter=lfs diff=lfs merge=lfs -text
- *.onnx filter=lfs diff=lfs merge=lfs -text
- *.ot filter=lfs diff=lfs merge=lfs -text
- *.parquet filter=lfs diff=lfs merge=lfs -text
- *.pb filter=lfs diff=lfs merge=lfs -text
- *.pickle filter=lfs diff=lfs merge=lfs -text
- *.pkl filter=lfs diff=lfs merge=lfs -text
- *.pt filter=lfs diff=lfs merge=lfs -text
- *.pth filter=lfs diff=lfs merge=lfs -text
- *.rar filter=lfs diff=lfs merge=lfs -text
- *.safetensors filter=lfs diff=lfs merge=lfs -text
- saved_model/**/* filter=lfs diff=lfs merge=lfs -text
- *.tar.* filter=lfs diff=lfs merge=lfs -text
- *.tar filter=lfs diff=lfs merge=lfs -text
- *.tflite filter=lfs diff=lfs merge=lfs -text
- *.tgz filter=lfs diff=lfs merge=lfs -text
- *.wasm filter=lfs diff=lfs merge=lfs -text
- *.xz filter=lfs diff=lfs merge=lfs -text
- *.zip filter=lfs diff=lfs merge=lfs -text
- *.zst filter=lfs diff=lfs merge=lfs -text
- *tfevents* filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,2 @@
+ .env
+ dependencies/
Dockerfile ADDED
@@ -0,0 +1,16 @@
+ # Read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+ # you will also find guides on how best to write your Dockerfile
+
+ FROM python:3.9
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV PATH="/home/user/.local/bin:$PATH"
+
+ WORKDIR /app
+
+ COPY --chown=user ./requirements.txt requirements.txt
+ RUN pip install --no-cache-dir --upgrade -r requirements.txt
+
+ COPY --chown=user . /app
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
__pycache__/app.cpython-312.pyc ADDED
Binary file (6.58 kB).
app.py ADDED
@@ -0,0 +1,226 @@
+ from fastapi import FastAPI, File, UploadFile, HTTPException
+ from fastapi.middleware.cors import CORSMiddleware
+ from typing import Dict
+ import os
+ import shutil
+ import logging
+
+ import torch
+ from transformers import LayoutLMv3Processor, LayoutLMv3ForTokenClassification
+
+ from dotenv import load_dotenv
+ import os
+
+ # Load .env file
+ load_dotenv()
+
+ # Access variables
+ dummy_key = os.getenv("dummy_key")
+ HUGGINGFACE_AUTH_TOKEN = dummy_key
+
+
+ # Hugging Face model and token
+ aadhar_model = "AuditEdge/doc_ocr_a"  # Replace with your fine-tuned model if applicable
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ print(f"Using device: {device}")
+
+ # Load the processor (tokenizer + image processor)
+ processor_aadhar = LayoutLMv3Processor.from_pretrained(
+     aadhar_model,
+     use_auth_token=HUGGINGFACE_AUTH_TOKEN
+ )
+ aadhar_model = LayoutLMv3ForTokenClassification.from_pretrained(
+     aadhar_model,
+     use_auth_token=HUGGINGFACE_AUTH_TOKEN
+ )
+ aadhar_model = aadhar_model.to(device)
+
+ # pan model
+ pan_model = "AuditEdge/doc_ocr_p"  # Replace with your fine-tuned model if applicable
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ print(f"Using device: {device}")
+
+ # Load the processor (tokenizer + image processor)
+ processor_pan = LayoutLMv3Processor.from_pretrained(
+     pan_model,
+     use_auth_token=HUGGINGFACE_AUTH_TOKEN
+ )
+ pan_model = LayoutLMv3ForTokenClassification.from_pretrained(
+     pan_model,
+     use_auth_token=HUGGINGFACE_AUTH_TOKEN
+ )
+ pan_model = pan_model.to(device)
+
+ #
+ # gst model
+ gst_model = "AuditEdge/doc_ocr_new_g"  # Replace with your fine-tuned model if applicable
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ print(f"Using device: {device}")
+
+ # Load the processor (tokenizer + image processor)
+ processor_gst = LayoutLMv3Processor.from_pretrained(
+     gst_model,
+     use_auth_token=HUGGINGFACE_AUTH_TOKEN
+ )
+ gst_model = LayoutLMv3ForTokenClassification.from_pretrained(
+     gst_model,
+     use_auth_token=HUGGINGFACE_AUTH_TOKEN
+ )
+ gst_model = gst_model.to(device)
+
+ #cheque model
+
+ cheque_model = "AuditEdge/doc_ocr_new_c"  # Replace with your fine-tuned model if applicable
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+ print(f"Using device: {device}")
+
+ # Load the processor (tokenizer + image processor)
+ processor_cheque = LayoutLMv3Processor.from_pretrained(
+     cheque_model,
+     use_auth_token=HUGGINGFACE_AUTH_TOKEN
+ )
+ cheque_model = LayoutLMv3ForTokenClassification.from_pretrained(
+     cheque_model,
+     use_auth_token=HUGGINGFACE_AUTH_TOKEN
+ )
+ cheque_model = cheque_model.to(device)
+
+
+
+
+
+
+ # Verify model and processor are loaded
+ print("Model and processor loaded successfully!")
+ print(f"Model is on device: {next(aadhar_model.parameters()).device}")
+
+
+ # Import inference modules
+ from layoutlmv3FineTuning.Layoutlm_inference.ocr import prepare_batch_for_inference
+ from layoutlmv3FineTuning.Layoutlm_inference.inference_handler import handle
+
+ # Create FastAPI instance
+ app = FastAPI(debug=True)
+
+ # Enable CORS
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+ # Configure directories
+ UPLOAD_FOLDER = './uploads/'
+ os.makedirs(UPLOAD_FOLDER, exist_ok=True)  # Ensure the main upload folder exists
+
+ UPLOAD_DIRS = {
+     "aadhar_file": "uploads/aadhar/",
+     "pan_file": "uploads/pan/",
+     "cheque_file": "uploads/cheque/",
+     "gst_file": "uploads/gst/",
+ }
+
+ # Ensure individual directories exist
+ for dir_path in UPLOAD_DIRS.values():
+     os.makedirs(dir_path, exist_ok=True)
+
+ # Logger configuration
+ logging.basicConfig(level=logging.INFO)
+
+ # Perform Inference
+ def perform_inference(file_paths: Dict[str, str]):
+     # Dictionary to map document types to their respective model directories
+     model_dirs = {
+         "aadhar_file": aadhar_model,
+         "pan_file": pan_model,
+         "cheque_file": cheque_model,
+         "gst_file": gst_model,
+     }
+
+     # Dictionary to store results for each document type
+     inference_results = {}
+
+     # Loop through the file paths and perform inference
+     for doc_type, file_path in file_paths.items():
+         if doc_type in model_dirs:
+             print(f"Processing {doc_type} using model at {model_dirs[doc_type]}")
+
+             # Prepare batch for inference
+             images_path = [file_path]
+             inference_batch = prepare_batch_for_inference(images_path)
+
+             # Prepare context for the specific document type
+             # context = {"model_dir": model_dirs[doc_type]}
+             # context = aadhar_model
+             if doc_type == "aadhar_file":
+                 context = aadhar_model
+                 processor = processor_aadhar
+                 name = "aadhar"
+
+             if doc_type == "pan_file":
+                 context = pan_model
+                 processor = processor_pan
+                 name = "pan"
+
+             if doc_type == "gst_file":
+                 context = gst_model
+                 processor = processor_gst
+                 name = "gst"
+
+             if doc_type == "cheque_file":
+                 context = cheque_model
+                 processor = processor_cheque
+                 name = "cheque"
+
+
+
+             # Perform inference (replace `handle` with your actual function)
+             result = handle(inference_batch, context, processor, name)
+
+             # Store the result
+             inference_results[doc_type] = result
+         else:
+             print(f"Model directory not found for {doc_type}. Skipping.")
+
+     return inference_results
+
+ # Routes
+ @app.get("/")
+ def greet_json():
+     return {"Hello": "World!"}
+
+ @app.post("/api/aadhar_ocr")
+ async def aadhar_ocr(
+     aadhar_file: UploadFile = File(None),
+     pan_file: UploadFile = File(None),
+     cheque_file: UploadFile = File(None),
+     gst_file: UploadFile = File(None),
+ ):
+     try:
+         # Handle file uploads
+         file_paths = {}
+         for file_type, folder in UPLOAD_DIRS.items():
+             file = locals()[file_type]  # Dynamically access the file arguments
+             if file:
+                 # Save the file in the respective directory
+                 file_path = os.path.join(folder, file.filename)
+                 with open(file_path, "wb") as buffer:
+                     shutil.copyfileobj(file.file, buffer)
+                 file_paths[file_type] = file_path
+
+         # Log received files
+         logging.info(f"Received files: {list(file_paths.keys())}")
+         print("file_paths", file_paths)
+         import sys
+         # sys.exit()
+
+         # Perform inference
+         result = perform_inference(file_paths)
+
+         return {"status": "success", "result": result}
+
+     except Exception as e:
+         logging.error(f"Error processing files: {e}")
+         raise HTTPException(status_code=500, detail="Internal Server Error")
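The /api/aadhar_ocr route above accepts any subset of the four multipart fields; a minimal curl sketch against a locally running container (port 7860 as in the Dockerfile, sample images taken from this commit's uploads/ folder):

    curl -X POST http://localhost:7860/api/aadhar_ocr \
         -F "aadhar_file=@uploads/aadhar/test_one.jpg" \
         -F "cheque_file=@uploads/cheque/0f81678a.jpeg"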
layoutlmv3FineTuning/Copy of annotate_image.py ADDED
@@ -0,0 +1,51 @@
+ import os
+ from PIL import Image, ImageDraw, ImageFont
+ from .utils import image_label_2_color
+
+
+ def get_flattened_output(docs):
+     flattened_output = []
+     annotation_key = 'output'
+     for doc in docs:
+         flattened_output_item = {annotation_key: []}
+         doc_annotation = doc[annotation_key]
+         for i, span in enumerate(doc_annotation):
+             if len(span['words']) > 1:
+                 for span_chunk in span['words']:
+                     flattened_output_item[annotation_key].append(
+                         {
+                             'label': span['label'],
+                             'text': span_chunk['text'],
+                             'words': [span_chunk]
+                         }
+                     )
+             else:
+                 flattened_output_item[annotation_key].append(span)
+         flattened_output.append(flattened_output_item)
+     return flattened_output
+
+
+ def annotate_image(image_path, annotation_object):
+     img = None
+     image = Image.open(image_path).convert('RGBA')
+     tmp = image.copy()
+     label2color = image_label_2_color(annotation_object)
+     overlay = Image.new('RGBA', tmp.size, (0, 0, 0)+(0,))
+     draw = ImageDraw.Draw(overlay)
+     font = ImageFont.load_default()
+
+     predictions = [span['label'] for span in annotation_object['output']]
+     boxes = [span['words'][0]['box'] for span in annotation_object['output']]
+     for prediction, box in zip(predictions, boxes):
+         draw.rectangle(box, outline=label2color[prediction],
+                        width=3, fill=label2color[prediction]+(int(255*0.33),))
+         draw.text((box[0] + 10, box[1] - 10), text=prediction,
+                   fill=label2color[prediction], font=font)
+
+     img = Image.alpha_composite(tmp, overlay)
+     img = img.convert("RGB")
+
+     image_name = os.path.basename(image_path)
+     image_name = image_name[:image_name.find('.')]
+     save_path = os.path.join('/content', f'{image_name}_annotated.jpg')
+     img.save(save_path)
layoutlmv3FineTuning/Copy of run_inference.py ADDED
@@ -0,0 +1,32 @@
+ import argparse
+ from asyncio.log import logger
+ from Layoutlm_inference.ocr import prepare_batch_for_inference
+ from Layoutlm_inference.inference_handler import handle
+ import logging
+ import os
+
+
+
+ if __name__ == "__main__":
+     # try:
+     parser = argparse.ArgumentParser()
+     parser.add_argument("--model_path", type=str, required=True)
+     parser.add_argument("--image_path", type=str, required=True)  # single image path
+     args = parser.parse_args()
+
+     # Expecting a single image file
+     image_path = args.image_path
+
+     # Ensure the file exists before processing
+     if not os.path.isfile(image_path):
+         raise FileNotFoundError(f"The provided image path does not exist: {image_path}")
+
+     # Prepare batch for a single image
+     inference_batch = prepare_batch_for_inference([image_path])  # pass as a list
+     context = {"model_dir": args.model_path}
+
+     # Handle the inference
+     handle(inference_batch, context)
+
+
+
layoutlmv3FineTuning/Layoutlm_inference/__init__.py ADDED
File without changes
layoutlmv3FineTuning/Layoutlm_inference/__pycache__/__init__.cpython-312.pyc ADDED
Binary file (184 Bytes).
layoutlmv3FineTuning/Layoutlm_inference/__pycache__/__init__.cpython-39.pyc ADDED
Binary file (147 Bytes).
layoutlmv3FineTuning/Layoutlm_inference/__pycache__/annotate_image.cpython-312.pyc ADDED
Binary file (2.58 kB).
layoutlmv3FineTuning/Layoutlm_inference/__pycache__/annotate_image.cpython-39.pyc ADDED
Binary file (1.73 kB).
layoutlmv3FineTuning/Layoutlm_inference/__pycache__/inference_handler.cpython-312.pyc ADDED
Binary file (12.3 kB).
layoutlmv3FineTuning/Layoutlm_inference/__pycache__/inference_handler.cpython-39.pyc ADDED
Binary file (7.22 kB).
layoutlmv3FineTuning/Layoutlm_inference/__pycache__/model_base_path.cpython-312.pyc ADDED
Binary file (314 Bytes).
layoutlmv3FineTuning/Layoutlm_inference/__pycache__/model_base_path.cpython-39.pyc ADDED
Binary file (267 Bytes).
layoutlmv3FineTuning/Layoutlm_inference/__pycache__/ocr.cpython-312.pyc ADDED
Binary file (5.27 kB).
layoutlmv3FineTuning/Layoutlm_inference/__pycache__/ocr.cpython-39.pyc ADDED
Binary file (3.52 kB).
layoutlmv3FineTuning/Layoutlm_inference/__pycache__/utils.cpython-312.pyc ADDED
Binary file (3.19 kB).
layoutlmv3FineTuning/Layoutlm_inference/__pycache__/utils.cpython-39.pyc ADDED
Binary file (2.52 kB).
layoutlmv3FineTuning/Layoutlm_inference/annotate_image.py ADDED
@@ -0,0 +1,54 @@
+ import os
+ from PIL import Image, ImageDraw, ImageFont
+ from .utils import image_label_2_color
+
+
+ def get_flattened_output(docs):
+     flattened_output = []
+     annotation_key = 'output'
+     for doc in docs:
+         flattened_output_item = {annotation_key: []}
+         doc_annotation = doc[annotation_key]
+         for i, span in enumerate(doc_annotation):
+             if len(span['words']) > 1:
+                 for span_chunk in span['words']:
+                     flattened_output_item[annotation_key].append(
+                         {
+                             'label': span['label'],
+                             'text': span_chunk['text'],
+                             'words': [span_chunk]
+                         }
+                     )
+             else:
+                 flattened_output_item[annotation_key].append(span)
+         flattened_output.append(flattened_output_item)
+     return flattened_output
+
+
+ def annotate_image(image_path, annotation_object):
+     print("image_path", image_path)
+     img = None
+     image = Image.open(image_path).convert('RGBA')
+     tmp = image.copy()
+     label2color = image_label_2_color(annotation_object)
+     overlay = Image.new('RGBA', tmp.size, (0, 0, 0)+(0,))
+     draw = ImageDraw.Draw(overlay)
+     font = ImageFont.load_default()
+
+     predictions = [span['label'] for span in annotation_object['output']]
+     boxes = [span['words'][0]['box'] for span in annotation_object['output']]
+     for prediction, box in zip(predictions, boxes):
+         print("prediction", prediction)
+         print("box", box)
+         draw.rectangle(box, outline=label2color[prediction],
+                        width=3, fill=label2color[prediction]+(int(255*0.33),))
+         draw.text((box[0] + 10, box[1] - 10), text=prediction,
+                   fill=label2color[prediction], font=font)
+
+     # img = Image.alpha_composite(tmp, overlay)
+     # img = img.convert("RGB")
+
+     # image_name = os.path.basename(image_path)
+     # image_name = image_name[:image_name.find('.')]
+     # save_path = os.path.join('/home/ec2-user/sample_project/inferred_images', f'{image_name}_annotated_1.jpg')
+     # img.save(save_path)
layoutlmv3FineTuning/Layoutlm_inference/inference_handler.py ADDED
@@ -0,0 +1,268 @@
1
+ from .utils import load_model,load_processor,normalize_box,compare_boxes,adjacent
2
+ from .model_base_path import LAYOUTLMV2_BASE_PATH,LAYOUTLMV3_BASE_PATH
3
+ from .annotate_image import get_flattened_output,annotate_image
4
+ from PIL import Image,ImageDraw, ImageFont
5
+ import logging
6
+ import torch
7
+ import json
8
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
9
+
10
+
11
+
12
+ logger = logging.getLogger(__name__)
13
+
14
+ class ModelHandler(object):
15
+ """
16
+ A base Model handler implementation.
17
+ """
18
+
19
+ def __init__(self):
20
+ # self.model = None
21
+ # self.model_dir = None
22
+ # self.device = 'cpu'
23
+ # self.error = None
24
+ # self._context = None
25
+ # self._batch_size = 0
26
+ self.initialized = False
27
+ self._raw_input_data = None
28
+ self._processed_data = None
29
+ self._images_size = None
30
+
31
+ def initialize(self, context,preprocessor,name):
32
+ """
33
+ Initialize model. This will be called during model loading time
34
+ :param context: Initial context contains model server system properties.
35
+ :return:
36
+ """
37
+ logger.info("Loading transformer model")
38
+
39
+ # self._context = context
40
+ # properties = self._context
41
+ # self._batch_size = properties["batch_size"] or 1
42
+ # self.model_dir = properties.get("model_dir")
43
+ self.name = name
44
+ self.model = context
45
+ self.preprocessor = preprocessor
46
+ self.initialized = True
47
+
48
+ def preprocess(self, batch):
49
+ """
50
+ Transform raw input into model input data.
51
+ :param batch: list of raw requests, should match batch size
52
+ :return: list of preprocessed model input data
53
+ """
54
+ # Take the input data and pre-process it make it inference ready
55
+ # assert self._batch_size == len(batch), "Invalid input batch size: {}".format(len(batch))
56
+ inference_dict = batch
57
+
58
+ print("inference_dict",inference_dict)
59
+ self._raw_input_data = inference_dict
60
+ # model_name_or_path = None
61
+ # if 'v2' in self.model.config.architectures[0]:
62
+ # model_name_or_path = LAYOUTLMV2_BASE_PATH
63
+ # elif 'v3' in self.model.config.architectures[0]:
64
+ # model_name_or_path = LAYOUTLMV3_BASE_PATH
65
+ # else:
66
+ # raise ValueError('invalid model architecture, please make sure the model is either Layoutlmv2 or Layoutlmv3')
67
+ # processor = load_processor(model_name_or_path)
68
+ processor = self.preprocessor
69
+
70
+ images = [Image.open(path).convert("RGB")
71
+ for path in inference_dict['image_path']]
72
+ self._images_size = [img.size for img in images]
73
+ words = inference_dict['words']
74
+ boxes = [[normalize_box(box, images[i].size[0], images[i].size[1])
75
+ for box in doc] for i, doc in enumerate(inference_dict['bboxes'])]
76
+ encoded_inputs = processor(
77
+ images, words, boxes=boxes, return_tensors="pt", padding="max_length", truncation=True)
78
+ self._processed_data = encoded_inputs
79
+ encoded_inputs = {key: val.to(device) for key, val in encoded_inputs.items()}
80
+ print("encoded_inputs",encoded_inputs)
81
+
82
+ return encoded_inputs
83
+
84
+ def load(self, model_dir):
85
+ """The load handler is responsible for loading the hunggingface transformer model.
86
+ Returns:
87
+ hf_pipeline (Pipeline): A Hugging Face Transformer pipeline.
88
+ """
89
+ # TODO model dir should be microsoft/layoutlmv2-base-uncased
90
+ model = load_model(model_dir)
91
+ return model
92
+
93
+ def inference(self, model_input):
94
+ """
95
+ Internal inference methods
96
+ :param model_input: transformed model input data
97
+ :return: list of inference output in NDArray
98
+ """
99
+ # TODO load the model state_dict before running the inference
100
+ # Do some inference call to engine here and return output
101
+ with torch.no_grad():
102
+ inference_outputs = self.model(**model_input)
103
+ predictions = inference_outputs.logits.argmax(-1).tolist()
104
+ print("these are predictions",predictions)
105
+ results = []
106
+ for i in range(len(predictions)):
107
+ tmp = dict()
108
+ tmp[f'output_{i}'] = predictions[i]
109
+ results.append(tmp)
110
+
111
+ return [results]
112
+
113
+ def postprocess(self, inference_output):
114
+ print("self._raw_input_data['words']",self._raw_input_data['words'])
115
+ print("inference_output",inference_output)
116
+
117
+
118
+ docs = []
119
+ k = 0
120
+ for page, doc_words in enumerate(self._raw_input_data['words']):
121
+ print(page,doc_words)
122
+ doc_list = []
123
+ width, height = self._images_size[page]
124
+ for i, doc_word in enumerate(doc_words, start=0):
125
+ word_tagging = None
126
+ word_labels = []
127
+ word = dict()
128
+ word['id'] = k
129
+ k += 1
130
+ word['text'] = doc_word
131
+ word['pageNum'] = page + 1
132
+ word['box'] = self._raw_input_data['bboxes'][page][i]
133
+ _normalized_box = normalize_box(
134
+ self._raw_input_data['bboxes'][page][i], width, height)
135
+ for j, box in enumerate(self._processed_data['bbox'].tolist()[page]):
136
+ if compare_boxes(box, _normalized_box):
137
+ if self.model.config.id2label[inference_output[0][page][f'output_{page}'][j]] != 'O':
138
+ word_labels.append(
139
+ self.model.config.id2label[inference_output[0][page][f'output_{page}'][j]][2:])
140
+ else:
141
+ word_labels.append('other')
142
+ if word_labels != []:
143
+ word_tagging = word_labels[0] if word_labels[0] != 'other' else word_labels[-1]
144
+ else:
145
+ word_tagging = 'other'
146
+ word['label'] = word_tagging
147
+ word['pageSize'] = {'width': width, 'height': height}
148
+ if word['label'] != 'other':
149
+ doc_list.append(word)
150
+ spans = []
151
+ def adjacents(entity): return [
152
+ adj for adj in doc_list if adjacent(entity, adj)]
153
+ output_test_tmp = doc_list[:]
154
+ for entity in doc_list:
155
+ if adjacents(entity) == []:
156
+ spans.append([entity])
157
+ output_test_tmp.remove(entity)
158
+
159
+ while output_test_tmp != []:
160
+ span = [output_test_tmp[0]]
161
+ output_test_tmp = output_test_tmp[1:]
162
+ while output_test_tmp != [] and adjacent(span[-1], output_test_tmp[0]):
163
+ span.append(output_test_tmp[0])
164
+ output_test_tmp.remove(output_test_tmp[0])
165
+ spans.append(span)
166
+
167
+ output_spans = []
168
+ for span in spans:
169
+ if len(span) == 1:
170
+ output_span = {"text": span[0]['text'],
171
+ "label": span[0]['label'],
172
+ "words": [{
173
+ 'id': span[0]['id'],
174
+ 'box': span[0]['box'],
175
+ 'text': span[0]['text']
176
+ }],
177
+ }
178
+ else:
179
+ output_span = {"text": ' '.join([entity['text'] for entity in span]),
180
+ "label": span[0]['label'],
181
+ "words": [{
182
+ 'id': entity['id'],
183
+ 'box': entity['box'],
184
+ 'text': entity['text']
185
+ } for entity in span]
186
+
187
+ }
188
+ output_spans.append(output_span)
189
+ docs.append({f'output': output_spans})
190
+ return [json.dumps(docs, ensure_ascii=False)]
191
+
192
+ def handle(self, data, context):
193
+ """
194
+ Call preprocess, inference and post-process functions
195
+ :param data: input data
196
+ :param context: mms context
197
+ """
198
+ # print("\nmodel_input\n",data)
199
+ print("context",context)
200
+
201
+ model_input = self.preprocess(data)
202
+ print("this is model input",model_input)
203
+ model_out = self.inference(model_input)
204
+ print("\nmodel_output\n",model_out)
205
+ inference_out = self.postprocess(model_out)[0]
206
+
207
+ print("\nprocessed output\n",inference_out)
208
+
209
+ # with open('LayoutlMV3InferenceOutput.json', 'w') as inf_out:
210
+ # inf_out.write(inference_out)
211
+ inference_out_list = json.loads(inference_out)
212
+ flattened_output_list = get_flattened_output(inference_out_list)
213
+ print("flattened_output_list",flattened_output_list)
214
+
215
+
216
+
217
+ if self.name == "cheque":
218
+ acc_num = "".join(item['text'] for item in flattened_output_list[0]['output'] if item['label'] == 'AN')
219
+ IFSC = "".join(item['text'] for item in flattened_output_list[0]['output'] if item['label'] == 'IFSC')
220
+
221
+ print("entered cheque\n\n",flattened_output_list,"\n\n")
222
+ result = {"acc_num":acc_num,
223
+ "IFSC":IFSC}
224
+ if self.name == "aadhar":
225
+ aadhar_num = "".join(item['text'] for item in flattened_output_list[0]['output'] if item['label'] == 'AN')
226
+ print("entered aadhar\n\n",flattened_output_list,"\n\n")
227
+ # IFSC = "".join(item['text'] for item in flattened_output_list[0]['output'] if item['label'] == 'IFSC')
228
+ result = {"aadhar_num":aadhar_num}
229
+
230
+ if self.name == "pan":
231
+ pan_num = "".join(item['text'] for item in flattened_output_list[0]['output'] if item['label'] == 'PAN_VALUE')
232
+ print("entered pan\n\n",flattened_output_list,"\n\n")
233
+ # IFSC = "".join(item['text'] for item in flattened_output_list[0]['output'] if item['label'] == 'IFSC')
234
+ result = {"pan_num":pan_num}
235
+ if self.name == "gst":
236
+ gstin_num = "".join(item['text'] for item in flattened_output_list[0]['output'] if item['label'] == 'GSTIN')
237
+ print("entered gst\n\n",flattened_output_list,"\n\n")
238
+ # IFSC = "".join(item['text'] for item in flattened_output_list[0]['output'] if item['label'] == 'IFSC')
239
+ result = {"gstin_num":gstin_num}
240
+
241
+
242
+ # if
243
+ # an_tokens = "".join(item['text'] for item in flattened_output_list[0]['output'] if item['label'] == 'AN')
244
+ #PAN_VALUE
245
+ #AN
246
+ #IFSC
247
+
248
+ # print(f"Concatenated AN tokens: {an_tokens}")
249
+
250
+ # print("this is flattened output",flattened_output_list)
251
+ for i, flattened_output in enumerate(flattened_output_list):
252
+ annotate_image(data['image_path'][i], flattened_output)
253
+
254
+ return result
255
+
256
+
257
+
258
+ _service = ModelHandler()
259
+
260
+
261
+ def handle(data, context,processor,name):
262
+ # if not _service.initialized:
263
+ _service.initialize(context,processor,name)
264
+
265
+ # if data is None:
266
+ # return None
267
+
268
+ return _service.handle(data, context)
layoutlmv3FineTuning/Layoutlm_inference/model_base_path.py ADDED
@@ -0,0 +1,2 @@
+ LAYOUTLMV2_BASE_PATH = "microsoft/layoutlmv2-base-uncased"
+ LAYOUTLMV3_BASE_PATH = "microsoft/layoutlmv3-base"
layoutlmv3FineTuning/Layoutlm_inference/ocr.py ADDED
@@ -0,0 +1,144 @@
+ import os
+ import pandas as pd
+
+ import os
+ os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = "./titanium-scope-436311-t3-966373f5aa2f.json"
+
+
+
+
+ def run_tesseract_on_image(image_path):  # -> tsv output path
+     print("image_path", image_path)
+     image_name = os.path.basename(image_path)
+     image_name = image_name[:image_name.find('.')]
+     error_code = os.system(f'''
+     tesseract "{image_path}" "/content/{image_name}" -l eng tsv
+     ''')
+     if not error_code:
+         return f"/content/{image_name}.tsv"
+     else:
+         raise ValueError('Tesseract OCR Error please verify image format PNG,JPG,JPEG')
+
+
+ def clean_tesseract_output(tsv_output_path):
+     print("tsv_output_path", tsv_output_path)
+     ocr_df = pd.read_csv(tsv_output_path, sep='\t')
+     ocr_df = ocr_df.dropna()
+     ocr_df = ocr_df.drop(ocr_df[ocr_df.text.str.strip() == ''].index)
+     text_output = ' '.join(ocr_df.text.tolist())
+     words = []
+     for index, row in ocr_df.iterrows():
+         word = {}
+         origin_box = [row['left'], row['top'], row['left'] +
+                       row['width'], row['top']+row['height']]
+         word['word_text'] = row['text']
+         word['word_box'] = origin_box
+         words.append(word)
+     return words
+
+
+
+
+ def detect_text(path):
+     print("this is path:", path)
+
+     """Detects text in the file."""
+     from google.cloud import vision
+     client = vision.ImageAnnotatorClient()
+     with open(path, "rb") as image_file:
+         content = image_file.read()
+     image = vision.Image(content=content)
+     response = client.text_detection(image=image)
+     texts = response.text_annotations
+     print("Texts:")
+     list_of_dict = []
+     for text in texts[1:]:
+         data_dic = {}
+         print(f'\n"{text.description}"')
+         data_dic["word_text"] = text.description
+
+         vertices_list = [[int(vertex.x), int(vertex.y)] for vertex in text.bounding_poly.vertices]
+         print("vertices_list", vertices_list)
+
+
+         coords = vertices_list
+
+         sorted_coords = sorted(coords, key=lambda coord: (coord[0] + coord[1]))
+
+         # Top-left is the first in the sorted list (smallest sum of x, y)
+         top_left = sorted_coords[0]
+
+         # Bottom-right is the last in the sorted list (largest sum of x, y)
+         bottom_right = sorted_coords[-1]
+
+         ls = []
+         ls.append(top_left[0])
+         ls.append(top_left[1])
+         ls.append(bottom_right[0])
+         ls.append(bottom_right[1])
+
+         # print(ls)
+
+         # ls = []
+
+         # ls.append(vertices_list[0][0])
+         # ls.append(vertices_list[0][1])
+         # ls.append(vertices_list[2][0])
+         # ls.append(vertices_list[2][1])
+
+         data_dic["word_box"] = ls
+
+         list_of_dict.append(data_dic)
+
+     if response.error.message:
+         raise Exception(
+             "{}\nFor more info on error messages, check: "
+             "https://cloud.google.com/apis/design/errors".format(response.error.message)
+         )
+
+     return list_of_dict
+
+
+
+
+ def prepare_batch_for_inference(image_paths):
+     # tesseract_outputs is a list of paths
+     inference_batch = dict()
+     # tesseract_outputs = [run_tesseract_on_image(
+     #     image_path) for image_path in image_paths]
+
+     # tesseract_outputs = []
+     # for image_path in image_paths:
+
+     #     output = run_tesseract_on_image(image_path)
+     #     tesseract_outputs.append(output)
+
+     # clean_outputs is a list of lists
+     # clean_outputs = [clean_tesseract_output(
+     #     tsv_path) for tsv_path in tesseract_outputs]
+
+     # clean_outputs = []
+     # for tsv_path in tesseract_outputs:
+     #     output = clean_tesseract_output(tsv_path)
+     #     clean_outputs.append(output)
+
+
+     clean_outputs = []
+     for image_path in image_paths:
+
+         output = detect_text(image_path)
+         clean_outputs.append(output)
+
+     print("clean_outputs", clean_outputs)
+
+
+     word_lists = [[word['word_text'] for word in clean_output]
+                   for clean_output in clean_outputs]
+     boxes_lists = [[word['word_box'] for word in clean_output]
+                    for clean_output in clean_outputs]
+     inference_batch = {
+         "image_path": image_paths,
+         "bboxes": boxes_lists,
+         "words": word_lists
+     }
+     return inference_batch
layoutlmv3FineTuning/Layoutlm_inference/utils.py ADDED
@@ -0,0 +1,68 @@
+ import numpy as np
+ from transformers import AutoModelForTokenClassification, AutoProcessor
+
+ from dotenv import load_dotenv
+ import os
+
+ # Load .env file
+ load_dotenv()
+
+ # Access variables
+ dummy_key = os.getenv("dummy_key")
+ # secret_key = os.getenv("SECRET_KEY")
+ # debug_mode = os.getenv("DEBUG")
+
+ # print(f"Database URL: {database_url}")
+ # print(f"Secret Key: {secret_key}")
+ # print(f"Debug Mode: {debug_mode}")
+
+
+
+
+ def normalize_box(bbox, width, height):
+     return [
+         int(bbox[0]*(1000/width)),
+         int(bbox[1]*(1000/height)),
+         int(bbox[2]*(1000/width)),
+         int(bbox[3]*(1000/height)),
+     ]
+
+ def compare_boxes(b1, b2):
+     b1 = np.array([c for c in b1])
+     b2 = np.array([c for c in b2])
+     equal = np.array_equal(b1, b2)
+     return equal
+
+ def unnormalize_box(bbox, width, height):
+     return [
+         width * (bbox[0] / 1000),
+         height * (bbox[1] / 1000),
+         width * (bbox[2] / 1000),
+         height * (bbox[3] / 1000),
+     ]
+
+ def adjacent(w1, w2):
+     if w1['label'] == w2['label'] and abs(w1['id'] - w2['id']) == 1:
+         return True
+     return False
+
+ def random_color():
+     return np.random.randint(0, 255, 3)
+
+ def image_label_2_color(annotation):
+     if 'output' in annotation.keys():
+         image_labels = set([span['label'] for span in annotation['output']])
+         label2color = {f'{label}': (random_color()[0], random_color()[
+             1], random_color()[2]) for label in image_labels}
+         return label2color
+     else:
+         raise ValueError('please use "output" as annotation key')
+
+ def load_model(model_path):
+     model = AutoModelForTokenClassification.from_pretrained(model_path, use_auth_token=dummy_key)
+     return model
+
+ def load_processor(model_name_or_path):
+     processor = AutoProcessor.from_pretrained(
+         model_name_or_path, apply_ocr=False, use_auth_token=dummy_key)
+     return processor
layoutlmv3FineTuning/README.md ADDED
@@ -0,0 +1,3 @@
+ # layoutlmFineTuning
+ This repo aims to train a LayoutLMv3 model on a __ubiai__ OCR-annotated dataset via the preprocess and train scripts, and then test the model via the inference script.
+ * Note that the provided inference module supports both LayoutLMv3 and LayoutLMv2 models.
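As a rough illustration of that workflow, the scripts in this commit are typically driven along these lines (all paths and the 0.2 validation split are placeholder values, not part of the repo):

    # split the annotated data and serialize the HF datasets to disk
    python preprocess.py --valid_size 0.2 --output_path ./processed/
    # run inference on a folder of images with a fine-tuned checkpoint
    python run_inference.py --model_path ./path/to/checkpoint --images_path ./path/to/images

Both flag sets match the argparse definitions in preprocess.py and run_inference.py below.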
layoutlmv3FineTuning/inference_handler_modified.py ADDED
@@ -0,0 +1,213 @@
1
+ from .utils import load_model,load_processor,normalize_box,compare_boxes,adjacent
2
+ from .model_base_path import LAYOUTLMV2_BASE_PATH,LAYOUTLMV3_BASE_PATH
3
+ from .annotate_image import get_flattened_output,annotate_image
4
+ from PIL import Image,ImageDraw, ImageFont
5
+ import logging
6
+ import torch
7
+ import json
8
+
9
+
10
+
11
+ logger = logging.getLogger(__name__)
12
+
13
+ class ModelHandler(object):
14
+ """
15
+ A base Model handler implementation.
16
+ """
17
+
18
+ def __init__(self):
19
+ self.model = None
20
+ self.model_dir = None
21
+ self.device = 'cpu'
22
+ self.error = None
23
+ # self._context = None
24
+ # self._batch_size = 0
25
+ self.initialized = False
26
+ self._raw_input_data = None
27
+ self._processed_data = None
28
+ self._images_size = None
29
+
30
+ def initialize(self, context):
31
+ """
32
+ Initialize model. This will be called during model loading time
33
+ :param context: Initial context contains model server system properties.
34
+ :return:
35
+ """
36
+ logger.info("Loading transformer model")
37
+
38
+ self._context = context
39
+ properties = self._context
40
+ # self._batch_size = properties["batch_size"] or 1
41
+ self.model_dir = properties.get("model_dir")
42
+ self.model = self.load(self.model_dir)
43
+ self.initialized = True
44
+
45
+ def preprocess(self, batch):
46
+ """
47
+ Transform raw input into model input data.
48
+ :param batch: list of raw requests, should match batch size
49
+ :return: list of preprocessed model input data
50
+ """
51
+ # Take the input data and pre-process it make it inference ready
52
+ # assert self._batch_size == len(batch), "Invalid input batch size: {}".format(len(batch))
53
+ inference_dict = batch
54
+ self._raw_input_data = inference_dict
55
+ model_name_or_path = None
56
+ if 'v2' in self.model.config.architectures[0]:
57
+ model_name_or_path = LAYOUTLMV2_BASE_PATH
58
+ elif 'v3' in self.model.config.architectures[0]:
59
+ model_name_or_path = LAYOUTLMV3_BASE_PATH
60
+ else:
61
+ raise ValueError('invalid model architecture, please make sure the model is either Layoutlmv2 or Layoutlmv3')
62
+ processor = load_processor(model_name_or_path)
63
+ images = [Image.open(path).convert("RGB")
64
+ for path in inference_dict['image_path']]
65
+ self._images_size = [img.size for img in images]
66
+ words = inference_dict['words']
67
+ boxes = [[normalize_box(box, images[i].size[0], images[i].size[1])
68
+ for box in doc] for i, doc in enumerate(inference_dict['bboxes'])]
69
+ encoded_inputs = processor(
70
+ images, words, boxes=boxes, return_tensors="pt", padding="max_length", truncation=True)
71
+ self._processed_data = encoded_inputs
72
+ return encoded_inputs
73
+
74
+ def load(self, model_dir):
75
+ """The load handler is responsible for loading the hunggingface transformer model.
76
+ Returns:
77
+ hf_pipeline (Pipeline): A Hugging Face Transformer pipeline.
78
+ """
79
+ # TODO model dir should be microsoft/layoutlmv2-base-uncased
80
+ model = load_model(model_dir)
81
+ return model
82
+
83
+ def inference(self, model_input):
84
+ """
85
+ Internal inference methods
86
+ :param model_input: transformed model input data
87
+ :return: list of inference output in NDArray
88
+ """
89
+ # TODO load the model state_dict before running the inference
90
+ # Do some inference call to engine here and return output
91
+ with torch.no_grad():
92
+ inference_outputs = self.model(**model_input)
93
+ predictions = inference_outputs.logits.argmax(-1).tolist()
94
+ results = []
95
+ for i in range(len(predictions)):
96
+ tmp = dict()
97
+ tmp[f'output_{i}'] = predictions[i]
98
+ results.append(tmp)
99
+
100
+ return [results]
101
+
102
+ def postprocess(self, inference_output):
103
+ docs = []
104
+ k = 0
105
+ for page, doc_words in enumerate(self._raw_input_data['words']):
106
+ doc_list = []
107
+ width, height = self._images_size[page]
108
+ for i, doc_word in enumerate(doc_words, start=0):
109
+ word_tagging = None
110
+ word_labels = []
111
+ word = dict()
112
+ word['id'] = k
113
+ k += 1
114
+ word['text'] = doc_word
115
+ word['pageNum'] = page + 1
116
+ word['box'] = self._raw_input_data['bboxes'][page][i]
117
+ _normalized_box = normalize_box(
118
+ self._raw_input_data['bboxes'][page][i], width, height)
119
+ for j, box in enumerate(self._processed_data['bbox'].tolist()[page]):
120
+ if compare_boxes(box, _normalized_box):
121
+ if self.model.config.id2label[inference_output[0][page][f'output_{page}'][j]] != 'O':
122
+ word_labels.append(
123
+ self.model.config.id2label[inference_output[0][page][f'output_{page}'][j]][2:])
124
+ else:
125
+ word_labels.append('other')
126
+ if word_labels != []:
127
+ word_tagging = word_labels[0] if word_labels[0] != 'other' else word_labels[-1]
128
+ else:
129
+ word_tagging = 'other'
130
+ word['label'] = word_tagging
131
+ word['pageSize'] = {'width': width, 'height': height}
132
+ if word['label'] != 'other':
133
+ doc_list.append(word)
134
+ spans = []
135
+ def adjacents(entity): return [
136
+ adj for adj in doc_list if adjacent(entity, adj)]
137
+ output_test_tmp = doc_list[:]
138
+ for entity in doc_list:
139
+ if adjacents(entity) == []:
140
+ spans.append([entity])
141
+ output_test_tmp.remove(entity)
142
+
143
+ while output_test_tmp != []:
144
+ span = [output_test_tmp[0]]
145
+ output_test_tmp = output_test_tmp[1:]
146
+ while output_test_tmp != [] and adjacent(span[-1], output_test_tmp[0]):
147
+ span.append(output_test_tmp[0])
148
+ output_test_tmp.remove(output_test_tmp[0])
149
+ spans.append(span)
150
+
151
+ output_spans = []
152
+ label_to_span_map = {}
153
+
154
+ for span in spans:
155
+ label = span[0]['label']
156
+ if label in label_to_span_map:
157
+ # If the label already exists, merge the current span with the existing span
158
+ existing_span = label_to_span_map[label]
159
+ existing_span["text"] += ' ' + ' '.join([entity['text'] for entity in span])
160
+ existing_span["words"].extend([{
161
+ 'id': entity['id'],
162
+ 'box': entity['box'],
163
+ 'text': entity['text']
164
+ } for entity in span])
165
+ else:
166
+ # Create a new span for this label if it doesn't exist
167
+ output_span = {
168
+ "text": ' '.join([entity['text'] for entity in span]),
169
+ "label": label,
170
+ "words": [{
171
+ 'id': entity['id'],
172
+ 'box': entity['box'],
173
+ 'text': entity['text']
174
+ } for entity in span]
175
+ }
176
+ label_to_span_map[label] = output_span
177
+
178
+ # Convert label_to_span_map to output_spans
179
+ output_spans = list(label_to_span_map.values())
180
+ docs.append({f'output': output_spans})
181
+ return [json.dumps(docs, ensure_ascii=False)]
182
+
183
+ def handle(self, data, context):
184
+ """
185
+ Call preprocess, inference and post-process functions
186
+ :param data: input data
187
+ :param context: mms context
188
+ """
189
+ model_input = self.preprocess(data)
190
+ model_out = self.inference(model_input)
191
+ inference_out = self.postprocess(model_out)[0]
192
+ import os
193
+ print("cwd",os.getcwd())
194
+ with open('LayoutlMV3InferenceOutput.json', 'w') as inf_out:
195
+ inf_out.write(inference_out)
196
+ inference_out_list = json.loads(inference_out)
197
+ flattened_output_list = get_flattened_output(inference_out_list)
198
+ for i, flattened_output in enumerate(flattened_output_list):
199
+ annotate_image(data['image_path'][i], flattened_output)
200
+
201
+
202
+
203
+ _service = ModelHandler()
204
+
205
+
206
+ def handle(data, context):
207
+ if not _service.initialized:
208
+ _service.initialize(context)
209
+
210
+ if data is None:
211
+ return None
212
+
213
+ return _service.handle(data, context)
layoutlmv3FineTuning/preprocess.py ADDED
@@ -0,0 +1,163 @@
1
+ import pandas as pd
2
+ import numpy as np
3
+ import os
4
+ import argparse
5
+ from datasets.features import ClassLabel
6
+ from transformers import AutoProcessor
7
+ from sklearn.model_selection import train_test_split
8
+ from datasets import Features, Sequence, ClassLabel, Value, Array2D, Array3D, Dataset
9
+ from datasets import Image as Img
10
+ from PIL import Image
11
+
12
+ import warnings
13
+ warnings.filterwarnings('ignore')
14
+
15
+
16
+ def read_text_file(file_path):
17
+ with open(file_path, 'r') as f:
18
+ return (f.readlines())
19
+
20
+
21
+ def prepare_examples(examples):
22
+ images = examples[image_column_name]
23
+ words = examples[text_column_name]
24
+ boxes = examples[boxes_column_name]
25
+ word_labels = examples[label_column_name]
26
+
27
+ encoding = processor(images, words, boxes=boxes, word_labels=word_labels,
28
+ truncation=True, padding="max_length")
29
+
30
+ return encoding
31
+
32
+ def get_zip_dir_name():
33
+ try:
34
+ os.chdir('/content/data')
35
+ dir_list = os.listdir()
36
+ any_file_name = dir_list[0]
37
+ zip_dir_name = any_file_name[:any_file_name.find('\\')]
38
+ if all(list(map(lambda x: x.startswith(zip_dir_name), dir_list))):
39
+ return zip_dir_name
40
+ return False
41
+ finally:
42
+ os.chdir('./../')
43
+
44
+
45
+ def filter_out_unannotated(example):
46
+ tags = example['ner_tags']
47
+ return not all([tag == label2id['O'] for tag in tags])
48
+
49
+
50
+
51
+ if __name__ == '__main__':
52
+
53
+ parser = argparse.ArgumentParser()
54
+ parser.add_argument('--valid_size')
55
+ parser.add_argument('--output_path')
56
+ args = parser.parse_args()
57
+ TEST_SIZE = float(args.valid_size)
58
+ OUTPUT_PATH = args.output_path
59
+
60
+ os.makedirs(args.output_path, exist_ok=True)
61
+ files = {}
62
+ zip_dir_name = get_zip_dir_name()
63
+ if zip_dir_name:
64
+ files['train_box'] = read_text_file(os.path.join(
65
+ os.curdir, 'data', f'{zip_dir_name}\\{zip_dir_name}_box.txt'))
66
+ files['train_image'] = read_text_file(os.path.join(
67
+ os.curdir, 'data', f'{zip_dir_name}\\{zip_dir_name}_image.txt'))
68
+ files['train'] = read_text_file(os.path.join(
69
+ os.curdir, 'data', f'{zip_dir_name}\\{zip_dir_name}.txt'))
70
+ else:
71
+ for f in os.listdir():
72
+ if f.endswith('.txt') and f.find('box') != -1:
73
+ files['train_box'] = read_text_file(os.path.join(os.curdir, f))
74
+ elif f.endswith('.txt') and f.find('image') != -1:
75
+ files['train_image'] = read_text_file(
76
+ os.path.join(os.curdir, f))
77
+ elif f.endswith('.txt') and f.find('labels') == -1:
78
+ files['train'] = read_text_file(os.path.join(os.curdir, f))
79
+
80
+ assert(len(files['train']) == len(files['train_box']))
81
+ assert(len(files['train_box']) == len(files['train_image']))
82
+ assert(len(files['train_image']) == len(files['train']))
83
+
84
+ images = {}
85
+ for i, row in enumerate(files['train_image']):
86
+ if row != '\n':
87
+ image_name = row.split('\t')[-1]
88
+ images.setdefault(image_name.replace('\n', ''), []).append(i)
89
+
90
+ words, bboxes, ner_tags, image_path = [], [], [], []
91
+ for image, rows in images.items():
92
+ words.append([row.split('\t')[0].replace('\n', '')
93
+ for row in files['train'][rows[0]:rows[-1]+1]])
94
+ ner_tags.append([row.split('\t')[1].replace('\n', '')
95
+ for row in files['train'][rows[0]:rows[-1]+1]])
96
+ bboxes.append([box.split('\t')[1].replace('\n', '')
97
+ for box in files['train_box'][rows[0]:rows[-1]+1]])
98
+ if zip_dir_name:
99
+ image_path.append(f"/content/data/{zip_dir_name}\\{image}")
100
+ else:
101
+ image_path.append(f"/content/data/{image}")
102
+
103
+ labels = list(set([tag for doc_tag in ner_tags for tag in doc_tag]))
104
+ id2label = {v: k for v, k in enumerate(labels)}
105
+ label2id = {k: v for v, k in enumerate(labels)}
106
+
107
+ dataset_dict = {
108
+ 'id': range(len(words)),
109
+ 'tokens': words,
110
+ 'bboxes': [[list(map(int, bbox.split())) for bbox in doc] for doc in bboxes],
111
+ 'ner_tags': [[label2id[tag] for tag in ner_tag] for ner_tag in ner_tags],
112
+ 'image': [Image.open(path).convert("RGB") for path in image_path]
113
+ }
114
+
115
+ #raw features
116
+ features = Features({
117
+ 'id': Value(dtype='string', id=None),
118
+ 'tokens': Sequence(feature=Value(dtype='string', id=None), length=-1, id=None),
119
+ 'bboxes': Sequence(feature=Sequence(feature=Value(dtype='int64', id=None), length=-1, id=None), length=-1, id=None),
120
+ 'ner_tags': Sequence(feature=ClassLabel(num_classes=len(labels), names=labels, names_file=None, id=None), length=-1, id=None),
121
+ 'image': Img(decode=True, id=None)
122
+ })
123
+
124
+ full_data_set = Dataset.from_dict(dataset_dict, features=features)
125
+ dataset = full_data_set.train_test_split(test_size=TEST_SIZE)
126
+ dataset["train"] = dataset["train"].filter(filter_out_unannotated)
127
+ processor = AutoProcessor.from_pretrained(
128
+ "microsoft/layoutlmv3-base", apply_ocr=False)
129
+
130
+ features = dataset["train"].features
131
+ column_names = dataset["train"].column_names
132
+ image_column_name = "image"
133
+ text_column_name = "tokens"
134
+ boxes_column_name = "bboxes"
135
+ label_column_name = "ner_tags"
136
+
137
+ # we need to define custom features for `set_format` (used later on) to work properly
138
+ features = Features({
139
+ 'pixel_values': Array3D(dtype="float32", shape=(3, 224, 224)),
140
+ 'input_ids': Sequence(feature=Value(dtype='int64')),
141
+ 'attention_mask': Sequence(Value(dtype='int64')),
142
+ 'bbox': Array2D(dtype="int64", shape=(512, 4)),
143
+ 'labels': Sequence(ClassLabel(names=labels)),
144
+ })
145
+
146
+ train_dataset = dataset["train"].map(
147
+ prepare_examples,
148
+ batched=True,
149
+ remove_columns=column_names,
150
+ features=features,
151
+ )
152
+ eval_dataset = dataset["test"].map(
153
+ prepare_examples,
154
+ batched=True,
155
+ remove_columns=column_names,
156
+ features=features,
157
+ )
158
+ train_dataset.set_format("torch")
159
+ if not OUTPUT_PATH.endswith('/'):
160
+ OUTPUT_PATH += '/'
161
+ train_dataset.save_to_disk(f'{OUTPUT_PATH}train_split')
162
+ eval_dataset.save_to_disk(f'{OUTPUT_PATH}eval_split')
163
+ dataset.save_to_disk(f'{OUTPUT_PATH}raw_data')
layoutlmv3FineTuning/run_inference.py ADDED
@@ -0,0 +1,31 @@
+ import argparse
+ from asyncio.log import logger
+ from Layoutlm_inference.ocr import prepare_batch_for_inference
+ from Layoutlm_inference.inference_handler import handle
+ import logging
+ import os
+
+ if __name__ == "__main__":
+     # try:
+     parser = argparse.ArgumentParser()
+     parser.add_argument("--model_path", type=str)
+     parser.add_argument("--images_path", type=str)
+     args, _ = parser.parse_known_args()
+     images_path = args.images_path
+     image_files = os.listdir(images_path)
+     images_path = [images_path+f'/{image_file}' for image_file in image_files]
+     inference_batch = prepare_batch_for_inference(images_path)
+     context = {"model_dir": args.model_path}
+     output_ls = handle(inference_batch, context)
+
+     print("output_ls", output_ls)
+
+
+     # except Exception as err:
+     #     os.makedirs('log', exist_ok=True)
+     #     logging.basicConfig(filename='log/error_output.log', level=logging.ERROR,
+     #                         format='%(asctime)s %(levelname)s %(name)s %(message)s')
+     #     logger = logging.getLogger(__name__)
+     #     logger.error(err)
+
+
layoutlmv3FineTuning/run_inferenceM.py ADDED
@@ -0,0 +1,32 @@
+ import argparse
+ from asyncio.log import logger
+ from Layoutlm_inference.ocr import prepare_batch_for_inference
+ from Layoutlm_inference.inference_handler import handle
+ import logging
+ import os
+
+
+
+ if __name__ == "__main__":
+     # try:
+     parser = argparse.ArgumentParser()
+     parser.add_argument("--model_path", type=str, required=True)
+     parser.add_argument("--image_path", type=str, required=True)  # single image path
+     args = parser.parse_args()
+
+     # Expecting a single image file
+     image_path = args.image_path
+
+     # Ensure the file exists before processing
+     if not os.path.isfile(image_path):
+         raise FileNotFoundError(f"The provided image path does not exist: {image_path}")
+
+     # Prepare batch for a single image
+     inference_batch = prepare_batch_for_inference([image_path])  # pass as a list
+     context = {"model_dir": args.model_path}
+
+     # Handle the inference
+     handle(inference_batch, context)
+
+
+
multiple_request.py ADDED
@@ -0,0 +1,56 @@
+ import requests
+ import concurrent.futures
+ import time
+
+ # Define the API endpoint
+ #http://43.204.234.114:8000/api/aadhar_ocr
+ # API_URL = "http://127.0.0.1:8000/api/aadhar_ocr"
+ API_URL = "http://localhost:8000/api/aadhar_ocr"
+
+
+ # Define the file paths
+ FILE_PATHS = {
+     "aadhar_file": "uploads/aadhar/test_one.jpg",
+     # "pan_file": "test_images_pan/6ea33087.jpeg",
+     # "cheque_file": "test_images_cheque/0f81678a.jpeg",
+     # "gst_file": "test_images_gst/0a52fbcb_page3_image_0.jpg",
+ }
+
+ # Function to send a single POST request
+ def send_request():
+     try:
+         start_time = time.time()
+         # Open files dynamically for each request
+         files = {key: open(path, "rb") for key, path in FILE_PATHS.items()}
+         response = requests.post(API_URL, files=files)
+         print("this is response\n\n", response)
+         end_time = time.time()
+         print(f"\nTime taken for one request: {end_time - start_time:.2f} seconds")
+         # Close the files after the request
+         for file in files.values():
+             file.close()
+         return response.status_code, response.text
+     except requests.exceptions.RequestException as e:
+         return "Error", str(e)
+
+ # Main function to send multiple concurrent requests
+ def test_api_concurrency(num_requests):
+     start_time = time.time()
+     with concurrent.futures.ThreadPoolExecutor() as executor:
+         # Launch multiple requests concurrently
+         results = list(executor.map(lambda _: send_request(), range(num_requests)))
+     end_time = time.time()
+
+     # Print results
+     for idx, (status, text) in enumerate(results):
+         print(f"Request {idx + 1}: Status Code: {status}, Response: {text}")
+
+     print(f"\nTotal time taken: {end_time - start_time:.2f} seconds")
+
+ # Number of concurrent requests
+ NUM_REQUESTS = 8  # Adjust this number based on your testing needs
+
+ if __name__ == "__main__":
+     test_api_concurrency(NUM_REQUESTS)
+
+
requirements.txt ADDED
@@ -0,0 +1,9 @@
+ fastapi
+ uvicorn[standard]
+ python-multipart
+ git+https://github.com/huggingface/transformers.git
+ git+https://github.com/huggingface/datasets.git
+ transformers[torch]
+ pillow
+ google-cloud-vision
+ python-dotenv
sample.py ADDED
@@ -0,0 +1,22 @@
+ import requests
+
+ # Define the API endpoint
+ # url = "http://127.0.0.0:7860/api/home"
+
+ post_url = "http://localhost:7860/api/aadhar_ocr"
+ # response = requests.get(url)
+
+ # print()
+ # Define the file paths
+ files = {
+     "aadhar_file": open("/home/javmulla/model_one/test_images_aadhar/test_two.jpg", "rb"),
+     "pan_file": open("/home/javmulla/model_one/test_images_pan/6ea33087.jpeg", "rb"),
+     "cheque_file": open("/home/javmulla/model_one/test_images_cheque/0f81678a.jpeg", "rb"),
+     "gst_file": open("/home/javmulla/model_one/test_images_gst/0a52fbcb_page3_image_0.jpg", "rb"),
+ }
+
+ response = requests.post(post_url, files=files)
+
+ # # Print the response
+ print("Status Code:", response.status_code)
+ print("Response Text:", response.text)
titanium-scope-436311-t3-966373f5aa2f.json ADDED
@@ -0,0 +1,13 @@
1
+ {
2
+ "type": "service_account",
3
+ "project_id": "titanium-scope-436311-t3",
4
+ "private_key_id": "966373f5aa2f27bb48fee7cd9d4afd6b1b432387",
5
+ "private_key": "-----BEGIN PRIVATE KEY-----\nMIIEvAIBADANBgkqhkiG9w0BAQEFAASCBKYwggSiAgEAAoIBAQCUMbbv+N8zoYiY\nBECTcq6vZR/biV5sYlToXujzDw5iYHtMAPX5V6Z2ORgOuvq4WTAmozwKG2LLrapr\ntFKqeKBAhQR79Jlrek6efYwIgI/PVtDNhvylBg7ZfINX5HVE9tHxSS66jKEQwBmq\nW4tILP3BvkxDv0FhjkKtO9D1tm3omQIhg7B0+0T9IjGlQ/8Y67NMHDuP4MWTP2r6\njU9ulYp3r10ZSc+jZHX3jXA0UCM5LehYorZb3/GEldMdKvZ4RJvMDaolbu6aE9zY\nTkrVuzo6uNJgv1h+FYTjvnbjT2AYq0H4KcLXgQZVf0F72ibmisjMqA0XehIrTbhY\nbEGe26oZAgMBAAECggEAPhdSYdtxkY110NO/Rsg/PsftACvfPyQ4FSBnFCfTzA5G\nusKQTQeXfGNRnCJlmEXuMdIk/ssYquQ5ymTEWh6ubjoNde43NdwKAsfxm0JafvIO\nDH8pbe9K238a/QGAzQNpVWJnTMxNU9pZJpKymewX6kxUYfJJb5mOgEzWsYzdIh4O\nl1XuylR2m0OK+NgAqhuFvqFkRqem6tlfDhGl+dIQNZ60OVXew0xEMV6x/z1OYTqR\nS+S0GUcfZB6OVIv/anKZ8s49noBuR/JkMX2sIaXCTcicL0o44n2ROUw0jcxKxi+6\nv8IQNcxm28b9SbPNgxb4KCdCOqF9iePcLLLr4S/QHQKBgQDEwi51FynUzCm4sEVE\ndEa32xkrkYe/gZxg4kTH5Sn4Ts1Xidg2z1HreaxTM3Nomu+2PUWrFu7n2YwAkvdx\nrWGoegRZNNTlga8yFid24BqjeszS/hO5Fg7PGN+beDcp63NVirYTkQjZ8FM6UA50\nKZ8c6Qyt/bGaihDUWsXdbuTobwKBgQDA0EatcxAW3sl7f6Mw82wHiC2mmMF7g+f0\ntC0B7xirrf9TSSXDSwYxWUJ6rAxTjoskjmi+lBw+XAIDz5bWPkuwya0zeOdCh9yp\ndEvv3pm8puPzwFNLh7OWyROW3cmV5C1tLGqdGyYr7WkHGXAZCkc9U5wFQY683j5o\n3b7skCSJ9wKBgCH7g7iXapMlO+N5Fk2PY5NnlP5QYUizIwYcrlJ0Av6u5YpD9YLp\n5bUsy5WHIlyjvdkU1g6JpHOIwERtHa2Vi3Nkt5GMrWSCNHcLGn/OjutDT1L1rQRf\niek824nnhmeIEeBpV68jcorplgZRQ13OvntoyNbYJS+SvvtePiRTfdejAoGATqQk\nT5ZAl7NiZjaW7t45z5ChXfOr5p7UOqBKQyGr5Enhe6y39EFjUzlevf3yQRpAcjaL\nTj/GjUClqbw/fz6FTKPVOsszN5WGUK8YUctu1N0U2FQ3JPVCMFvu23e2QqaASKj3\nCwEJvpzkW3rql6vzhnXViudEOpBC0C6xMndQD90CgYAQYkIA1O5aGjT8TMhxa1l+\ng0/mxmeVowUUzG1Yntr3LQKQE/+tRZ/gE9N57PutecYIrWmOGPhEF89LF4nI1uqF\nWEdatBSr80jcHL8LiNPdVHwa8G3AH7MoR5Tq7RzYyDKxJKtf9IZJZ9dMve0iVb1R\n5J6yvGjzRzs7xnG3lvUpYQ==\n-----END PRIVATE KEY-----\n",
6
+ "client_email": "vision-service@titanium-scope-436311-t3.iam.gserviceaccount.com",
7
+ "client_id": "105182341558314183890",
8
+ "auth_uri": "https://accounts.google.com/o/oauth2/auth",
9
+ "token_uri": "https://oauth2.googleapis.com/token",
10
+ "auth_provider_x509_cert_url": "https://www.googleapis.com/oauth2/v1/certs",
11
+ "client_x509_cert_url": "https://www.googleapis.com/robot/v1/metadata/x509/vision-service%40titanium-scope-436311-t3.iam.gserviceaccount.com",
12
+ "universe_domain": "googleapis.com"
13
+ }
uploads/aadhar/test_one.jpg ADDED
uploads/aadhar/test_two.jpg ADDED
uploads/cheque/0f81678a.jpeg ADDED
uploads/gst/0a52fbcb_page3_image_0.jpg ADDED
uploads/pan/6ea33087.jpeg ADDED