Spaces:

MJobe
/

document-vqa-v2

Sleeping

App Files Community

MJobe committed on Dec 12, 2023

Commit

d3f8141

•

1 Parent(s): 2181fee

Update main.py

Browse files

Files changed (1) hide show

main.py +13 -19

main.py CHANGED Viewed

@@ -1,23 +1,17 @@
 from io import BytesIO
 from PIL import Image
 from fastapi import FastAPI, File, UploadFile, Form
 from fastapi.responses import JSONResponse
-import fitz
 from transformers import pipeline
-import requests
-from typing import List
-from pytesseract import pytesseract
 app = FastAPI()
-# Load a BERT-based question answering pipeline
-nlp_qa = pipeline('question-answering', model='bert-large-uncased-whole-word-masking-finetuned-squad')
 description = """
 ## Image-based Document QA
-This API extracts text from an uploaded image using OCR and performs document question answering using a BERT-based model.
 ### Endpoints:
 - **POST /uploadfile/:** Upload an image file to extract text and answer provided questions.
@@ -44,13 +38,13 @@ async def perform_document_qa(
         # Split the questions string into a list
         question_list = [q.strip() for q in questions.split(',')]
-        # Perform document question answering for each question using BERT-based model
         answers_dict = {}
         for question in question_list:
-            result = nlp_qa({
-                'question': question,
-                'context': text_content
-            })
             answers_dict[question] = result['answer']
         return answers_dict
@@ -66,13 +60,13 @@ async def load_file(
         # Read the uploaded file as bytes
         contents = await file.read()
-        # Perform document question answering for each question using BERT-based model
         answers_dict = {}
         for question in questions.split(','):
-            result = nlp_qa({
-                'question': question.strip(),
-                'context': contents.decode('utf-8')  # Assuming the content is text, adjust as needed
-            })
             answers_dict[question] = result['answer']
         return answers_dict

 from io import BytesIO
 from PIL import Image
 from fastapi import FastAPI, File, UploadFile, Form
 from fastapi.responses import JSONResponse
 from transformers import pipeline
 app = FastAPI()
+# Use a pipeline as a high-level helper
+nlp_qa = pipeline("document-question-answering", model="impira/layoutlm-document-qa")
 description = """
 ## Image-based Document QA
+This API extracts text from an uploaded image using OCR and performs document question answering using a LayoutLM-based model.
 ### Endpoints:
 - **POST /uploadfile/:** Upload an image file to extract text and answer provided questions.
         # Split the questions string into a list
         question_list = [q.strip() for q in questions.split(',')]
+        # Perform document question answering for each question using LayoutLM-based model
         answers_dict = {}
         for question in question_list:
+            result = nlp_qa(
+                text_content,
+                question
+            )
             answers_dict[question] = result['answer']
         return answers_dict
         # Read the uploaded file as bytes
         contents = await file.read()
+        # Perform document question answering for each question using LayoutLM-based model
         answers_dict = {}
         for question in questions.split(','):
+            result = nlp_qa(
+                contents.decode('utf-8'),  # Assuming the content is text, adjust as needed
+                question.strip()
+            )
             answers_dict[question] = result['answer']
         return answers_dict