Spaces:

MJobe
/

document-vqa-v2

Running

App Files Community

MJobe committed on Dec 15, 2023

Commit

836458e

•

1 Parent(s): 1fb76bb

Update main.py

Browse files

Files changed (1) hide show

main.py +14 -21

main.py CHANGED Viewed

@@ -1,21 +1,23 @@
 import fitz
-from fastapi import FastAPI, File, UploadFile, Form
 from fastapi.responses import JSONResponse
 from transformers import pipeline
 from PIL import Image
 from io import BytesIO
-import pytesseract
 from starlette.middleware import Middleware
 from starlette.middleware.cors import CORSMiddleware
 app = FastAPI()
-# Use a pipeline as a high-level helper for question answering
-nlp_qa = pipeline('question-answering', model='bert-large-uncased-whole-word-masking-finetuned-squad', tokenizer='bert-large-uncased-whole-word-masking-finetuned-squad')
 description = """
 ## Image-based Document QA
-This API performs document question answering using a BERT-based model.
 ### Endpoints:
 - **POST /uploadfile/:** Upload an image file to extract text and answer provided questions.
@@ -36,19 +38,16 @@ async def perform_document_qa(
         # Open the image using PIL
         image = Image.open(BytesIO(contents))
-        # Perform OCR to extract text from the image
-        text = extract_text_from_image(image)
-        # Perform document question answering for each question using BERT-based model
         answers_dict = {}
         for question in questions.split(','):
-            result = nlp_qa({
-                'question': question.strip(),
-                'context': text
-            })
-            # Access the 'answer' key from the result
-            answer = result['answer']
             # Format the question as a string without extra characters
             formatted_question = question.strip("[]")
@@ -59,12 +58,6 @@ async def perform_document_qa(
     except Exception as e:
         return JSONResponse(content=f"Error processing file: {str(e)}", status_code=500)
-def extract_text_from_image(image):
-    # Perform OCR to extract text from the image using Tesseract
-    text = pytesseract.image_to_string(image, lang='eng')
-    return text
 @app.post("/pdfQA/", description=description)
 async def pdf_question_answering(
     file: UploadFile = File(...),

 import fitz
+from fastapi import FastAPI, File, UploadFile, Form, Request, Response
 from fastapi.responses import JSONResponse
 from transformers import pipeline
 from PIL import Image
 from io import BytesIO
 from starlette.middleware import Middleware
 from starlette.middleware.cors import CORSMiddleware
 app = FastAPI()
+# Use a pipeline as a high-level helper
+nlp_qa = pipeline("document-question-answering", model="impira/layoutlm-invoices")
+# Use a pipeline as a high-level helper
+nlp_ner = pipeline('question-answering', model='deepset/roberta-base-squad2', tokenizer='deepset/roberta-base-squad2')
 description = """
 ## Image-based Document QA
+This API performs document question answering using a LayoutLM-based model.
 ### Endpoints:
 - **POST /uploadfile/:** Upload an image file to extract text and answer provided questions.
         # Open the image using PIL
         image = Image.open(BytesIO(contents))
+        # Perform document question answering for each question using LayoutLM-based model
         answers_dict = {}
         for question in questions.split(','):
+            result = nlp_qa(
+                image,
+                question.strip()
+            )
+            # Access the 'answer' key from the first item in the result list
+            answer = result[0]['answer']
             # Format the question as a string without extra characters
             formatted_question = question.strip("[]")
     except Exception as e:
         return JSONResponse(content=f"Error processing file: {str(e)}", status_code=500)
 @app.post("/pdfQA/", description=description)
 async def pdf_question_answering(
     file: UploadFile = File(...),