Spaces:

MJobe
/

document-vqa-v2

Sleeping

App Files Files Community

MJobe committed on Dec 12, 2023

Commit

8700a34

•

1 Parent(s): f198fb3

Update main.py

Browse files

Files changed (1) hide show

main.py +16 -11

main.py CHANGED Viewed

@@ -1,3 +1,4 @@
 from fastapi import FastAPI, File, UploadFile, Form
 from fastapi.responses import JSONResponse
 from transformers import pipeline
@@ -8,6 +9,8 @@ app = FastAPI()
 # Use a pipeline as a high-level helper
 nlp_qa = pipeline("document-question-answering", model="impira/layoutlm-document-qa")
 description = """
 ## Image-based Document QA
@@ -52,6 +55,7 @@ async def perform_document_qa(
     except Exception as e:
         return JSONResponse(content=f"Error processing file: {str(e)}", status_code=500)
 @app.post("/pdfUpload/", description=description)
 async def load_file(
     file: UploadFile = File(...),
@@ -61,25 +65,26 @@ async def load_file(
         # Read the uploaded file as bytes
         contents = await file.read()
-        # Open the image using PIL
-        image = Image.open(BytesIO(contents))
-        # Perform document question answering for each question using LayoutLM-based model
         answers_dict = {}
         for question in questions.split(','):
-            result = nlp_qa(
-                image,
-                question.strip()
-            )
-            # Access the 'answer' key from the first item in the result list
-            answer = result[0]['answer']
             # Format the question as a string without extra characters
             formatted_question = question.strip("[]")
-            answers_dict[formatted_question] = answer
         return answers_dict
     except Exception as e:
-        return JSONResponse(content=f"Error processing file: {str(e)}", status_code=500)

+import fitz
 from fastapi import FastAPI, File, UploadFile, Form
 from fastapi.responses import JSONResponse
 from transformers import pipeline
 # Use a pipeline as a high-level helper
 nlp_qa = pipeline("document-question-answering", model="impira/layoutlm-document-qa")
+# Use a pipeline as a high-level helper for NER
+nlp_ner = pipeline("ner", model="microsoft/layoutlm-base-ner")
 description = """
 ## Image-based Document QA
     except Exception as e:
         return JSONResponse(content=f"Error processing file: {str(e)}", status_code=500)
 @app.post("/pdfUpload/", description=description)
 async def load_file(
     file: UploadFile = File(...),
         # Read the uploaded file as bytes
         contents = await file.read()
+        # Extract text from the PDF using PyMuPDF (fitz)
+        pdf_document = fitz.open("file.pdf", pdf_bytes=contents)
+        text_content = ""
+        for page_num in range(pdf_document.page_count):
+            page = pdf_document[page_num]
+            text_content += page.get_text()
+        # Perform named entity recognition for each question using LayoutLM-based NER
         answers_dict = {}
         for question in questions.split(','):
+            result = nlp_ner(text_content, question.strip())
+            # Extract the named entity from the result
+            named_entity = result[0]['word'] if result else "Not Found"
             # Format the question as a string without extra characters
             formatted_question = question.strip("[]")
+            answers_dict[formatted_question] = named_entity
         return answers_dict
     except Exception as e:
+        return JSONResponse(content=f"Error processing PDF file: {str(e)}", status_code=500)