MJobe committed on
Commit
0ddbc70
1 Parent(s): 1106695

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +21 -42
main.py CHANGED
@@ -21,6 +21,7 @@ app.add_middleware(
21
  )
22
 
23
  nlp_qa = pipeline("document-question-answering", model="jinhybr/OCR-DocVQA-Donut")
 
24
 
25
  description = """
26
  ## Image-based Document QA
@@ -65,8 +66,8 @@ async def perform_document_qa(
65
  except Exception as e:
66
  return JSONResponse(content=f"Error processing file: {str(e)}", status_code=500)
67
 
68
- @app.post("/pdfQA/", description="Provide a PDF file to extract text and answer provided questions.")
69
- async def pdf_question_answering(
70
  file: UploadFile = File(...),
71
  questions: str = Form(...),
72
  ):
@@ -74,50 +75,28 @@ async def pdf_question_answering(
74
  # Read the uploaded file as bytes
75
  contents = await file.read()
76
 
77
- # Save the PDF bytes to a temporary file
78
- with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_pdf:
79
- temp_pdf.write(contents)
80
- temp_pdf_path = temp_pdf.name
81
-
82
- # Initialize an empty list to store image bytes
83
- images = []
84
-
85
- # Use PyMuPDF to process the PDF and convert each page to an image
86
- pdf_document = fitz.open(temp_pdf_path)
87
-
88
- for page_num in range(pdf_document.page_count):
89
- page = pdf_document.load_page(page_num)
90
- print(f"Converting page {page_num + 1} to image...")
91
-
92
- # Convert the page to an image
93
- image = Image.frombytes("RGB", page.get_size(), page.get_pixmap().samples)
94
-
95
- # Convert the image to bytes
96
- img_byte_array = BytesIO()
97
- image.save(img_byte_array, format='PNG')
98
- images.append(img_byte_array.getvalue())
99
-
100
- # Perform document question answering for each image
101
  answers_dict = {}
102
- for idx, image_bytes in enumerate(images):
103
- image = Image.open(BytesIO(image_bytes))
104
- for question in questions.split(','):
105
- result = nlp_qa(
106
- image,
107
- question.strip()
108
- )
109
- answer = result[0]['answer']
110
- formatted_question = f"{question.strip('[]')} (Page {idx + 1})"
111
- answers_dict[formatted_question] = answer
112
-
113
- # Delete the temporary PDF file
114
- temp_pdf.close()
115
- os.remove(temp_pdf_path)
116
 
117
- return answers_dict
 
 
 
 
118
 
 
 
 
119
  except Exception as e:
120
- return JSONResponse(content=f"Error processing PDF file: {str(e)}", status_code=500)
121
 
122
  # Set up CORS middleware
123
  origins = ["*"] # or specify your list of allowed origins
 
21
  )
22
 
23
  nlp_qa = pipeline("document-question-answering", model="jinhybr/OCR-DocVQA-Donut")
24
+ nlp_qa_v2 = pipeline("document-question-answering", model="fxmarty/tiny-doc-qa-vision-encoder-decoder")
25
 
26
  description = """
27
  ## Image-based Document QA
 
66
  except Exception as e:
67
  return JSONResponse(content=f"Error processing file: {str(e)}", status_code=500)
68
 
69
+ @app.post("/uploadfilev2/", description="Upload an image file to extract text and answer provided questions.")
70
+ async def perform_document_qa(
71
  file: UploadFile = File(...),
72
  questions: str = Form(...),
73
  ):
 
75
  # Read the uploaded file as bytes
76
  contents = await file.read()
77
 
78
+ # Open the image using PIL
79
+ image = Image.open(BytesIO(contents))
80
+
81
+ # Perform document question answering for each question using the vision encoder-decoder model (nlp_qa_v2)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
82
  answers_dict = {}
83
+ for question in questions.split(','):
84
+ result = nlp_qa_v2(
85
+ image,
86
+ question.strip()
87
+ )
 
 
 
 
 
 
 
 
 
88
 
89
+ # Access the 'answer' key from the first item in the result list
90
+ answer = result[0]['answer']
91
+
92
+ # Format the question as a string without extra characters
93
+ formatted_question = question.strip("[]")
94
 
95
+ answers_dict[formatted_question] = answer
96
+
97
+ return answers_dict
98
  except Exception as e:
99
+ return JSONResponse(content=f"Error processing file: {str(e)}", status_code=500)
100
 
101
  # Set up CORS middleware
102
  origins = ["*"] # or specify your list of allowed origins