Spaces:
Sleeping
Sleeping
Update main.py
Browse files
main.py
CHANGED
@@ -83,55 +83,6 @@ def process_document(image, questions):
|
|
83 |
|
84 |
return answers
|
85 |
|
86 |
-
@app.post("/pdfQA/", description=description)
|
87 |
-
async def pdf_question_answering(
|
88 |
-
file: UploadFile = File(...),
|
89 |
-
questions: str = Form(...),
|
90 |
-
):
|
91 |
-
try:
|
92 |
-
# Read the uploaded file as bytes
|
93 |
-
contents = await file.read()
|
94 |
-
|
95 |
-
# Initialize an empty string to store the text content of the PDF
|
96 |
-
all_text = ""
|
97 |
-
|
98 |
-
# Use PyMuPDF to process the PDF and extract text
|
99 |
-
pdf_document = fitz.open_from_bytes(contents)
|
100 |
-
|
101 |
-
# Loop through each page and perform OCR
|
102 |
-
for page_num in range(pdf_document.page_count):
|
103 |
-
page = pdf_document.load_page(page_num)
|
104 |
-
print(f"Processing page {page_num + 1}...")
|
105 |
-
text = page.get_text()
|
106 |
-
all_text += text + '\n'
|
107 |
-
|
108 |
-
# Print or do something with the collected text
|
109 |
-
print(all_text)
|
110 |
-
|
111 |
-
# List of questions
|
112 |
-
question_list = questions.split(',')
|
113 |
-
|
114 |
-
# Initialize an empty dictionary to store questions and answers
|
115 |
-
qa_dict = {}
|
116 |
-
|
117 |
-
# Get answers for each question with the same context
|
118 |
-
for question in question_list:
|
119 |
-
result = nlp_qa({
|
120 |
-
'question': question,
|
121 |
-
'context': all_text
|
122 |
-
})
|
123 |
-
|
124 |
-
# Access the 'answer' key from the result
|
125 |
-
answer = result['answer']
|
126 |
-
|
127 |
-
# Store the question and answer in the dictionary
|
128 |
-
qa_dict[question] = answer
|
129 |
-
|
130 |
-
return qa_dict
|
131 |
-
|
132 |
-
except Exception as e:
|
133 |
-
return JSONResponse(content=f"Error processing PDF file: {str(e)}", status_code=500)
|
134 |
-
|
135 |
# Set up CORS middleware
|
136 |
origins = ["*"] # or specify your list of allowed origins
|
137 |
app.add_middleware(
|
|
|
83 |
|
84 |
return answers
|
85 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
86 |
# Set up CORS middleware
|
87 |
origins = ["*"] # or specify your list of allowed origins
|
88 |
app.add_middleware(
|