Update main.py
main.py (CHANGED)
Old version (before the change), Donut-based:

@@ -1,26 +1,40 @@

import fitz
from fastapi import FastAPI, File, UploadFile, Form
from fastapi.responses import JSONResponse
from transformers import pipeline
from PIL import Image
from io import BytesIO
from starlette.middleware import Middleware
from starlette.middleware.cors import CORSMiddleware
import torch
import re

from transformers import DonutProcessor, VisionEncoderDecoderModel

app = FastAPI()

# ...
model.to(device)

@app.post("/
async def donut_question_answering(
    file: UploadFile = File(...),
    questions: str = Form(...),
):

@@ -31,68 +45,71 @@ async def donut_question_answering(

        # Open the image using PIL
        image = Image.open(BytesIO(contents))

        # ...

        result_dict = dict(zip(question_list, answers))
        return result_dict

    except Exception as e:
        return ...

        # ...

        prompt = task_prompt.replace("{user_input}", question)
        decoder_input_ids = processor.tokenizer(prompt, add_special_tokens=False, return_tensors="pt").input_ids

        # generate answer
        outputs = model.generate(
            pixel_values.to(device),
            decoder_input_ids=decoder_input_ids.to(device),
            max_length=model.decoder.config.max_position_embeddings,
            early_stopping=True,
            pad_token_id=processor.tokenizer.pad_token_id,
            eos_token_id=processor.tokenizer.eos_token_id,
            use_cache=True,
            num_beams=1,
            bad_words_ids=[[processor.tokenizer.unk_token_id]],
            return_dict_in_generate=True,
        )

        # ...
    )
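
The fragments above follow the DocVQA generation recipe from the Donut documentation, but the model-loading and answer-decoding lines are missing from this view. A self-contained sketch of that usual pattern is shown below; the checkpoint name and the donut_answer helper are illustrative assumptions, not the Space's exact code.

import re
import torch
from transformers import DonutProcessor, VisionEncoderDecoderModel

# Checkpoint name is an assumption: the lines that load the model are missing above, and
# "naver-clova-ix/donut-base-finetuned-docvqa" is the usual DocVQA checkpoint for this pattern.
processor = DonutProcessor.from_pretrained("naver-clova-ix/donut-base-finetuned-docvqa")
model = VisionEncoderDecoderModel.from_pretrained("naver-clova-ix/donut-base-finetuned-docvqa")
device = "cuda" if torch.cuda.is_available() else "cpu"
model.to(device)

def donut_answer(image, question):
    # Build the DocVQA task prompt and encode the image, as in the Donut documentation example.
    task_prompt = "<s_docvqa><s_question>{user_input}</s_question><s_answer>"
    prompt = task_prompt.replace("{user_input}", question)
    decoder_input_ids = processor.tokenizer(prompt, add_special_tokens=False, return_tensors="pt").input_ids
    pixel_values = processor(image, return_tensors="pt").pixel_values

    # Generate with the same arguments that appear in the removed code.
    outputs = model.generate(
        pixel_values.to(device),
        decoder_input_ids=decoder_input_ids.to(device),
        max_length=model.decoder.config.max_position_embeddings,
        early_stopping=True,
        pad_token_id=processor.tokenizer.pad_token_id,
        eos_token_id=processor.tokenizer.eos_token_id,
        use_cache=True,
        num_beams=1,
        bad_words_ids=[[processor.tokenizer.unk_token_id]],
        return_dict_in_generate=True,
    )

    # Decode, strip special and task tokens, and parse the answer structure.
    sequence = processor.batch_decode(outputs.sequences)[0]
    sequence = sequence.replace(processor.tokenizer.eos_token, "").replace(processor.tokenizer.pad_token, "")
    sequence = re.sub(r"<.*?>", "", sequence, count=1).strip()
    return processor.token2json(sequence)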

New version (after the change), LayoutLMv2 pipeline-based:

import fitz
from fastapi import FastAPI, File, UploadFile, Form
from fastapi.responses import JSONResponse
from transformers import pipeline
from PIL import Image
from io import BytesIO
from starlette.middleware import Middleware
from starlette.middleware.cors import CORSMiddleware

app = FastAPI()

# Set up CORS middleware
origins = ["*"]  # or specify your list of allowed origins
app.add_middleware(
    CORSMiddleware,
    allow_origins=origins,
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)

# Use a pipeline as a high-level helper
nlp_qa = pipeline("document-question-answering", model="tiennvcs/layoutlmv2-base-uncased-finetuned-docvqa")

description = """
## Image-based Document QA
This API performs document question answering using a LayoutLMv2-based model.

### Endpoints:
- **POST /uploadfile/:** Upload an image file to extract text and answer provided questions.
- **POST /pdfQA/:** Provide a PDF file to extract text and answer provided questions.
"""

# NOTE: this rebinds `app` to a new FastAPI instance, so the CORS middleware added to the
# instance created above is not attached to the app that serves the routes below.
app = FastAPI(docs_url="/", description=description)
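
For reference, the pipeline loaded here is normally called with an image and a question, and it returns a list of scored answers. A small sketch follows; the image path is a placeholder, and running it requires pytesseract plus the model's vision dependencies to be installed.

from transformers import pipeline
from PIL import Image

# Same checkpoint as above; "sample_invoice.png" is a placeholder path, not a file in this Space.
doc_qa = pipeline(
    "document-question-answering",
    model="tiennvcs/layoutlmv2-base-uncased-finetuned-docvqa",
)

image = Image.open("sample_invoice.png")
result = doc_qa(image, "What is the invoice number?")

# Each candidate answer is a dict that includes 'answer' and 'score'.
print(result[0]["answer"], result[0]["score"])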

@app.post("/uploadfile/", description="Upload an image file to extract text and answer provided questions.")
async def perform_document_qa(
    file: UploadFile = File(...),
    questions: str = Form(...),
):
    # ... (unchanged lines not shown in the diff)

        # Open the image using PIL
        image = Image.open(BytesIO(contents))

        # Perform document question answering for each question using LayoutLMv2-based model
        answers_dict = {}
        for question in questions.split(','):
            result = nlp_qa(
                image,
                question.strip()
            )

            # Access the 'answer' key from the first item in the result list
            answer = result[0]['answer']

            # Format the question as a string without extra characters
            formatted_question = question.strip("[]")

            answers_dict[formatted_question] = answer

        return answers_dict
    except Exception as e:
        return JSONResponse(content=f"Error processing file: {str(e)}", status_code=500)
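
A client call against this endpoint could look like the sketch below; the URL and the image file are placeholders for wherever the Space is actually served.

import requests

# Placeholder URL and file name; point these at the running Space and a real image.
url = "http://localhost:7860/uploadfile/"

with open("sample_invoice.png", "rb") as f:
    response = requests.post(
        url,
        files={"file": ("sample_invoice.png", f, "image/png")},
        data={"questions": "What is the invoice number?,What is the total amount?"},
    )

# The endpoint responds with the {question: answer} mapping built in answers_dict.
print(response.json())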

@app.post("/pdfQA/", description="Provide a PDF file to extract text and answer provided questions.")
async def pdf_question_answering(
    file: UploadFile = File(...),
    questions: str = Form(...),
):
    try:
        # Read the uploaded file as bytes
        contents = await file.read()

        # Initialize an empty string to store the text content of the PDF
        all_text = ""

        # Use PyMuPDF to open the in-memory PDF bytes and extract text
        pdf_document = fitz.open(stream=contents, filetype="pdf")

        # Loop through each page and extract its embedded text
        for page_num in range(pdf_document.page_count):
            page = pdf_document.load_page(page_num)
            print(f"Processing page {page_num + 1}...")
            text = page.get_text()
            all_text += text + '\n'

        # Print or do something with the collected text
        print(all_text)

        # List of questions
        question_list = questions.split(',')

        # Initialize an empty dictionary to store questions and answers
        qa_dict = {}

        # Get answers for each question with the same context
        for question in question_list:
            result = nlp_qa({
                'question': question,
                'context': all_text
            })

            # Access the 'answer' key from the result
            answer = result['answer']

            # Store the question and answer in the dictionary
            qa_dict[question] = answer

        return qa_dict

    except Exception as e:
        return JSONResponse(content=f"Error processing PDF file: {str(e)}", status_code=500)
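
One caveat: nlp_qa is the document-question-answering pipeline created above, which expects an image (or word boxes) alongside each question, so the {'question': ..., 'context': ...} call on plain extracted text is unlikely to work as written. If the goal is extractive QA over the PDF text, a text question-answering pipeline matches that call shape; a sketch under that assumption follows, with an illustrative checkpoint that is not part of this Space.

from transformers import pipeline

# Assumed SQuAD-style extractive QA checkpoint, used here only for illustration.
text_qa = pipeline("question-answering", model="deepset/roberta-base-squad2")

all_text = "Invoice INV-001 was issued on 2024-01-15 for a total of $540."
result = text_qa(question="What is the invoice number?", context=all_text)

# The result is a dict with 'answer', 'score', and the answer's character span in the context.
print(result["answer"], result["score"])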