Spaces:
Running
Running
add app
Browse files
app.py
ADDED
@@ -0,0 +1,54 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
from transformers import pipeline
|
3 |
+
from PyPDF2 import PdfReader
|
4 |
+
import docx
|
5 |
+
|
6 |
+
# st.cache_resource only exists in newer Streamlit releases; when it is
# missing, fall back to an identity decorator so the app still starts
# (without caching).
_cache_resource = getattr(st, "cache_resource", lambda func: func)


@_cache_resource
def _load_pipeline():
    """Build the document question-answering pipeline.

    Streamlit re-executes this script on every widget interaction, so the
    model is created through a cached factory instead of being rebuilt on
    each rerun.
    """
    return pipeline(
        "document-question-answering",
        model="impira/layoutlm-document-qa",
    )


# Initialize the NLP pipeline
nlp = _load_pipeline()

# Set the title of the app
st.title("LayoutLM Example")

# Create a file uploader that accepts various document formats
uploaded_file = st.file_uploader("Drag and drop a document here", type=['txt', 'pdf', 'docx'])

# Create a text box for user input
question = st.text_area("What would you like to know?")
|
20 |
+
|
21 |
+
def extract_text_from_file(uploaded_file):
    """Return the plain text of an uploaded document.

    The handler is chosen from ``uploaded_file.type`` (a MIME type string):

    * ``text/plain`` -- the raw bytes are decoded as UTF-8; bytes that are
      not valid UTF-8 are replaced with U+FFFD instead of raising.
    * ``application/pdf`` -- the text of every page, concatenated in order.
    * Word ``.docx`` -- the text of every paragraph, joined with newlines.

    Args:
        uploaded_file: A file-like object exposing a ``type`` attribute and
            a ``read()`` method returning bytes.

    Returns:
        The extracted text, or ``None`` when the MIME type is not one of
        the supported kinds.
    """
    mime_type = uploaded_file.type

    if mime_type == "text/plain":
        # errors="replace" keeps a non-UTF-8 upload from crashing the app.
        return uploaded_file.read().decode("utf-8", errors="replace")

    if mime_type == "application/pdf":
        reader = PdfReader(uploaded_file)
        # `or ""` guards against pages for which extraction yields nothing.
        return "".join(page.extract_text() or "" for page in reader.pages)

    if mime_type == "application/vnd.openxmlformats-officedocument.wordprocessingml.document":
        document = docx.Document(uploaded_file)
        return "\n".join(paragraph.text for paragraph in document.paragraphs)

    return None
|
36 |
+
|
37 |
+
# Answer the question once both a document and a question are available.
if uploaded_file and question:
    # Extract text from the uploaded document
    document_text = extract_text_from_file(uploaded_file)

    if document_text:
        # Run the NLP model on the extracted text and the user's question
        # NOTE(review): the "document-question-answering" task is documented
        # as taking an image (or word_boxes) plus a question; confirm that a
        # "context" key is accepted by this pipeline -- a text
        # "question-answering" pipeline may be what is intended.
        result = nlp(
            {
                "context": document_text,
                "question": question,
            }
        )

        # The pipeline may return a single answer dict or a list of answer
        # dicts; normalise to the top answer before indexing into it.
        if isinstance(result, list):
            best_answer = result[0] if result else None
        else:
            best_answer = result

        # Display the answer
        if best_answer:
            st.write("Answer:")
            st.write(best_answer['answer'])
        else:
            st.write("No answer found.")
    else:
        st.write("Unsupported file type or failed to extract text from the document.")
|