sms07 committed on
Commit
c1ba64e
1 Parent(s): f061141
Files changed (1) hide show
  1. app.py +54 -0
app.py ADDED
@@ -0,0 +1,54 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import streamlit as st
2
+ from transformers import pipeline
3
+ from PyPDF2 import PdfReader
4
+ import docx
5
+
6
# Initialize the NLP pipeline
def _load_pipeline():
    """Build the document question-answering pipeline used by the app."""
    return pipeline(
        "document-question-answering",
        model="impira/layoutlm-document-qa",
    )


# Streamlit re-executes this whole script on every widget interaction, so an
# uncached pipeline(...) call would rebuild the model on each rerun.
# Prefer st.cache_resource, fall back to the older st.experimental_singleton,
# and finally to an uncached load if this Streamlit build offers neither.
_cache_resource = getattr(st, "cache_resource", None) or getattr(
    st, "experimental_singleton", None
)
nlp = (_cache_resource(_load_pipeline) if _cache_resource else _load_pipeline)()
11
+
12
# Page heading shown at the top of the app
st.title("LayoutLM Example")

# Upload widget limited to txt, pdf and docx files
uploaded_file = st.file_uploader(
    "Drag and drop a document here",
    type=["txt", "pdf", "docx"],
)

# Free-form question the user types about the document
question = st.text_area("What would you like to know?")
20
+
21
def extract_text_from_file(uploaded_file):
    """Return the plain text of an uploaded document, or None if unsupported.

    The format is chosen from ``uploaded_file.type`` (a MIME type string):

    * ``text/plain`` -- the raw bytes are decoded as UTF-8.
    * ``application/pdf`` -- the text of every page is concatenated.
    * the Word ``.docx`` MIME type -- paragraph texts are joined with
      newlines.

    Args:
        uploaded_file: A file-like object exposing ``type`` and ``read()``,
            which is also accepted by ``PdfReader`` and ``docx.Document``.

    Returns:
        The extracted text (possibly empty), or ``None`` when the MIME type
        is not one of the three handled above.
    """
    mime_type = uploaded_file.type

    if mime_type == "text/plain":
        # "utf-8-sig" drops a leading byte-order mark, and errors="replace"
        # keeps one stray non-UTF-8 byte from crashing the whole app.
        return uploaded_file.read().decode("utf-8-sig", errors="replace")

    if mime_type == "application/pdf":
        reader = PdfReader(uploaded_file)
        # Guard against a page yielding None or "" (e.g. a scanned page with
        # no text layer); join avoids repeated string concatenation.
        return "".join(page.extract_text() or "" for page in reader.pages)

    if mime_type == (
        "application/vnd.openxmlformats-officedocument.wordprocessingml.document"
    ):
        doc = docx.Document(uploaded_file)
        return "\n".join(paragraph.text for paragraph in doc.paragraphs)

    return None
36
+
37
if uploaded_file and question:
    # Extract text from the uploaded document
    document_text = extract_text_from_file(uploaded_file)
    words = document_text.split() if document_text else []

    if words:
        # The document-question-answering pipeline has no "context" input: it
        # expects an image or pre-extracted (word, box) pairs. Only plain text
        # is available here, so each word gets a placeholder box and no image
        # is passed; the model then sees the words without layout information.
        word_boxes = [(word, [0, 0, 0, 0]) for word in words]
        result = nlp(image=None, question=question, word_boxes=word_boxes)

        # The pipeline may return a list of candidate answers (best first)
        # or a single dict; normalise to the top candidate.
        best = result[0] if isinstance(result, list) and result else result

        # Display the answer
        st.write("Answer:")
        if isinstance(best, dict) and best.get("answer"):
            st.write(best["answer"])
        else:
            st.write("No answer found in the document.")
    else:
        st.write("Unsupported file type or failed to extract text from the document.")