Spaces:
Sleeping
Sleeping
dinhquangson
committed on
Commit
•
4fac9ec
1
Parent(s):
cae8ab9
Update app.py
Browse files
app.py
CHANGED
@@ -36,14 +36,17 @@ def get_pdf_pages(pdf_docs):
|
|
36 |
|
37 |
"""
|
38 |
pages = []
|
39 |
-
|
40 |
-
|
41 |
-
|
42 |
-
|
|
|
|
|
|
|
43 |
|
44 |
-
|
45 |
-
|
46 |
-
|
47 |
return pages
|
48 |
|
49 |
|
|
|
36 |
|
37 |
"""
|
38 |
pages = []
|
39 |
+
import tempfile
|
40 |
+
|
41 |
+
with tempfile.TemporaryDirectory() as tmpdirname:
|
42 |
+
for pdf in pdf_docs:
|
43 |
+
pdf_path=os.path.join(tmpdirname,pdf.name)
|
44 |
+
with open(pdf_path, "wb") as f:
|
45 |
+
f.write(pdf.getbuffer())
|
46 |
|
47 |
+
pdf_loader = UnstructuredPDFLoader(pdf_path)
|
48 |
+
pdf_pages = pdf_loader.load_and_split()
|
49 |
+
pages=pages+pdf_pages
|
50 |
return pages
|
51 |
|
52 |
|