Update app.py
Browse files
app.py
CHANGED
@@ -19,16 +19,17 @@ class MyApp:
|
|
19 |
self.index = None
|
20 |
self.model = SentenceTransformer('all-MiniLM-L6-v2')
|
21 |
|
22 |
-
def load_pdfs(self, files: List[
|
23 |
"""Extracts text from multiple PDF files and stores them."""
|
24 |
self.documents = []
|
25 |
-
for
|
26 |
-
|
|
|
27 |
for page_num in range(len(doc)):
|
28 |
page = doc[page_num]
|
29 |
text = page.get_text()
|
30 |
self.documents.append({
|
31 |
-
"file_name":
|
32 |
"page": page_num + 1,
|
33 |
"content": text
|
34 |
})
|
@@ -55,7 +56,7 @@ class MyApp:
|
|
55 |
|
56 |
app = MyApp()
|
57 |
|
58 |
-
def upload_files(files: List[
|
59 |
return app.load_pdfs(files)
|
60 |
|
61 |
def build_vector_db() -> str:
|
|
|
19 |
self.index = None
|
20 |
self.model = SentenceTransformer('all-MiniLM-L6-v2')
|
21 |
|
22 |
+
def load_pdfs(self, files: List[Dict]) -> str:
|
23 |
"""Extracts text from multiple PDF files and stores them."""
|
24 |
self.documents = []
|
25 |
+
for file_dict in files:
|
26 |
+
file_path = file_dict['name'] # Access the file path
|
27 |
+
doc = fitz.open(file_path) # Open the PDF using the file path
|
28 |
for page_num in range(len(doc)):
|
29 |
page = doc[page_num]
|
30 |
text = page.get_text()
|
31 |
self.documents.append({
|
32 |
+
"file_name": os.path.basename(file_path),
|
33 |
"page": page_num + 1,
|
34 |
"content": text
|
35 |
})
|
|
|
56 |
|
57 |
app = MyApp()
|
58 |
|
59 |
+
def upload_files(files: List[Dict]) -> str:
|
60 |
return app.load_pdfs(files)
|
61 |
|
62 |
def build_vector_db() -> str:
|