Rahatara committed on
Commit
0b6997c
1 Parent(s): 3c3edd2

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +6 -5
app.py CHANGED
@@ -19,16 +19,17 @@ class MyApp:
19
  self.index = None
20
  self.model = SentenceTransformer('all-MiniLM-L6-v2')
21
 
22
- def load_pdfs(self, files: List[gr.File]) -> str:
23
  """Extracts text from multiple PDF files and stores them."""
24
  self.documents = []
25
- for file in files:
26
- doc = fitz.open(stream=file.read(), filetype="pdf")
 
27
  for page_num in range(len(doc)):
28
  page = doc[page_num]
29
  text = page.get_text()
30
  self.documents.append({
31
- "file_name": file.name,
32
  "page": page_num + 1,
33
  "content": text
34
  })
@@ -55,7 +56,7 @@ class MyApp:
55
 
56
  app = MyApp()
57
 
58
- def upload_files(files: List[gr.File]) -> str:
59
  return app.load_pdfs(files)
60
 
61
  def build_vector_db() -> str:
 
19
  self.index = None
20
  self.model = SentenceTransformer('all-MiniLM-L6-v2')
21
 
22
+ def load_pdfs(self, files: List[Dict]) -> str:
23
  """Extracts text from multiple PDF files and stores them."""
24
  self.documents = []
25
+ for file_dict in files:
26
+ file_path = file_dict['name'] # Access the file path
27
+ doc = fitz.open(file_path) # Open the PDF using the file path
28
  for page_num in range(len(doc)):
29
  page = doc[page_num]
30
  text = page.get_text()
31
  self.documents.append({
32
+ "file_name": os.path.basename(file_path),
33
  "page": page_num + 1,
34
  "content": text
35
  })
 
56
 
57
  app = MyApp()
58
 
59
+ def upload_files(files: List[Dict]) -> str:
60
  return app.load_pdfs(files)
61
 
62
  def build_vector_db() -> str: