engrphoenix committed on
Commit
81bc43b
·
verified ·
1 Parent(s): 4af7e0e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -4
app.py CHANGED
@@ -4,18 +4,19 @@ from PyPDF2 import PdfReader
4
  import numpy as np
5
  from groq import Groq
6
  import faiss
 
7
 
8
  # Set up Groq API client
9
  groq_client = Groq(api_key="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")
10
 
11
  # Function to extract text from PDF
12
  def extract_pdf_content(pdf_file):
13
- reader = PdfReader(pdf_file)
14
  content = ""
15
- for page in reader.pages:
16
- content += page.extract_text()
17
  return content
18
-
19
  # Function to split content into chunks
20
  def chunk_text(text, chunk_size=500):
21
  words = text.split()
 
4
  import numpy as np
5
  from groq import Groq
6
  import faiss
7
+ import fitz
8
 
9
  # Set up Groq API client
10
  groq_client = Groq(api_key="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")
11
 
12
  # Function to extract text from PDF
13
  def extract_pdf_content(pdf_file):
14
+ doc = fitz.open(pdf_file)
15
  content = ""
16
+ for page in doc:
17
+ content += page.get_text()
18
  return content
19
+
20
  # Function to split content into chunks
21
  def chunk_text(text, chunk_size=500):
22
  words = text.split()