2001muhammadumair committed on
Commit
c836f81
·
verified ·
1 Parent(s): 74b836e

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -13
app.py CHANGED
@@ -9,7 +9,7 @@ import PyPDF2
9
  from sklearn.metrics.pairwise import cosine_similarity
10
  from collections import Counter
11
 
12
- # ---------------------- Setup ---------------------
13
 
14
  logging.basicConfig(
15
  filename='query_logs.log',
@@ -23,6 +23,21 @@ PDF_PATH = 'Generative_AI_Foundations_in_Python_Discover_key_techniques_and.pdf'
23
  sentence_transformer_model = SentenceTransformer('all-MiniLM-L6-v2')
24
  cache = {}
25
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
26
  # --------------------- PDF Processing ---------------------
27
 
28
  def read_pdf(file_path):
@@ -41,21 +56,10 @@ def read_pdf(file_path):
41
  sentences_with_pages.append({'sentence': sentence, 'page_number': page_num + 1})
42
  return sentences_with_pages
43
 
 
44
  sentences_with_pages = read_pdf(PDF_PATH)
45
  vector_index, sentences_with_pages = vectorize_text(sentences_with_pages)
46
 
47
- def vectorize_text(sentences_with_pages):
48
- try:
49
- sentences = [item['sentence'] for item in sentences_with_pages]
50
- embeddings = sentence_transformer_model.encode(sentences, show_progress_bar=True)
51
- index = faiss.IndexFlatL2(embeddings.shape[1])
52
- index.add(np.array(embeddings))
53
- logging.info(f"Added {len(sentences)} sentences to the vector store.")
54
- return index, sentences_with_pages
55
- except Exception as e:
56
- logging.error(f"Error during vectorization: {str(e)}")
57
- return None, None
58
-
59
  # --------------------- Query Handling ---------------------
60
 
61
  def generate_query_embedding(query):
 
9
  from sklearn.metrics.pairwise import cosine_similarity
10
  from collections import Counter
11
 
12
+ # --------------------- Setup ---------------------
13
 
14
  logging.basicConfig(
15
  filename='query_logs.log',
 
23
  sentence_transformer_model = SentenceTransformer('all-MiniLM-L6-v2')
24
  cache = {}
25
 
26
+ # --------------------- Vectorization Function ---------------------
27
+
28
+ def vectorize_text(sentences_with_pages):
29
+ """Vectorize sentences using SentenceTransformer and create a FAISS index."""
30
+ try:
31
+ sentences = [item['sentence'] for item in sentences_with_pages]
32
+ embeddings = sentence_transformer_model.encode(sentences, show_progress_bar=True)
33
+ index = faiss.IndexFlatL2(embeddings.shape[1])
34
+ index.add(np.array(embeddings))
35
+ logging.info(f"Added {len(sentences)} sentences to the vector store.")
36
+ return index, sentences_with_pages
37
+ except Exception as e:
38
+ logging.error(f"Error during vectorization: {str(e)}")
39
+ return None, None
40
+
41
  # --------------------- PDF Processing ---------------------
42
 
43
  def read_pdf(file_path):
 
56
  sentences_with_pages.append({'sentence': sentence, 'page_number': page_num + 1})
57
  return sentences_with_pages
58
 
59
+ # Read and Vectorize PDF Content
60
  sentences_with_pages = read_pdf(PDF_PATH)
61
  vector_index, sentences_with_pages = vectorize_text(sentences_with_pages)
62
 
 
 
 
 
 
 
 
 
 
 
 
 
63
  # --------------------- Query Handling ---------------------
64
 
65
  def generate_query_embedding(query):