philmui committed on
Commit
7ba780f
1 Parent(s): 7700877

adding logs

Browse files
Files changed (1) hide show
  1. semantic.py +5 -0
semantic.py CHANGED
@@ -90,13 +90,16 @@ class SemanticStoreFactory:
90
  path.mkdir(parents=True, exist_ok=True)
91
  _logger.info(f"Directory '{path}' created.")
92
 
 
93
  documents = PyMuPDFLoader(META_10K_FILE_PATH).load()
94
  semantic_chunker = SemanticChunker(
95
  embeddings=embeddings,
96
  breakpoint_threshold_type="percentile"
97
  )
98
  semantic_chunks = semantic_chunker.create_documents([d.page_content for d in documents])
 
99
  if USE_MEMORY == True:
 
100
  semantic_chunk_vectorstore = Qdrant.from_documents(
101
  semantic_chunks,
102
  embeddings,
@@ -104,6 +107,7 @@ class SemanticStoreFactory:
104
  collection_name=META_SEMANTIC_COLLECTION,
105
  force_recreate=True
106
  )
 
107
  else:
108
  semantic_chunk_vectorstore = Qdrant.from_documents(
109
  semantic_chunks,
@@ -112,6 +116,7 @@ class SemanticStoreFactory:
112
  collection_name=META_SEMANTIC_COLLECTION,
113
  force_recreate=True
114
  )
 
115
 
116
  return semantic_chunk_vectorstore
117
 
 
90
  path.mkdir(parents=True, exist_ok=True)
91
  _logger.info(f"Directory '{path}' created.")
92
 
93
+ _logger.info(f"loading {META_10K_FILE_PATH}")
94
  documents = PyMuPDFLoader(META_10K_FILE_PATH).load()
95
  semantic_chunker = SemanticChunker(
96
  embeddings=embeddings,
97
  breakpoint_threshold_type="percentile"
98
  )
99
  semantic_chunks = semantic_chunker.create_documents([d.page_content for d in documents])
100
+ _logger.info(f"created semantic_chunks: {len(semantic_chunks)}")
101
  if USE_MEMORY == True:
102
+ _logger.info(f"\t==> creating memory vectorstore ...")
103
  semantic_chunk_vectorstore = Qdrant.from_documents(
104
  semantic_chunks,
105
  embeddings,
 
107
  collection_name=META_SEMANTIC_COLLECTION,
108
  force_recreate=True
109
  )
110
+ _logger.info(f"\t==> DONE")
111
  else:
112
  semantic_chunk_vectorstore = Qdrant.from_documents(
113
  semantic_chunks,
 
116
  collection_name=META_SEMANTIC_COLLECTION,
117
  force_recreate=True
118
  )
119
+ _logger.info(f"\t==> return vectorstore")
120
 
121
  return semantic_chunk_vectorstore
122