philmui committed on
Commit
cfdb7b7
1 Parent(s): 12aebdf

adding USE_MEMORY flag

Browse files
Files changed (1) hide show
  1. semantic.py +44 -22
semantic.py CHANGED
@@ -26,9 +26,15 @@ from globals import (
26
  VECTOR_STORE_PATH
27
  )
28
 
 
 
29
  from qdrant_client import QdrantClient
30
- qdrant_client = QdrantClient(path=VECTOR_STORE_PATH)
31
- # qdrant_client = QdrantClient(":memory:")
 
 
 
 
32
 
33
 
34
  RAG_PROMPT = """
@@ -61,7 +67,7 @@ class SemanticStoreFactory:
61
  if path.exists() and path.is_dir() and any(path.iterdir()):
62
  _logger.info(f"\tQdrant loading ...")
63
  store = Qdrant(
64
- client=qdrant_client,
65
  embeddings=embeddings,
66
  collection_name=META_SEMANTIC_COLLECTION,
67
  )
@@ -75,11 +81,14 @@ class SemanticStoreFactory:
75
  cls
76
  ) -> VectorStore:
77
 
78
- _logger.info(f"creating semantic vector store: {VECTOR_STORE_PATH}")
79
- path = Path(VECTOR_STORE_PATH)
80
- if not path.exists():
81
- path.mkdir(parents=True, exist_ok=True)
82
- _logger.info(f"Directory '{path}' created.")
 
 
 
83
 
84
  documents = PyMuPDFLoader(META_10K_FILE_PATH).load()
85
  semantic_chunker = SemanticChunker(
@@ -87,13 +96,23 @@ class SemanticStoreFactory:
87
  breakpoint_threshold_type="percentile"
88
  )
89
  semantic_chunks = semantic_chunker.create_documents([d.page_content for d in documents])
90
- semantic_chunk_vectorstore = Qdrant.from_documents(
91
- semantic_chunks,
92
- embeddings,
93
- location=":memory:",
94
- collection_name=META_SEMANTIC_COLLECTION,
95
- force_recreate=True
96
- )
 
 
 
 
 
 
 
 
 
 
97
  return semantic_chunk_vectorstore
98
 
99
  @classmethod
@@ -101,13 +120,16 @@ class SemanticStoreFactory:
101
  cls
102
  ) -> VectorStore:
103
  if cls._semantic_vectorstore is None:
104
- # print(f"Loading semantic vectorstore {META_SEMANTIC_COLLECTION} from: {VECTOR_STORE_PATH}")
105
- # try:
106
- # # first try to load the store
107
- # cls._semantic_vectorstore = cls.__load_semantic_store()
108
- # except Exception as e:
109
- # _logger.warning(f"cannot load: {e}")
110
- cls._semantic_vectorstore = cls.__create_semantic_store()
 
 
 
111
 
112
  return cls._semantic_vectorstore
113
 
 
26
  VECTOR_STORE_PATH
27
  )
28
 
29
+
30
+ USE_MEMORY = True
31
  from qdrant_client import QdrantClient
32
+
33
+ qclient: QdrantClient
34
+ if USE_MEMORY == True:
35
+ qclient = QdrantClient(":memory:")
36
+ else:
37
+ qclient = QdrantClient(path=VECTOR_STORE_PATH)
38
 
39
 
40
  RAG_PROMPT = """
 
67
  if path.exists() and path.is_dir() and any(path.iterdir()):
68
  _logger.info(f"\tQdrant loading ...")
69
  store = Qdrant(
70
+ client=qclient,
71
  embeddings=embeddings,
72
  collection_name=META_SEMANTIC_COLLECTION,
73
  )
 
81
  cls
82
  ) -> VectorStore:
83
 
84
+ if USE_MEMORY == True:
85
+ _logger.info(f"creating semantic vector store: {USE_MEMORY}")
86
+ else:
87
+ _logger.info(f"creating semantic vector store: {VECTOR_STORE_PATH}")
88
+ path = Path(VECTOR_STORE_PATH)
89
+ if not path.exists():
90
+ path.mkdir(parents=True, exist_ok=True)
91
+ _logger.info(f"Directory '{path}' created.")
92
 
93
  documents = PyMuPDFLoader(META_10K_FILE_PATH).load()
94
  semantic_chunker = SemanticChunker(
 
96
  breakpoint_threshold_type="percentile"
97
  )
98
  semantic_chunks = semantic_chunker.create_documents([d.page_content for d in documents])
99
+ if USE_MEMORY == True:
100
+ semantic_chunk_vectorstore = Qdrant.from_documents(
101
+ semantic_chunks,
102
+ embeddings,
103
+ location=":memory:",
104
+ collection_name=META_SEMANTIC_COLLECTION,
105
+ force_recreate=True
106
+ )
107
+ else:
108
+ semantic_chunk_vectorstore = Qdrant.from_documents(
109
+ semantic_chunks,
110
+ embeddings,
111
+ path=VECTOR_STORE_PATH,
112
+ collection_name=META_SEMANTIC_COLLECTION,
113
+ force_recreate=True
114
+ )
115
+
116
  return semantic_chunk_vectorstore
117
 
118
  @classmethod
 
120
  cls
121
  ) -> VectorStore:
122
  if cls._semantic_vectorstore is None:
123
+ if USE_MEMORY == True:
124
+ cls._semantic_vectorstore = cls.__create_semantic_store()
125
+ else:
126
+ print(f"Loading semantic vectorstore {META_SEMANTIC_COLLECTION} from: {VECTOR_STORE_PATH}")
127
+ try:
128
+ # first try to load the store
129
+ cls._semantic_vectorstore = cls.__load_semantic_store()
130
+ except Exception as e:
131
+ _logger.warning(f"cannot load: {e}")
132
+ cls._semantic_vectorstore = cls.__create_semantic_store()
133
 
134
  return cls._semantic_vectorstore
135