Update app.py
Browse files
app.py
CHANGED
@@ -171,10 +171,10 @@ def document_loading_splitting():
|
|
171 |
#Chroma DB die splits ablegen - vektorisiert...
|
172 |
def document_storage_chroma(splits):
|
173 |
#OpenAI embeddings
|
174 |
-
|
175 |
|
176 |
#HF embeddings
|
177 |
-
Chroma.from_documents(documents = splits, embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False}), persist_directory = PATH_WORK + CHROMA_DIR)
|
178 |
|
179 |
#Mongo DB die splits ablegen - vektorisiert...
|
180 |
def document_storage_mongodb(splits):
|
@@ -185,11 +185,11 @@ def document_storage_mongodb(splits):
|
|
185 |
|
186 |
#dokumente in chroma db vektorisiert ablegen können - die Db vorbereiten dafür
|
187 |
def document_retrieval_chroma(llm, prompt):
|
188 |
-
|
189 |
#Alternative Embedding - für Vektorstore, um Ähnlichkeitsvektoren zu erzeugen - die ...InstructEmbedding ist sehr rechenaufwendig
|
190 |
#embeddings = HuggingFaceInstructEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": "cpu"})
|
191 |
#etwas weniger rechenaufwendig:
|
192 |
-
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False})
|
193 |
|
194 |
#ChromaDb für OpenAI embeddings
|
195 |
db = Chroma(embedding_function = embeddings, persist_directory = PATH_WORK + CHROMA_DIR)
|
|
|
171 |
#Chroma DB die splits ablegen - vektorisiert...
|
172 |
def document_storage_chroma(splits):
|
173 |
#OpenAI embeddings
|
174 |
+
Chroma.from_documents(documents = splits, embedding = OpenAIEmbeddings(disallowed_special = ()), persist_directory = PATH_WORK + CHROMA_DIR)
|
175 |
|
176 |
#HF embeddings
|
177 |
+
#Chroma.from_documents(documents = splits, embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False}), persist_directory = PATH_WORK + CHROMA_DIR)
|
178 |
|
179 |
#Mongo DB die splits ablegen - vektorisiert...
|
180 |
def document_storage_mongodb(splits):
|
|
|
185 |
|
186 |
#dokumente in chroma db vektorisiert ablegen können - die Db vorbereiten dafür
|
187 |
def document_retrieval_chroma(llm, prompt):
|
188 |
+
embeddings = OpenAIEmbeddings()
|
189 |
#Alternative Embedding - für Vektorstore, um Ähnlichkeitsvektoren zu erzeugen - die ...InstructEmbedding ist sehr rechenaufwendig
|
190 |
#embeddings = HuggingFaceInstructEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": "cpu"})
|
191 |
#etwas weniger rechenaufwendig:
|
192 |
+
#embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False})
|
193 |
|
194 |
#ChromaDb für OpenAI embeddings
|
195 |
db = Chroma(embedding_function = embeddings, persist_directory = PATH_WORK + CHROMA_DIR)
|