Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
|
@@ -171,10 +171,10 @@ def document_loading_splitting():
|
|
| 171 |
#Chroma DB die splits ablegen - vektorisiert...
|
| 172 |
def document_storage_chroma(splits):
|
| 173 |
#OpenAI embeddings
|
| 174 |
-
|
| 175 |
|
| 176 |
#HF embeddings
|
| 177 |
-
Chroma.from_documents(documents = splits, embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False}), persist_directory = PATH_WORK + CHROMA_DIR)
|
| 178 |
|
| 179 |
#Mongo DB die splits ablegen - vektorisiert...
|
| 180 |
def document_storage_mongodb(splits):
|
|
@@ -185,11 +185,11 @@ def document_storage_mongodb(splits):
|
|
| 185 |
|
| 186 |
#dokumente in chroma db vektorisiert ablegen können - die Db vorbereiten dafür
|
| 187 |
def document_retrieval_chroma(llm, prompt):
|
| 188 |
-
|
| 189 |
#Alternative Embedding - für Vektorstore, um Ähnlichkeitsvektoren zu erzeugen - die ...InstructEmbedding ist sehr rechenaufwendig
|
| 190 |
#embeddings = HuggingFaceInstructEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": "cpu"})
|
| 191 |
#etwas weniger rechenaufwendig:
|
| 192 |
-
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False})
|
| 193 |
|
| 194 |
#ChromaDb für OpenAI embeddings
|
| 195 |
db = Chroma(embedding_function = embeddings, persist_directory = PATH_WORK + CHROMA_DIR)
|
|
|
|
| 171 |
#Chroma DB die splits ablegen - vektorisiert...
|
| 172 |
def document_storage_chroma(splits):
|
| 173 |
#OpenAI embeddings
|
| 174 |
+
Chroma.from_documents(documents = splits, embedding = OpenAIEmbeddings(disallowed_special = ()), persist_directory = PATH_WORK + CHROMA_DIR)
|
| 175 |
|
| 176 |
#HF embeddings
|
| 177 |
+
#Chroma.from_documents(documents = splits, embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False}), persist_directory = PATH_WORK + CHROMA_DIR)
|
| 178 |
|
| 179 |
#Mongo DB die splits ablegen - vektorisiert...
|
| 180 |
def document_storage_mongodb(splits):
|
|
|
|
| 185 |
|
| 186 |
#dokumente in chroma db vektorisiert ablegen können - die Db vorbereiten dafür
|
| 187 |
def document_retrieval_chroma(llm, prompt):
|
| 188 |
+
embeddings = OpenAIEmbeddings()
|
| 189 |
#Alternative Embedding - für Vektorstore, um Ähnlichkeitsvektoren zu erzeugen - die ...InstructEmbedding ist sehr rechenaufwendig
|
| 190 |
#embeddings = HuggingFaceInstructEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": "cpu"})
|
| 191 |
#etwas weniger rechenaufwendig:
|
| 192 |
+
#embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False})
|
| 193 |
|
| 194 |
#ChromaDb für OpenAI embeddings
|
| 195 |
db = Chroma(embedding_function = embeddings, persist_directory = PATH_WORK + CHROMA_DIR)
|