alexkueck committed on
Commit
16b7808
1 Parent(s): d7af9ae

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -7
app.py CHANGED
@@ -19,6 +19,7 @@ from langchain.embeddings.openai import OpenAIEmbeddings
19
  from langchain.prompts import PromptTemplate
20
  from langchain.text_splitter import RecursiveCharacterTextSplitter
21
  from langchain.vectorstores import Chroma
 
22
  #from langchain.vectorstores import MongoDBAtlasVectorSearch
23
 
24
  #from pymongo import MongoClient
@@ -75,6 +76,7 @@ YOUTUBE_URL_2 = "https://www.youtube.com/watch?v=hdhZwyf24mE"
75
 
76
 
77
 
 
78
  ################################################
79
  #LLM Model mit dem gearbeitet wird
80
  #openai
@@ -168,10 +170,12 @@ def document_loading_splitting():
168
 
169
  #Chroma DB die splits ablegen - vektorisiert...
170
  def document_storage_chroma(splits):
171
- Chroma.from_documents(documents = splits,
172
- embedding = OpenAIEmbeddings(disallowed_special = ()),
173
- persist_directory = PATH_WORK + CHROMA_DIR)
174
 
 
 
 
175
  #Mongo DB die splits ablegen - vektorisiert...
176
  def document_storage_mongodb(splits):
177
  MongoDBAtlasVectorSearch.from_documents(documents = splits,
@@ -186,10 +190,9 @@ def document_retrieval_chroma(llm, prompt):
186
  #embeddings = HuggingFaceInstructEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": "cpu"})
187
  #etwas weniger rechenaufwendig:
188
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False})
189
-
190
- db = Chroma(embedding_function = embeddings,
191
- persist_directory = PATH_WORK + CHROMA_DIR)
192
-
193
  return db
194
 
195
  #dokumente in mongo db vektorisiert ablegen können - die Db vorbereiten dafür
@@ -304,6 +307,7 @@ def invoke (prompt, history, rag_option, openai_api_key, temperature=0.9, max_n
304
  #llm = HuggingFaceHub(url_??? = "https://wdgsjd6zf201mufn.us-east-1.aws.endpoints.huggingface.cloud", model_kwargs={"temperature": 0.5, "max_length": 64})
305
  #llm = HuggingFaceTextGenInference( inference_server_url="http://localhost:8010/", max_new_tokens=max_new_tokens,top_k=10,top_p=top_p,typical_p=0.95,temperature=temperature,repetition_penalty=repetition_penalty,)
306
 
 
307
  #zusätzliche Dokumenten Splits aus DB zum Prompt hinzufügen (aus VektorDB - Chroma oder Mongo DB)
308
  if (rag_option == "An"):
309
  #muss nur einmal ausgeführt werden...
 
19
  from langchain.prompts import PromptTemplate
20
  from langchain.text_splitter import RecursiveCharacterTextSplitter
21
  from langchain.vectorstores import Chroma
22
+ from chromadb.errors import InvalidDimensionException
23
  #from langchain.vectorstores import MongoDBAtlasVectorSearch
24
 
25
  #from pymongo import MongoClient
 
76
 
77
 
78
 
79
+
80
  ################################################
81
  #LLM Model mit dem gearbeitet wird
82
  #openai
 
170
 
171
  #Chroma DB die splits ablegen - vektorisiert...
172
  def document_storage_chroma(splits):
173
+ #OpenAI embeddings
174
+ #Chroma.from_documents(documents = splits, embedding = OpenAIEmbeddings(disallowed_special = ()), persist_directory = PATH_WORK + CHROMA_DIR)
 
175
 
176
+ #HF embeddings
177
+ Chroma.from_documents(documents = splits, embedding = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False}), persist_directory = PATH_WORK + CHROMA_DIR)
178
+
179
  #Mongo DB die splits ablegen - vektorisiert...
180
  def document_storage_mongodb(splits):
181
  MongoDBAtlasVectorSearch.from_documents(documents = splits,
 
190
  #embeddings = HuggingFaceInstructEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2", model_kwargs={"device": "cpu"})
191
  #etwas weniger rechenaufwendig:
192
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-mpnet-base-v2", model_kwargs={"device": "cpu"}, encode_kwargs={'normalize_embeddings': False})
193
+
194
+ #ChromaDb für HF embeddings (HuggingFaceEmbeddings, siehe oben)
195
+ db = Chroma(embedding_function = embeddings, persist_directory = PATH_WORK + CHROMA_DIR)
 
196
  return db
197
 
198
  #dokumente in mongo db vektorisiert ablegen können - die Db vorbereiten dafür
 
307
  #llm = HuggingFaceHub(url_??? = "https://wdgsjd6zf201mufn.us-east-1.aws.endpoints.huggingface.cloud", model_kwargs={"temperature": 0.5, "max_length": 64})
308
  #llm = HuggingFaceTextGenInference( inference_server_url="http://localhost:8010/", max_new_tokens=max_new_tokens,top_k=10,top_p=top_p,typical_p=0.95,temperature=temperature,repetition_penalty=repetition_penalty,)
309
 
310
+
311
  #zusätzliche Dokumenten Splits aus DB zum Prompt hinzufügen (aus VektorDB - Chroma oder Mongo DB)
312
  if (rag_option == "An"):
313
  #muss nur einmal ausgeführt werden...