bstraehle committed on
Commit
4115e3a
1 Parent(s): 15fb20f

Update rag.py

Browse files
Files changed (1) hide show
  1. rag.py +6 -4
rag.py CHANGED
@@ -52,7 +52,8 @@ def document_loading():
52
  #loader = GenericLoader(YoutubeAudioLoader([YOUTUBE_URL], YOUTUBE_DIR),
53
  # OpenAIWhisperParser())
54
  #docs.extend(loader.load())
55
-
 
56
  return docs
57
 
58
  def document_splitting(config, docs):
@@ -63,23 +64,25 @@ def document_splitting(config, docs):
63
  return text_splitter.split_documents(docs)
64
 
65
  def document_storage_chroma(chunks):
66
- print("### Store")
67
  Chroma.from_documents(documents = chunks,
68
  embedding = OpenAIEmbeddings(disallowed_special = ()),
69
  persist_directory = CHROMA_DIR)
70
 
71
  def document_storage_mongodb(chunks):
72
- print("### Store")
73
  MongoDBAtlasVectorSearch.from_documents(documents = chunks,
74
  embedding = OpenAIEmbeddings(disallowed_special = ()),
75
  collection = collection,
76
  index_name = MONGODB_INDEX_NAME)
77
 
78
  def document_retrieval_chroma():
 
79
  return Chroma(embedding_function = OpenAIEmbeddings(disallowed_special = ()),
80
  persist_directory = CHROMA_DIR)
81
 
82
  def document_retrieval_mongodb():
 
83
  return MongoDBAtlasVectorSearch.from_connection_string(MONGODB_ATLAS_CLUSTER_URI,
84
  MONGODB_DB_NAME + "." + MONGODB_COLLECTION_NAME,
85
  OpenAIEmbeddings(disallowed_special = ()),
@@ -118,7 +121,6 @@ def rag_chain(config, openai_api_key, rag_option, prompt):
118
  rag_chain = RetrievalQA.from_chain_type(llm,
119
  chain_type_kwargs = {"prompt": RAG_CHAIN_PROMPT},
120
  retriever = db.as_retriever(search_kwargs = {"k": config["k"]}),
121
- include_run_info = True,
122
  return_source_documents = True,
123
  verbose = True)
124
 
 
52
  #loader = GenericLoader(YoutubeAudioLoader([YOUTUBE_URL], YOUTUBE_DIR),
53
  # OpenAIWhisperParser())
54
  #docs.extend(loader.load())
55
+ print("### Load YouTube")
56
+
57
  return docs
58
 
59
  def document_splitting(config, docs):
 
64
  return text_splitter.split_documents(docs)
65
 
66
  def document_storage_chroma(chunks):
67
+ print("### Store Chroma")
68
  Chroma.from_documents(documents = chunks,
69
  embedding = OpenAIEmbeddings(disallowed_special = ()),
70
  persist_directory = CHROMA_DIR)
71
 
72
  def document_storage_mongodb(chunks):
73
+ print("### Store MongoDB")
74
  MongoDBAtlasVectorSearch.from_documents(documents = chunks,
75
  embedding = OpenAIEmbeddings(disallowed_special = ()),
76
  collection = collection,
77
  index_name = MONGODB_INDEX_NAME)
78
 
79
  def document_retrieval_chroma():
80
+ print("### Retrieve Chroma")
81
  return Chroma(embedding_function = OpenAIEmbeddings(disallowed_special = ()),
82
  persist_directory = CHROMA_DIR)
83
 
84
  def document_retrieval_mongodb():
85
+ print("### Retrieve MongoDB")
86
  return MongoDBAtlasVectorSearch.from_connection_string(MONGODB_ATLAS_CLUSTER_URI,
87
  MONGODB_DB_NAME + "." + MONGODB_COLLECTION_NAME,
88
  OpenAIEmbeddings(disallowed_special = ()),
 
121
  rag_chain = RetrievalQA.from_chain_type(llm,
122
  chain_type_kwargs = {"prompt": RAG_CHAIN_PROMPT},
123
  retriever = db.as_retriever(search_kwargs = {"k": config["k"]}),
 
124
  return_source_documents = True,
125
  verbose = True)
126