Update rag.py
Browse files
rag.py
CHANGED
@@ -52,7 +52,8 @@ def document_loading():
|
|
52 |
#loader = GenericLoader(YoutubeAudioLoader([YOUTUBE_URL], YOUTUBE_DIR),
|
53 |
# OpenAIWhisperParser())
|
54 |
#docs.extend(loader.load())
|
55 |
-
|
|
|
56 |
return docs
|
57 |
|
58 |
def document_splitting(config, docs):
|
@@ -63,23 +64,25 @@ def document_splitting(config, docs):
|
|
63 |
return text_splitter.split_documents(docs)
|
64 |
|
65 |
def document_storage_chroma(chunks):
|
66 |
-
print("### Store")
|
67 |
Chroma.from_documents(documents = chunks,
|
68 |
embedding = OpenAIEmbeddings(disallowed_special = ()),
|
69 |
persist_directory = CHROMA_DIR)
|
70 |
|
71 |
def document_storage_mongodb(chunks):
|
72 |
-
print("### Store")
|
73 |
MongoDBAtlasVectorSearch.from_documents(documents = chunks,
|
74 |
embedding = OpenAIEmbeddings(disallowed_special = ()),
|
75 |
collection = collection,
|
76 |
index_name = MONGODB_INDEX_NAME)
|
77 |
|
78 |
def document_retrieval_chroma():
|
|
|
79 |
return Chroma(embedding_function = OpenAIEmbeddings(disallowed_special = ()),
|
80 |
persist_directory = CHROMA_DIR)
|
81 |
|
82 |
def document_retrieval_mongodb():
|
|
|
83 |
return MongoDBAtlasVectorSearch.from_connection_string(MONGODB_ATLAS_CLUSTER_URI,
|
84 |
MONGODB_DB_NAME + "." + MONGODB_COLLECTION_NAME,
|
85 |
OpenAIEmbeddings(disallowed_special = ()),
|
@@ -118,7 +121,6 @@ def rag_chain(config, openai_api_key, rag_option, prompt):
|
|
118 |
rag_chain = RetrievalQA.from_chain_type(llm,
|
119 |
chain_type_kwargs = {"prompt": RAG_CHAIN_PROMPT},
|
120 |
retriever = db.as_retriever(search_kwargs = {"k": config["k"]}),
|
121 |
-
include_run_info = True,
|
122 |
return_source_documents = True,
|
123 |
verbose = True)
|
124 |
|
|
|
52 |
#loader = GenericLoader(YoutubeAudioLoader([YOUTUBE_URL], YOUTUBE_DIR),
|
53 |
# OpenAIWhisperParser())
|
54 |
#docs.extend(loader.load())
|
55 |
+
print("### Load YouTube")
|
56 |
+
|
57 |
return docs
|
58 |
|
59 |
def document_splitting(config, docs):
|
|
|
64 |
return text_splitter.split_documents(docs)
|
65 |
|
66 |
def document_storage_chroma(chunks):
|
67 |
+
print("### Store Chroma")
|
68 |
Chroma.from_documents(documents = chunks,
|
69 |
embedding = OpenAIEmbeddings(disallowed_special = ()),
|
70 |
persist_directory = CHROMA_DIR)
|
71 |
|
72 |
def document_storage_mongodb(chunks):
|
73 |
+
print("### Store MongoDB")
|
74 |
MongoDBAtlasVectorSearch.from_documents(documents = chunks,
|
75 |
embedding = OpenAIEmbeddings(disallowed_special = ()),
|
76 |
collection = collection,
|
77 |
index_name = MONGODB_INDEX_NAME)
|
78 |
|
79 |
def document_retrieval_chroma():
|
80 |
+
print("### Retrieve Chroma")
|
81 |
return Chroma(embedding_function = OpenAIEmbeddings(disallowed_special = ()),
|
82 |
persist_directory = CHROMA_DIR)
|
83 |
|
84 |
def document_retrieval_mongodb():
|
85 |
+
print("### Retrieve MongoDB")
|
86 |
return MongoDBAtlasVectorSearch.from_connection_string(MONGODB_ATLAS_CLUSTER_URI,
|
87 |
MONGODB_DB_NAME + "." + MONGODB_COLLECTION_NAME,
|
88 |
OpenAIEmbeddings(disallowed_special = ()),
|
|
|
121 |
rag_chain = RetrievalQA.from_chain_type(llm,
|
122 |
chain_type_kwargs = {"prompt": RAG_CHAIN_PROMPT},
|
123 |
retriever = db.as_retriever(search_kwargs = {"k": config["k"]}),
|
|
|
124 |
return_source_documents = True,
|
125 |
verbose = True)
|
126 |
|