ShynBui committed on
Commit
7323674
1 Parent(s): aab3dcd

Update utils.py

Browse files
Files changed (1) hide show
  1. utils.py +24 -3
utils.py CHANGED
@@ -4,14 +4,17 @@ from langchain.text_splitter import CharacterTextSplitter, RecursiveCharacterTex
4
  from langchain_community.vectorstores import Chroma
5
  from langchain_community.embeddings import HuggingFaceEmbeddings
6
  from langchain_community.retrievers import BM25Retriever
7
-
 
 
8
  import os
9
 
10
  def split_with_source(text, source):
11
  splitter = CharacterTextSplitter(
12
  separator = "\n",
13
- chunk_size = 512,
14
- chunk_overlap = 172,
 
15
  add_start_index = True,
16
  )
17
  documents = splitter.create_documents([text])
@@ -83,5 +86,23 @@ def load_the_bm25_retrieve(k = 3):
83
 
84
  return bm25_retriever
85
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
86
 
87
 
 
4
  from langchain_community.vectorstores import Chroma
5
  from langchain_community.embeddings import HuggingFaceEmbeddings
6
  from langchain_community.retrievers import BM25Retriever
7
+ from langchain.llms import OpenAI
8
+ from langchain_openai import ChatOpenAI
9
+ from langchain.chains import RetrievalQA
10
  import os
11
 
12
  def split_with_source(text, source):
13
  splitter = CharacterTextSplitter(
14
  separator = "\n",
15
+ chunk_size = 256,
16
+ chunk_overlap = 0,
17
+ length_function = len,
18
  add_start_index = True,
19
  )
20
  documents = splitter.create_documents([text])
 
86
 
87
  return bm25_retriever
88
 
89
def get_qachain(llm_name="gpt-3.5-turbo-0125", chain_type="stuff", retriever=None, return_source_documents=True):
    """Build a RetrievalQA chain on top of an OpenAI chat model.

    Args:
        llm_name: Model name passed to ``ChatOpenAI`` as ``model_name``.
        chain_type: Chain type forwarded to ``RetrievalQA.from_chain_type``.
        retriever: Retriever the chain uses to fetch documents. Required in
            practice; the ``None`` default only keeps the signature
            keyword-friendly.
        return_source_documents: Forwarded to the chain; when true the chain
            response is expected to include the retrieved documents.

    Returns:
        The object returned by ``RetrievalQA.from_chain_type``.

    Raises:
        ValueError: If ``retriever`` is ``None``.
    """
    # Fail fast with a clear message before creating an OpenAI client:
    # the chain cannot be assembled without a retriever.
    if retriever is None:
        raise ValueError("get_qachain() requires a retriever, got None")

    # temperature=0 is what the original code requested for the chat model.
    llm = ChatOpenAI(temperature=0, model_name=llm_name)
    return RetrievalQA.from_chain_type(
        llm=llm,
        chain_type=chain_type,
        retriever=retriever,
        return_source_documents=return_source_documents,
    )
96
+
97
def process_llm_response(llm_response):
    """Print the answer of a QA chain response followed by its sources.

    Args:
        llm_response: Mapping with a ``'result'`` entry (the answer text) and,
            optionally, a ``'source_documents'`` entry holding objects whose
            ``metadata['source']`` identifies where each document came from.

    Prints the answer, a ``Sources:`` header, then one source per line. When
    the response carries no ``'source_documents'`` entry (e.g. the chain was
    built with ``return_source_documents=False``) only the answer and the
    header are printed instead of raising ``KeyError``.
    """
    print(llm_response['result'])
    print('\n\nSources:')
    # The key is absent when the chain was asked not to return sources.
    for source in llm_response.get("source_documents") or []:
        print(source.metadata['source'])
102
+
103
+
104
+
105
+
106
+
107
 
108