|
from langchain import LLMChain, PromptTemplate
from langchain.chains import ConversationalRetrievalChain
from langchain.chains.question_answering import load_qa_chain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import NotionDirectoryLoader
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.memory import ConversationBufferMemory
from langchain.text_splitter import SpacyTextSplitter
from langchain.vectorstores import FAISS


class CustomEmbedding:
    # Markdown files exported from Notion; embeddings are computed locally
    # with the default HuggingFace sentence-transformers model.
    notionDirectoryLoader = NotionDirectoryLoader("documents/business_context")
    embeddings = HuggingFaceEmbeddings()

    def calculateEmbedding(self):
        documents = self.notionDirectoryLoader.load()
        # Split on sentence boundaries using spaCy's Chinese pipeline
        # (requires: python -m spacy download zh_core_web_sm).
        text_splitter = SpacyTextSplitter(
            chunk_size=2048, pipeline="zh_core_web_sm", chunk_overlap=0)
        texts = text_splitter.split_documents(documents)

        # Embed the chunks and persist the FAISS index to disk.
        docsearch = FAISS.from_documents(texts, self.embeddings)
        docsearch.save_local(folder_path="./documents/business_context.faiss")

    def getFAQChain(self, llm=ChatOpenAI(temperature=0.7)):
        memory = ConversationBufferMemory(
            memory_key="chat_history", return_messages=True)
        docsearch = FAISS.load_local(
            "./documents/business_context.faiss", self.embeddings)

        # Condense the chat history and the follow-up question into a single
        # standalone question (in Chinese) before retrieval.
        _template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question in Chinese.

Chat History:
{chat_history}
Follow Up Input: {question}
Standalone question:"""
        CONDENSE_QUESTION_PROMPT = PromptTemplate.from_template(_template)
        question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)

        # Answer over the retrieved chunks with a map-reduce QA chain.
        doc_chain = load_qa_chain(llm, chain_type="map_reduce")
        qa = ConversationalRetrievalChain(
            retriever=docsearch.as_retriever(),
            question_generator=question_generator,
            combine_docs_chain=doc_chain,
            memory=memory)
        return qa
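
# --- Usage sketch (illustrative, not part of the original listing) ---
# Build the index once, then converse with the FAQ chain; the memory object
# carries chat history between calls, so follow-up questions are condensed
# against earlier turns. The question string below is hypothetical.
if __name__ == "__main__":
    custom_embedding = CustomEmbedding()
    custom_embedding.calculateEmbedding()  # one-off: build and persist the index

    qa = custom_embedding.getFAQChain()
    result = qa({"question": "What is your refund policy?"})
    print(result["answer"])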