peichao.dong committed on
Commit
2a0c033
1 Parent(s): 066c6cf

update embedding

Browse files
app.py CHANGED
@@ -50,12 +50,13 @@ def feedBack(context, story, chatbot=[], input=""):
50
 
51
  customerEmbedding = CustomEmbedding()
52
 
53
- faqChain = customerEmbedding.getFAQChain()
54
 
55
  code_agent_executor = code_agent_executor()
56
  def faqFromLocal(input, chatbot=[]):
57
- response = faqChain({"question": f"{input}"})
58
- chatbot.append((input, response["answer"]))
 
59
  return chatbot, ""
60
 
61
 
 
50
 
51
  customerEmbedding = CustomEmbedding()
52
 
53
+ faqChain = customerEmbedding.getFAQAgent()
54
 
55
  code_agent_executor = code_agent_executor()
56
def faqFromLocal(input, chatbot=None):
    """Answer a user question with the local FAQ agent and record it.

    Args:
        input: The user's question text.
        chatbot: Chat history as a list of (question, answer) tuples.
            Defaults to a fresh list; the original ``chatbot=[]`` was a
            shared mutable default, so history leaked between calls that
            did not pass an explicit list.

    Returns:
        Tuple of (updated chat history, "" to clear the input box).
    """
    if chatbot is None:
        chatbot = []
    # faqChain is the AgentExecutor built via getFAQAgent() in the
    # enclosing scope; .run() returns the answer string directly
    # (unlike the old chain, which returned a dict with an "answer" key).
    response = faqChain.run(input)
    chatbot.append((input, response))
    return chatbot, ""
61
 
62
 
documents/abstract.faiss/index.faiss ADDED
Binary file (58.4 kB). View file
 
documents/abstract.faiss/index.pkl ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65b241ca9d637fc607f43c0190c682677b635dbd36cddb0b754c0f74ea6988da
3
+ size 26724
embedding.py CHANGED
@@ -9,34 +9,35 @@ from langchain.chains.question_answering import load_qa_chain
9
  from langchain.document_loaders import NotionDirectoryLoader
10
  from langchain.memory import ConversationBufferMemory
11
  from langchain.chains import ConversationalRetrievalChain
 
12
 
13
  from models import llm
14
 
15
 
16
  class CustomEmbedding:
17
  notionDirectoryLoader = NotionDirectoryLoader(
18
- "documents/bussiness_context")
19
  embeddings = HuggingFaceEmbeddings()
20
 
21
  def calculateEmbedding(self):
22
  documents = self.notionDirectoryLoader.load()
23
- text_splitter = SpacyTextSplitter(
24
- chunk_size=2048, pipeline="zh_core_web_sm", chunk_overlap=0)
25
 
26
- # text_splitter = MarkdownTextSplitter(
27
- # chunk_size=4000, chunk_overlap=0)
28
  texts = text_splitter.split_documents(documents)
29
 
30
  docsearch = FAISS.from_documents(texts, self.embeddings)
31
  docsearch.save_local(
32
- folder_path="./documents/business_context.faiss")
33
 
34
 
35
 
36
  def getFAQChain(self, llm=llm(temperature=0.7)):
37
  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
38
  docsearch = FAISS.load_local(
39
- "./documents/business_context.faiss", self.embeddings)
40
  # retriever = VectorStoreRetriever(vectorstore=docsearch)
41
  _template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question in chinese.
42
 
@@ -48,15 +49,45 @@ class CustomEmbedding:
48
  question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
49
 
50
  doc_chain = load_qa_chain(llm, chain_type="map_reduce")
51
- qa = ConversationalRetrievalChain( retriever= docsearch.as_retriever(),
52
  question_generator=question_generator,
53
  combine_docs_chain=doc_chain,
54
  memory=memory)
55
  return qa
56
 
 
 
 
 
57
 
58
- # customerEmbedding = CustomEmbedding()
59
- # # customerEmbedding.calculateEmbedding()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
  # # customerEmbedding.calculateNotionEmbedding()
61
 
62
  # faq_chain = customerEmbedding.getFAQChain()
 
9
  from langchain.document_loaders import NotionDirectoryLoader
10
  from langchain.memory import ConversationBufferMemory
11
  from langchain.chains import ConversationalRetrievalChain
12
+ from langchain.agents import initialize_agent, AgentType, Tool, ZeroShotAgent, AgentExecutor
13
 
14
  from models import llm
15
 
16
 
17
  class CustomEmbedding:
18
  notionDirectoryLoader = NotionDirectoryLoader(
19
+ "/Users/peichao.dong/Documents/projects/dpc/ABstract/docs/pages")
20
  embeddings = HuggingFaceEmbeddings()
21
 
22
  def calculateEmbedding(self):
23
  documents = self.notionDirectoryLoader.load()
24
+ # text_splitter = SpacyTextSplitter(
25
+ # chunk_size=2048, pipeline="zh_core_web_sm", chunk_overlap=0)
26
 
27
+ text_splitter = MarkdownTextSplitter(
28
+ chunk_size=2048, chunk_overlap=0)
29
  texts = text_splitter.split_documents(documents)
30
 
31
  docsearch = FAISS.from_documents(texts, self.embeddings)
32
  docsearch.save_local(
33
+ folder_path="./documents/abstract.faiss")
34
 
35
 
36
 
37
  def getFAQChain(self, llm=llm(temperature=0.7)):
38
  memory = ConversationBufferMemory(memory_key="chat_history", return_messages=True)
39
  docsearch = FAISS.load_local(
40
+ "./documents/abstract.faiss", self.embeddings)
41
  # retriever = VectorStoreRetriever(vectorstore=docsearch)
42
  _template = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question in chinese.
43
 
 
49
  question_generator = LLMChain(llm=llm, prompt=CONDENSE_QUESTION_PROMPT)
50
 
51
  doc_chain = load_qa_chain(llm, chain_type="map_reduce")
52
+ qa = ConversationalRetrievalChain( retriever= docsearch.as_retriever(search_kwargs={"k": 1}),
53
  question_generator=question_generator,
54
  combine_docs_chain=doc_chain,
55
  memory=memory)
56
  return qa
57
 
58
def faq(self, input):
    """Answer a single FAQ question via the conversational retrieval chain.

    Builds the chain with getFAQChain() and returns only the answer text
    from its response dict.
    """
    chain = self.getFAQChain()
    result = chain({"question": f"{input}"})
    return result["answer"]
62
 
63
def getFAQAgent(self):
    """Build a conversational zero-shot agent exposing the FAQ tool.

    Returns:
        AgentExecutor wired with a single `faq` tool and a
        ConversationBufferMemory keyed on ``chat_history`` (matching the
        ``{chat_history}`` placeholder in the prompt suffix).
    """
    tools = [
        Tool(
            name="ABstract system FAQ",
            func=self.faq,
            # Typo fixed ("anwer" -> "answering"): the LLM reads this
            # description when choosing a tool, so it should be clean text.
            description="Useful for answering questions about ABstract system",
        )
    ]
    memory = ConversationBufferMemory(memory_key="chat_history")

    prefix = (
        "Have a conversation with a human, answering the following "
        "questions as best you can. You have access to the following tools:"
    )
    # Stray '"' after "Begin!" removed — the original suffix began the
    # triple-quoted string with 'Begin!"', leaking a literal quote into
    # every prompt sent to the model.
    suffix = """Begin!

{chat_history}
Question: {input}
{agent_scratchpad}"""

    prompt = ZeroShotAgent.create_prompt(
        tools,
        prefix=prefix,
        suffix=suffix,
        input_variables=["input", "chat_history", "agent_scratchpad"],
    )

    llm_chain = LLMChain(llm=llm(), prompt=prompt)
    agent = ZeroShotAgent(llm_chain=llm_chain, tools=tools, verbose=True)
    return AgentExecutor.from_agent_and_tools(
        agent=agent, tools=tools, verbose=True, memory=memory
    )
86
+
87
+
88
if __name__ == "__main__":
    # Rebuild the FAISS index from the configured document directory.
    embedding = CustomEmbedding()
    embedding.calculateEmbedding()
91
  # # customerEmbedding.calculateNotionEmbedding()
92
 
93
  # faq_chain = customerEmbedding.getFAQChain()