Update functions.py

functions.py: +69 -29 (CHANGED)
@@ -26,7 +26,7 @@ from pyvis.network import Network
 import torch
 from pydub import AudioSegment
 from langchain.docstore.document import Document
-from langchain.embeddings import HuggingFaceEmbeddings,HuggingFaceInstructEmbeddings
+from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceBgeEmbeddings, HuggingFaceInstructEmbeddings
 from langchain.vectorstores import FAISS
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.chat_models import ChatOpenAI
@@ -34,7 +34,13 @@ from langchain.callbacks import StdOutCallbackHandler
 from langchain.chains import ConversationalRetrievalChain, QAGenerationChain, LLMChain
 from langchain.memory import ConversationBufferMemory
 from langchain.chains.question_answering import load_qa_chain
-
+
+from langchain.callbacks import StreamlitCallbackHandler
+from langchain.agents import OpenAIFunctionsAgent, AgentExecutor
+from langchain.agents.agent_toolkits import create_retriever_tool
+from langchain.agents.openai_functions_agent.agent_token_buffer_memory import (
+    AgentTokenBufferMemory,
+)
 
 from langchain.prompts.chat import (
     ChatPromptTemplate,
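Taken together, the new imports set up LangChain's OpenAI-functions conversational retrieval agent, which the hunks below swap in for the previous ConversationalRetrievalChain flow: the FAISS retriever is wrapped as a tool via create_retriever_tool, driven by an OpenAIFunctionsAgent inside an AgentExecutor, with AgentTokenBufferMemory tracking the conversation and StreamlitCallbackHandler available for streaming agent steps into the UI.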
@@ -97,6 +103,7 @@ def load_prompt():
 
     return prompt
 
+
 ###################### Functions #######################################################################################
 
 # @st.cache_data
@@ -528,6 +535,30 @@ def generate_eval(raw_text, N, chunk):
 
     return eval_set_full
 
+@st.cache_resource
+def create_prompt_and_llm():
+    '''Create prompt'''
+
+    llm = ChatOpenAI(temperature=0, streaming=True, model="gpt-4")
+
+    message = SystemMessage(
+        content=(
+            "You are a helpful chatbot tasked with answering questions accurately about the earnings call transcript provided. "
+            "Unless otherwise explicitly stated, it is probably fair to assume that questions are about the earnings call transcript. "
+            "If there is any ambiguity, assume they are about that. "
+            "Do not use any information not provided in the earnings context, and remember to speak like a finance expert. "
+            "If you don't know the answer, just say 'There is no relevant answer in the given earnings call transcript'; "
+            "don't try to make up an answer."
+        )
+    )
+
+    prompt = OpenAIFunctionsAgent.create_prompt(
+        system_message=message,
+        extra_prompt_messages=[MessagesPlaceholder(variable_name="history")],
+    )
+
+    return prompt, llm
+
 @st.cache_resource
 def gen_embeddings(embedding_model):
 
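Note: create_prompt_and_llm() references SystemMessage and MessagesPlaceholder, neither of which appears in this commit's import hunks, so they are presumably already imported elsewhere in functions.py (the truncated from langchain.prompts.chat import (...) block may already provide MessagesPlaceholder). If not, imports along these lines would be needed; this is an assumption matching the langchain 0.0.x layout the file already uses:

from langchain.schema import SystemMessage         # assumed import, not added by this commit
from langchain.prompts import MessagesPlaceholder  # assumed import, not added by this commit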
@@ -539,14 +570,21 @@ def gen_embeddings(embedding_model):
                                               query_instruction='Represent the Financial question for retrieving supporting paragraphs: ',
                                               embed_instruction='Represent the Financial paragraph for retrieval: ')
 
-    else:
+    elif 'mpnet' in embedding_model:
 
         embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
 
+    elif 'FlagEmbedding' in embedding_model:
+
+        encode_kwargs = {'normalize_embeddings': True}
+        embeddings = HuggingFaceBgeEmbeddings(model_name=embedding_model,
+                                              encode_kwargs=encode_kwargs
+                                              )
+
     return embeddings
 
 @st.cache_data
-def process_corpus(corpus, title, embedding_model, chunk_size=1000, overlap=50):
+def create_vectorstore(corpus, title, embedding_model, chunk_size=1000, overlap=50):
 
     '''Process text for Semantic Search'''
 
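With these branches, gen_embeddings() dispatches purely on substrings of the model name: 'mpnet' selects plain HuggingFaceEmbeddings, and 'FlagEmbedding' selects HuggingFaceBgeEmbeddings with normalize_embeddings=True (the normalization BGE models expect for cosine-similarity retrieval); the first branch, above this hunk's context, configures HuggingFaceInstructEmbeddings with finance-specific instructions and presumably keys on 'instructor'. A name matching none of the branches appears to leave embeddings unbound, so return embeddings would raise UnboundLocalError. A minimal sketch of the dispatch; the model strings are illustrative only (in particular, 'FlagEmbedding/bge-base-en' is a made-up id chosen to contain the required substring):

instructor_emb = gen_embeddings('hkunlp/instructor-large')             # assumed 'instructor' branch
mpnet_emb = gen_embeddings('sentence-transformers/all-mpnet-base-v2')  # 'mpnet' branch
bge_emb = gen_embeddings('FlagEmbedding/bge-base-en')                  # 'FlagEmbedding' branch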
@@ -560,34 +598,36 @@ def process_corpus(corpus, title, embedding_model, chunk_size=1000, overlap=50):
 
     return vectorstore
 
-def
+def create_memory_and_agent(query, _docsearch):
 
     '''Embed text and generate semantic search scores'''
+
+    # create a retriever over the vectorstore
+    vectorstore = _docsearch.as_retriever(search_kwargs={"k": 4})
+
+    # create retriever tool
+    tool = create_retriever_tool(
+        vectorstore,
+        "earnings_call_search",
+        "Searches and returns documents using the earnings context provided as a source, relevant to the user input question.",
+    )
+
+    tools = [tool]
+
+    prompt, llm = create_prompt_and_llm()
+
+    agent = OpenAIFunctionsAgent(llm=llm, tools=tools, prompt=prompt)
 
-
-
-
-
-
-
-
-
-
-
-    # return_source_documents=True)
-
-    question_generator = LLMChain(llm=chat_llm, prompt=CONDENSE_QUESTION_PROMPT)
-    doc_chain = load_qa_chain(llm=chat_llm,chain_type="stuff",prompt=load_prompt())
-    chain = ConversationalRetrievalChain(retriever=_docsearch.as_retriever(search_kwags={"k": 3}),
-                                         question_generator=question_generator,
-                                         combine_docs_chain=doc_chain,
-                                         memory=memory,
-                                         return_source_documents=True,
-                                         get_chat_history=lambda h :h)
-
-    answer = chain({"question": query})
-
-    return answer
+    agent_executor = AgentExecutor(
+        agent=agent,
+        tools=tools,
+        verbose=True,
+        return_intermediate_steps=True,
+    )
+
+    memory = AgentTokenBufferMemory(llm=llm)
+
+    return memory, agent_executor
 
 @st.cache_data
 def gen_sentiment(text):
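Two small leftovers in the new function are worth flagging: the docstring still reads '''Embed text and generate semantic search scores''' even though the function now builds an agent, and the query parameter is no longer used in the body. For orientation, a minimal caller-side sketch of how the returned pair might be driven from Streamlit, using the StreamlitCallbackHandler imported above; the variable names (corpus, title, embedding_model, query) and the wiring are assumptions, not part of this commit:

# Hypothetical caller inside the Streamlit app (assumes the functions.py context above).
docsearch = create_vectorstore(corpus, title, embedding_model)       # FAISS store from the renamed helper
memory, agent_executor = create_memory_and_agent(query, docsearch)

st_callback = StreamlitCallbackHandler(st.container())
response = agent_executor(
    {"input": query, "history": memory.buffer},    # AgentTokenBufferMemory defaults to the "history" key
    callbacks=[st_callback],
)
memory.save_context({"input": query}, response)    # persists the answer and intermediate steps
st.write(response["output"])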
|