Update functions.py

functions.py: +69 -29 (CHANGED)
@@ -26,7 +26,7 @@ from pyvis.network import Network
 import torch
 from pydub import AudioSegment
 from langchain.docstore.document import Document
-from langchain.embeddings import HuggingFaceEmbeddings,HuggingFaceInstructEmbeddings
+from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceBgeEmbeddings, HuggingFaceInstructEmbeddings
 from langchain.vectorstores import FAISS
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.chat_models import ChatOpenAI
@@ -34,7 +34,13 @@ from langchain.callbacks import StdOutCallbackHandler
 from langchain.chains import ConversationalRetrievalChain, QAGenerationChain, LLMChain
 from langchain.memory import ConversationBufferMemory
 from langchain.chains.question_answering import load_qa_chain
-
+
+from langchain.callbacks import StreamlitCallbackHandler
+from langchain.agents import OpenAIFunctionsAgent, AgentExecutor
+from langchain.agents.agent_toolkits import create_retriever_tool
+from langchain.agents.openai_functions_agent.agent_token_buffer_memory import (
+    AgentTokenBufferMemory,
+)
 
 from langchain.prompts.chat import (
     ChatPromptTemplate,
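Taken together, the new imports set up LangChain's OpenAI-functions conversational retrieval agent, which the hunks below swap in for the previous ConversationalRetrievalChain flow: the FAISS retriever is wrapped as a tool via create_retriever_tool, driven by an OpenAIFunctionsAgent inside an AgentExecutor, with AgentTokenBufferMemory tracking the conversation and StreamlitCallbackHandler available for streaming agent steps into the UI.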
@@ -97,6 +103,7 @@ def load_prompt():
 
     return prompt
 
+
 ###################### Functions #######################################################################################
 
 # @st.cache_data
@@ -528,6 +535,30 @@ def generate_eval(raw_text, N, chunk):
 
     return eval_set_full
 
+@st.cache_resource
+def create_prompt_and_llm():
+    '''Create prompt'''
+
+    llm = ChatOpenAI(temperature=0, streaming=True, model="gpt-4")
+
+    message = SystemMessage(
+        content=(
+            "You are a helpful chatbot tasked with answering questions accurately about the earnings call transcript provided. "
+            "Unless otherwise explicitly stated, it is probably fair to assume that questions are about the earnings call transcript. "
+            "If there is any ambiguity, assume they are about that. "
+            "Do not use any information not provided in the earnings context, and remember to speak like a finance expert. "
+            "If you don't know the answer, just say 'There is no relevant answer in the given earnings call transcript'; "
+            "don't try to make up an answer."
+        )
+    )
+
+    prompt = OpenAIFunctionsAgent.create_prompt(
+        system_message=message,
+        extra_prompt_messages=[MessagesPlaceholder(variable_name="history")],
+    )
+
+    return prompt, llm
+
 @st.cache_resource
 def gen_embeddings(embedding_model):
 
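Note: create_prompt_and_llm() references SystemMessage and MessagesPlaceholder, neither of which appears in this commit's import hunks, so they are presumably already imported elsewhere in functions.py (the truncated from langchain.prompts.chat import (...) block may already provide MessagesPlaceholder). If not, imports along these lines would be needed; this is an assumption matching the langchain 0.0.x layout the file already uses:

from langchain.schema import SystemMessage         # assumed import, not added by this commit
from langchain.prompts import MessagesPlaceholder  # assumed import, not added by this commit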
@@ -539,14 +570,21 @@ def gen_embeddings(embedding_model):
                                               query_instruction='Represent the Financial question for retrieving supporting paragraphs: ',
                                               embed_instruction='Represent the Financial paragraph for retrieval: ')
 
-    else:
+    elif 'mpnet' in embedding_model:
 
         embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
 
+    elif 'FlagEmbedding' in embedding_model:
+
+        encode_kwargs = {'normalize_embeddings': True}
+        embeddings = HuggingFaceBgeEmbeddings(model_name=embedding_model,
+                                              encode_kwargs=encode_kwargs
+                                              )
+
     return embeddings
 
 @st.cache_data
-def process_corpus(corpus, title, embedding_model, chunk_size=1000, overlap=50):
+def create_vectorstore(corpus, title, embedding_model, chunk_size=1000, overlap=50):
 
     '''Process text for Semantic Search'''
 
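With these branches, gen_embeddings() dispatches purely on substrings of the model name: 'mpnet' selects plain HuggingFaceEmbeddings, and 'FlagEmbedding' selects HuggingFaceBgeEmbeddings with normalize_embeddings=True (the normalization BGE models expect for cosine-similarity retrieval); the first branch, above this hunk's context, configures HuggingFaceInstructEmbeddings with finance-specific instructions and presumably keys on 'instructor'. A name matching none of the branches appears to leave embeddings unbound, so return embeddings would raise UnboundLocalError. A minimal sketch of the dispatch; the model strings are illustrative only (in particular, 'FlagEmbedding/bge-base-en' is a made-up id chosen to contain the required substring):

instructor_emb = gen_embeddings('hkunlp/instructor-large')             # assumed 'instructor' branch
mpnet_emb = gen_embeddings('sentence-transformers/all-mpnet-base-v2')  # 'mpnet' branch
bge_emb = gen_embeddings('FlagEmbedding/bge-base-en')                  # 'FlagEmbedding' branch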
@@ -560,34 +598,36 @@ def process_corpus(corpus, title, embedding_model, chunk_size=1000, overlap=50):
 
     return vectorstore
 
-def
+def create_memory_and_agent(query, _docsearch):
 
     '''Embed text and generate semantic search scores'''
+
+    # create a retriever over the vectorstore
+    vectorstore = _docsearch.as_retriever(search_kwargs={"k": 4})
+
+    # create retriever tool
+    tool = create_retriever_tool(
+        vectorstore,
+        "earnings_call_search",
+        "Searches and returns documents using the earnings context provided as a source, relevant to the user input question.",
+    )
+
+    tools = [tool]
+
+    prompt, llm = create_prompt_and_llm()
+
+    agent = OpenAIFunctionsAgent(llm=llm, tools=tools, prompt=prompt)
 
-
-
-
-
-
-
-
-
-
-
-    # return_source_documents=True)
-
-    question_generator = LLMChain(llm=chat_llm, prompt=CONDENSE_QUESTION_PROMPT)
-    doc_chain = load_qa_chain(llm=chat_llm,chain_type="stuff",prompt=load_prompt())
-    chain = ConversationalRetrievalChain(retriever=_docsearch.as_retriever(search_kwags={"k": 3}),
-                                         question_generator=question_generator,
-                                         combine_docs_chain=doc_chain,
-                                         memory=memory,
-                                         return_source_documents=True,
-                                         get_chat_history=lambda h :h)
-
-    answer = chain({"question": query})
-
-    return answer
+    agent_executor = AgentExecutor(
+        agent=agent,
+        tools=tools,
+        verbose=True,
+        return_intermediate_steps=True,
+    )
+
+    memory = AgentTokenBufferMemory(llm=llm)
+
+    return memory, agent_executor
 
 @st.cache_data
 def gen_sentiment(text):
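Two small leftovers in the new function are worth flagging: the docstring still reads '''Embed text and generate semantic search scores''' even though the function now builds an agent, and the query parameter is no longer used in the body. For orientation, a minimal caller-side sketch of how the returned pair might be driven from Streamlit, using the StreamlitCallbackHandler imported above; the variable names (corpus, title, embedding_model, query) and the wiring are assumptions, not part of this commit:

# Hypothetical caller inside the Streamlit app (assumes the functions.py context above).
docsearch = create_vectorstore(corpus, title, embedding_model)       # FAISS store from the renamed helper
memory, agent_executor = create_memory_and_agent(query, docsearch)

st_callback = StreamlitCallbackHandler(st.container())
response = agent_executor(
    {"input": query, "history": memory.buffer},    # AgentTokenBufferMemory defaults to the "history" key
    callbacks=[st_callback],
)
memory.save_context({"input": query}, response)    # persists the answer and intermediate steps
st.write(response["output"])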
|