nickmuchi committed on
Commit b3f5eda
1 Parent(s): 882e722

Update functions.py

Files changed (1): functions.py +69 -29
functions.py CHANGED
@@ -26,7 +26,7 @@ from pyvis.network import Network
 import torch
 from pydub import AudioSegment
 from langchain.docstore.document import Document
-from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceInstructEmbeddings
+from langchain.embeddings import HuggingFaceEmbeddings, HuggingFaceBgeEmbeddings, HuggingFaceInstructEmbeddings
 from langchain.vectorstores import FAISS
 from langchain.text_splitter import RecursiveCharacterTextSplitter
 from langchain.chat_models import ChatOpenAI
@@ -34,7 +34,13 @@ from langchain.callbacks import StdOutCallbackHandler
 from langchain.chains import ConversationalRetrievalChain, QAGenerationChain, LLMChain
 from langchain.memory import ConversationBufferMemory
 from langchain.chains.question_answering import load_qa_chain
-from langchain.chains.conversational_retrieval.prompts import CONDENSE_QUESTION_PROMPT
+
+from langchain.callbacks import StreamlitCallbackHandler
+from langchain.agents import OpenAIFunctionsAgent, AgentExecutor
+from langchain.agents.agent_toolkits import create_retriever_tool
+from langchain.agents.openai_functions_agent.agent_token_buffer_memory import (
+    AgentTokenBufferMemory,
+)
 
 from langchain.prompts.chat import (
     ChatPromptTemplate,
@@ -97,6 +103,7 @@ def load_prompt():
 
     return prompt
 
+
 ###################### Functions #######################################################################################
 
 # @st.cache_data
@@ -528,6 +535,30 @@ def generate_eval(raw_text, N, chunk):
 
     return eval_set_full
 
+@st.cache_resource
+def create_prompt_and_llm():
+    '''Create the prompt and LLM for the conversational agent'''
+
+    llm = ChatOpenAI(temperature=0, streaming=True, model="gpt-4")
+
+    message = SystemMessage(
+        content=(
+            "You are a helpful chatbot tasked with answering questions accurately about the earnings call transcript provided. "
+            "Unless otherwise explicitly stated, it is fair to assume that questions are about the earnings call transcript. "
+            "If there is any ambiguity, assume they are about the transcript. "
+            "Do not use any information not provided in the earnings context, and remember to speak like a finance expert. "
+            "If you don't know the answer, just say 'There is no relevant answer in the given earnings call transcript'; "
+            "don't try to make up an answer."
+        )
+    )
+
+    prompt = OpenAIFunctionsAgent.create_prompt(
+        system_message=message,
+        extra_prompt_messages=[MessagesPlaceholder(variable_name="history")],
+    )
+
+    return prompt, llm
+
 @st.cache_resource
 def gen_embeddings(embedding_model):
 
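Note on `create_prompt_and_llm()`: `SystemMessage` and `MessagesPlaceholder` are referenced here, but no import for them is added in this diff, so they are presumably already imported elsewhere in functions.py. A minimal sketch of the assumed imports, with a comment describing how `OpenAIFunctionsAgent.create_prompt` usually lays out the template in this langchain API (verify against the pinned version):

```python
# Assumed to be imported elsewhere in functions.py (not shown in this diff):
from langchain.schema import SystemMessage
from langchain.prompts import MessagesPlaceholder

# OpenAIFunctionsAgent.create_prompt builds a ChatPromptTemplate roughly as:
#   [system message, MessagesPlaceholder("history"), "{input}" human message,
#    MessagesPlaceholder("agent_scratchpad")]
# so callers must supply a "history" value at invocation time
# (see the note after the last hunk for the assumed call pattern).
```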
@@ -539,14 +570,21 @@ def gen_embeddings(embedding_model):
                                                   query_instruction='Represent the Financial question for retrieving supporting paragraphs: ',
                                                   embed_instruction='Represent the Financial paragraph for retrieval: ')
 
-    else:
+    elif 'mpnet' in embedding_model:
 
         embeddings = HuggingFaceEmbeddings(model_name=embedding_model)
 
+    elif 'FlagEmbedding' in embedding_model:
+
+        encode_kwargs = {'normalize_embeddings': True}
+        embeddings = HuggingFaceBgeEmbeddings(model_name=embedding_model,
+                                              encode_kwargs=encode_kwargs
+                                              )
+
     return embeddings
 
 @st.cache_data
-def process_corpus(corpus, title, embedding_model, chunk_size=1000, overlap=50):
+def create_vectorstore(corpus, title, embedding_model, chunk_size=1000, overlap=50):
 
     '''Process text for Semantic Search'''
 
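Note on the new BGE branch in `gen_embeddings()`: `normalize_embeddings=True` produces unit-length vectors, so inner-product scores in FAISS behave like cosine similarity. A standalone sketch of what this branch constructs; the checkpoint name below is illustrative, since the branch matches any identifier containing 'FlagEmbedding':

```python
from langchain.embeddings import HuggingFaceBgeEmbeddings

# Illustrative BGE checkpoint -- the diff only requires 'FlagEmbedding'
# to appear in the model identifier the app passes in.
embeddings = HuggingFaceBgeEmbeddings(
    model_name="BAAI/bge-base-en",
    encode_kwargs={"normalize_embeddings": True},  # unit vectors -> cosine-like scores
)
vector = embeddings.embed_query("What was the revenue guidance for next year?")
```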
@@ -560,34 +598,36 @@ def process_corpus(corpus, title, embedding_model, chunk_size=1000, overlap=50):
 
     return vectorstore
 
-def embed_text(query,_docsearch):
+def create_memory_and_agent(query, _docsearch):
 
     '''Embed text and generate semantic search scores'''
+
+    # create a retriever from the vectorstore
+    retriever = _docsearch.as_retriever(search_kwargs={"k": 4})
+
+    # create retriever tool
+    tool = create_retriever_tool(
+        retriever,
+        "earnings_call_search",
+        "Searches and returns documents using the earnings context provided as a source, relevant to the user input question.",
+    )
+
+    tools = [tool]
+
+    prompt, llm = create_prompt_and_llm()
+
+    agent = OpenAIFunctionsAgent(llm=llm, tools=tools, prompt=prompt)
 
-    # llm = OpenAI(temperature=0)
-    chat_llm = ChatOpenAI(streaming=True,
-                          model_name='gpt-4',
-                          callbacks=[StdOutCallbackHandler()],
-                          verbose=True,
-                          temperature=0
-                          )
-
-    # chain = RetrievalQA.from_chain_type(llm=chat_llm, chain_type="stuff",
-    #                                     retriever=_docsearch.as_retriever(),
-    #                                     return_source_documents=True)
-
-    question_generator = LLMChain(llm=chat_llm, prompt=CONDENSE_QUESTION_PROMPT)
-    doc_chain = load_qa_chain(llm=chat_llm, chain_type="stuff", prompt=load_prompt())
-    chain = ConversationalRetrievalChain(retriever=_docsearch.as_retriever(search_kwargs={"k": 3}),
-                                         question_generator=question_generator,
-                                         combine_docs_chain=doc_chain,
-                                         memory=memory,
-                                         return_source_documents=True,
-                                         get_chat_history=lambda h: h)
-
-    answer = chain({"question": query})
-
-    return answer
+    agent_executor = AgentExecutor(
+        agent=agent,
+        tools=tools,
+        verbose=True,
+        return_intermediate_steps=True,
+    )
+
+    memory = AgentTokenBufferMemory(llm=llm)
+
+    return memory, agent_executor
 
 @st.cache_data
 def gen_sentiment(text):
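Note on `create_memory_and_agent()`: the memory is returned alongside the executor rather than wired into `AgentExecutor`, so the caller must supply the conversation history at invocation time to fill the prompt's "history" placeholder. Note also that the `query` parameter is unused in the new body, and the inherited docstring still describes the old `embed_text` behaviour. The call site is not part of this diff; below is a hedged sketch of the assumed Streamlit wiring, following the standard langchain `AgentTokenBufferMemory` pattern:

```python
import streamlit as st
from langchain.callbacks import StreamlitCallbackHandler

# Hypothetical call site -- not code from this repo.
memory, agent_executor = create_memory_and_agent(query, docsearch)

st_callback = StreamlitCallbackHandler(st.container())
response = agent_executor(
    {"input": query, "history": memory.buffer},  # fills MessagesPlaceholder("history")
    callbacks=[st_callback],
    include_run_info=True,
)
memory.save_context({"input": query}, response)  # keep the token-bounded buffer current
st.write(response["output"])
```

`AgentTokenBufferMemory` trims the stored messages to a token budget, which is why it needs the `llm` handle at construction.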
 