fracapuano committed on
Commit
60017a4
1 Parent(s): 7a7c4d5

fix: minor QA pipeline fix

Browse files
Files changed (1) hide show
  1. qa/utils.py +11 -7
qa/utils.py CHANGED
@@ -4,6 +4,7 @@ from langchain import OpenAI
4
  from langchain.chains.qa_with_sources import load_qa_with_sources_chain
5
  from langchain.embeddings.openai import OpenAIEmbeddings
6
  from langchain.llms import OpenAI
 
7
  from langchain.docstore.document import Document
8
  from langchain.vectorstores import FAISS, VectorStore
9
  import docx2txt
@@ -13,7 +14,7 @@ from io import BytesIO
13
  import streamlit as st
14
  from .prompts import STUFF_PROMPT
15
  from pypdf import PdfReader
16
- from openai.error import AuthenticationError
17
 
18
  class PDFFile:
19
  """A PDF file class for typing purposes."""
@@ -140,6 +141,7 @@ def embed_docs(_docs: Tuple[Document]) -> VectorStore:
140
 
141
  return index
142
 
 
143
  @st.cache_data
144
  def search_docs(_index: VectorStore, query: str, k:int=5) -> List[Document]:
145
  """Searches a FAISS index for similar chunks to the query
@@ -153,13 +155,15 @@ def search_docs(_index: VectorStore, query: str, k:int=5) -> List[Document]:
153
  @st.cache_data
154
  def get_answer(_docs: List[Document], query: str) -> Dict[str, Any]:
155
  """Gets an answer to a question from a list of Documents."""
156
- # Get the answer
 
 
157
  chain = load_qa_with_sources_chain(
158
- OpenAI(temperature=0,
159
- openai_api_key=st.session_state.get("OPENAI_API_KEY")),
160
- chain_type="stuff",
161
- prompt=STUFF_PROMPT
162
- )
163
  # also returning the text of the source used to form the answer
164
  answer = chain(
165
  {"input_documents": _docs, "question": query}
 
4
  from langchain.chains.qa_with_sources import load_qa_with_sources_chain
5
  from langchain.embeddings.openai import OpenAIEmbeddings
6
  from langchain.llms import OpenAI
7
+ from langchain.chat_models import ChatOpenAI
8
  from langchain.docstore.document import Document
9
  from langchain.vectorstores import FAISS, VectorStore
10
  import docx2txt
 
14
  import streamlit as st
15
  from .prompts import STUFF_PROMPT
16
  from pypdf import PdfReader
17
+ from langchain.memory import ConversationBufferWindowMemory
18
 
19
  class PDFFile:
20
  """A PDF file class for typing purposes."""
 
141
 
142
  return index
143
 
144
+
145
  @st.cache_data
146
  def search_docs(_index: VectorStore, query: str, k:int=5) -> List[Document]:
147
  """Searches a FAISS index for similar chunks to the query
 
155
  @st.cache_data
156
  def get_answer(_docs: List[Document], query: str) -> Dict[str, Any]:
157
  """Gets an answer to a question from a list of Documents."""
158
+ memory = ConversationBufferWindowMemory(k=5, input_key="question") # only considering the last 5 messages
159
+
160
+ # Create the chain to be used in this specific setting
161
  chain = load_qa_with_sources_chain(
162
+ ChatOpenAI(temperature=0, openai_api_key=st.session_state.get("OPENAI_API_KEY"), model="gpt-4", streaming=True),
163
+ chain_type="stuff",
164
+ prompt=STUFF_PROMPT,
165
+ # memory=memory NOTE: As of Aug 2023, memory is not supported in the QA chain - uncomment this line when it is supported
166
+ )
167
  # also returning the text of the source used to form the answer
168
  answer = chain(
169
  {"input_documents": _docs, "question": query}