Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -31,8 +31,8 @@ def get_pdf_text(pdf_docs):
|
|
31 |
|
32 |
def get_text_chunks(text):
|
33 |
text_splitter = CharacterTextSplitter(separator="\n",
|
34 |
-
chunk_size=
|
35 |
-
chunk_overlap=
|
36 |
length_function=len
|
37 |
)
|
38 |
chunks = text_splitter.split_text(text)
|
@@ -43,13 +43,14 @@ def get_text_chunks(text):
|
|
43 |
def get_vectorstore(text_chunks):
|
44 |
#embeddings = OpenAIEmbeddings()
|
45 |
embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
|
46 |
-
#
|
47 |
vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
|
48 |
|
49 |
return vectorstore
|
50 |
|
51 |
|
52 |
def get_conversation_chain(vectorstore, model_name):
|
|
|
53 |
llm = LlamaCpp(model_path=model_name,
|
54 |
temperature=0.1,
|
55 |
top_k=30,
|
@@ -62,10 +63,12 @@ def get_conversation_chain(vectorstore, model_name):
|
|
62 |
|
63 |
#llm = ChatOpenAI()
|
64 |
|
65 |
-
memory = ConversationBufferMemory(memory_key='chat_history',
|
|
|
|
|
|
|
66 |
|
67 |
conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm,
|
68 |
-
# condense_question_prompt=CONDENSE_QUESTION_PROMPT,
|
69 |
retriever=vectorstore.as_retriever(),
|
70 |
memory=memory,
|
71 |
return_source_documents=True
|
@@ -75,6 +78,7 @@ def get_conversation_chain(vectorstore, model_name):
|
|
75 |
|
76 |
|
77 |
def handle_userinput(user_question):
|
|
|
78 |
response = st.session_state.conversation({'question': user_question})
|
79 |
|
80 |
st.session_state.chat_history = response['chat_history']
|
@@ -111,7 +115,7 @@ if "chat_history" not in st.session_state:
|
|
111 |
st.session_state.chat_history = None
|
112 |
|
113 |
st.header("Chat with multiple PDFs :books:")
|
114 |
-
user_question = st.text_input("Ask a question about your documents:")
|
115 |
|
116 |
if user_question:
|
117 |
handle_userinput(user_question)
|
|
|
31 |
|
32 |
def get_text_chunks(text):
|
33 |
text_splitter = CharacterTextSplitter(separator="\n",
|
34 |
+
chunk_size=1000, # 1000
|
35 |
+
chunk_overlap=200, # 200
|
36 |
length_function=len
|
37 |
)
|
38 |
chunks = text_splitter.split_text(text)
|
|
|
43 |
def get_vectorstore(text_chunks):
|
44 |
#embeddings = OpenAIEmbeddings()
|
45 |
embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
|
46 |
+
#embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2")
|
47 |
vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
|
48 |
|
49 |
return vectorstore
|
50 |
|
51 |
|
52 |
def get_conversation_chain(vectorstore, model_name):
|
53 |
+
|
54 |
llm = LlamaCpp(model_path=model_name,
|
55 |
temperature=0.1,
|
56 |
top_k=30,
|
|
|
63 |
|
64 |
#llm = ChatOpenAI()
|
65 |
|
66 |
+
memory = ConversationBufferMemory(memory_key='chat_history',
|
67 |
+
input_key='question',
|
68 |
+
output_key='answer',
|
69 |
+
return_messages=True)
|
70 |
|
71 |
conversation_chain = ConversationalRetrievalChain.from_llm(llm=llm,
|
|
|
72 |
retriever=vectorstore.as_retriever(),
|
73 |
memory=memory,
|
74 |
return_source_documents=True
|
|
|
78 |
|
79 |
|
80 |
def handle_userinput(user_question):
|
81 |
+
|
82 |
response = st.session_state.conversation({'question': user_question})
|
83 |
|
84 |
st.session_state.chat_history = response['chat_history']
|
|
|
115 |
st.session_state.chat_history = None
|
116 |
|
117 |
st.header("Chat with multiple PDFs :books:")
|
118 |
+
user_question = st.text_input("Ask a question about your documents: ")
|
119 |
|
120 |
if user_question:
|
121 |
handle_userinput(user_question)
|