nklomp committed
Commit 5bb9c79
1 Parent(s): b32c4b4

Update app.py

Files changed (1):
  1. app.py +26 -11
app.py CHANGED
@@ -10,7 +10,8 @@ from langchain_community.vectorstores import FAISS
 from langchain.memory import ConversationBufferMemory
 from langchain.chains import ConversationalRetrievalChain
 from htmlTemplates import css, bot_template, user_template
-from langchain_community.llms import HuggingFaceHub
+from langchain_community.llms import HuggingFaceHub,HuggingFaceTextGenInference
+
 
 #Llama2
 import torch
@@ -33,8 +34,8 @@ def get_pdf_text(pdf_docs):
 def get_text_chunks(text):
     text_splitter = CharacterTextSplitter(
         separator="\n",
-        chunk_size=1000, # the character length of the chunk
-        chunk_overlap=200, # the character length of the overlap between chunks
+        chunk_size=500, # the character length of the chunk
+        chunk_overlap=100, # the character length of the overlap between chunks
         length_function=len # the length function - in this case, character length (aka the python len() fn.)
     )
     chunks = text_splitter.split_text(text)
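
The commit halves both the chunk size (1000 to 500 characters) and the overlap (200 to 100). A minimal stand-alone sketch of what the splitter does with the new settings, assuming the usual langchain.text_splitter import path and a made-up sample_text standing in for the extracted PDF text:

from langchain.text_splitter import CharacterTextSplitter

# Made-up input standing in for the text get_pdf_text() pulls out of the PDFs.
sample_text = "\n".join(f"Paragraph {i}: a sentence about the document." for i in range(80))

splitter = CharacterTextSplitter(
    separator="\n",       # cut on newlines first
    chunk_size=500,       # then pack pieces into chunks of at most ~500 characters
    chunk_overlap=100,    # repeating ~100 trailing characters into the next chunk
    length_function=len,  # size is measured in characters
)
chunks = splitter.split_text(sample_text)
print(len(chunks), max(len(c) for c in chunks))

Smaller chunks give the FAISS index more, tighter entries per document; each retrieved chunk carries less surrounding context, which the 100-character overlap partly compensates for.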
@@ -76,14 +77,14 @@ def load_vectorstore(text_chunks,selected_embedding):
     return vectorstore
 
 def get_conversation_chain(vectorstore,selected_llm,selected_temperature):
-    print('Seleted LLM: ' + selected_llm)
+    print('Selected LLM: ' + selected_llm)
     print('Selected Temperature: ' + str(selected_temperature))
 
     if selected_llm == 'GPT 3.5':
         #openai_model = "gpt-4-turbo-preview"
         openai_model = "gpt-3.5-turbo"
         llm = ChatOpenAI(model=openai_model,temperature=selected_temperature)
-    elif selected_llm == 'Llama2':
+    elif selected_llm == 'Llama2 local':
 
         model_id = 'meta-llama/Llama-2-7b-chat-hf'
         hf_auth = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
@@ -144,18 +145,25 @@ def get_conversation_chain(vectorstore,selected_llm,selected_temperature):
 
         llm = HuggingFacePipeline(pipeline=pipeline)
 
-
+    elif selected_llm == 'Llama2 inference':
+        llm = HuggingFaceTextGenInference(
+            inference_server_url=os.environ.get("INFERENCE_URL"),
+            max_new_tokens=50,
+            timeout=1200,
+            temperature=selected_temperature
+        )
 
     # Generic LLM
     memory = ConversationBufferMemory(
-        memory_key='chat_history', return_messages=True)
+        memory_key='chat_history', return_messages=True, output_key='answer')
 
 
     conversation_chain = ConversationalRetrievalChain.from_llm(
         llm=llm,
         retriever=vectorstore.as_retriever(),
         memory=memory,
-        return_source_documents=False
+        return_source_documents=True,
+        verbose=True,
     )
     #print(conversation_chain)
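The new 'Llama2 inference' branch replaces local weight loading with LangChain's client for a remote text-generation-inference (TGI) server. A minimal sketch of that branch in isolation, assuming INFERENCE_URL points at a running TGI endpoint and using a placeholder prompt:

import os
from langchain_community.llms import HuggingFaceTextGenInference

# Assumes INFERENCE_URL names a reachable text-generation-inference server,
# e.g. an endpoint serving meta-llama/Llama-2-7b-chat-hf.
llm = HuggingFaceTextGenInference(
    inference_server_url=os.environ.get("INFERENCE_URL"),
    max_new_tokens=50,  # the commit caps replies at 50 tokens
    timeout=1200,       # generous timeout for a slow or cold endpoint
    temperature=0.1,    # the app passes the slider value through here
)
print(llm.invoke("Summarise retrieval-augmented generation in one sentence."))

The memory change below the new branch is coupled to the retriever change: with return_source_documents=True the chain returns several output keys, and ConversationBufferMemory then needs output_key='answer' to know which value to store in the chat history.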
 
@@ -164,10 +172,17 @@ def get_conversation_chain(vectorstore,selected_llm,selected_temperature):
 
 def handle_userinput(user_question):
 
-    print('Question: ' + user_question)
+    #print('Question: ' + user_question)
     response = st.session_state.conversation({'question': user_question})
-    st.session_state.chat_history = response['chat_history']
 
+    answer = response.get("answer")
+    sources = response.get("source_documents", [])
+    #print('Answer: ' + answer)
+    #print('Sources: ' + str(sources))
+    with st.expander("Sources"):
+        st.write(str(sources))
+
+    st.session_state.chat_history = response['chat_history']
 
     for i, message in enumerate(st.session_state.chat_history):
         if i % 2 == 0:
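
Since the chain now returns source documents alongside the answer, the reworked handler unpacks the response dict (answer, source_documents, chat_history) and shows the retrieved chunks in a collapsible panel. A stand-alone sketch of that widget, with placeholder strings instead of the real Document objects:

import streamlit as st

# Placeholder strings standing in for the Document objects the chain returns.
sources = ["page 3: pricing table ...", "page 7: delivery terms ..."]

# st.expander renders a collapsed section, so the supporting chunks stay
# hidden until the user opens "Sources".
with st.expander("Sources"):
    st.write(str(sources))

st.write(str(sources)) dumps raw Document reprs; looping over the list and writing each page_content would read more cleanly, but the sketch mirrors what the commit does.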
@@ -197,7 +212,7 @@ def main():
             "Upload your new PDFs here and click on 'Process' or load the last upload by clicking on 'Load'", accept_multiple_files=True)
 
         selected_embedding = st.radio("Which Embedding?",["Cohere-multilingual-v3.0","OpenAI", "Instructor-xl"])
-        selected_llm = st.radio("Which LLM?",["GPT 3.5", "Llama2"])
+        selected_llm = st.radio("Which LLM?",["GPT 3.5", "Llama2 local" ,"Llama2 inference"])
         selected_temperature = st.slider('Temperature?', 0.0, 1.0, 0.1)
 
         if st.button("Process"):
 