eliujl committed on
Commit 4117986
1 Parent(s): 3607afa

Updated models and retrieval number


Updated the OpenAI models and Mistral models. Added a slider for the number of chunks to retrieve when using the retriever.

Files changed (1)
  1. app.py +7 -6
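
In outline, the commit wires a new Streamlit slider into the retriever's `k`. A minimal sketch of the new flow, assuming `setup_retriever` wraps LangChain's `as_retriever` (its body is outside this diff) and that `n_texts` and `docsearch` come from the surrounding app:

import streamlit as st

def setup_retriever(docsearch, k):
    # Assumed body: app.py defines this helper elsewhere. LangChain vector
    # stores (Pinecone, Chroma) expose as_retriever(), and search_kwargs['k']
    # controls how many chunks each query fetches.
    return docsearch.as_retriever(search_kwargs={'k': k})

# New in this commit: user-tunable retrieval count (1..80, default 20).
k_retrieval = st.slider('# source chunk(s) to retrieve', 1, 80, 20)

# Clamp to the number of ingested chunks (n_texts) so k never exceeds what
# the vector store holds; previously this was hard-coded as min(20, n_texts).
k = min(k_retrieval, n_texts)
retriever = setup_retriever(docsearch, k)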
app.py CHANGED

@@ -23,12 +23,12 @@ import json
 OPENAI_API_KEY = ''
 PINECONE_API_KEY = ''
 PINECONE_API_ENV = ''
-gpt3p5 = 'gpt-3.5-turbo-1106'
-gpt4 = 'gpt-4-1106-preview'
+gpt3p5 = 'gpt-3.5-turbo-0125'
+gpt4 = 'gpt-4-0125-preview'
 local_model_tuples = [
     (0, 'mistral_7b', "TheBloke/OpenHermes-2-Mistral-7B-GGUF", "openhermes-2-mistral-7b.Q8_0.gguf", "mistral", "https://huggingface.co/TheBloke/OpenHermes-2-Mistral-7B-GGUF"),
-    (1, 'mistral_7b_inst_small', "TheBloke/Mistral-7B-Instruct-v0.1-GGUF", "mistral-7b-instruct-v0.1.Q2_K.gguf", "mistral", "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF"),
-    (2, 'mistral_7b_inst_med', "TheBloke/Mistral-7B-Instruct-v0.1-GGUF", "mistral-7b-instruct-v0.1.Q8_0.gguf", "mistral", "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.1-GGUF"),
+    (1, 'mistral_7b_inst_small', "TheBloke/Mistral-7B-Instruct-v0.2-GGUF", "mistral-7b-instruct-v0.2.Q2_K.gguf", "mistral", "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF"),
+    (2, 'mistral_7b_inst_med', "TheBloke/Mistral-7B-Instruct-v0.2-GGUF", "mistral-7b-instruct-v0.2.Q8_0.gguf", "mistral", "https://huggingface.co/TheBloke/Mistral-7B-Instruct-v0.2-GGUF"),
     (3, 'llama_13b_small', "TheBloke/Llama-2-13B-chat-GGUF", "llama-2-13b-chat.Q4_K_M.gguf", "llama", "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF"),
     (4, 'llama_13b_med', "TheBloke/Llama-2-13B-chat-GGUF", "llama-2-13b-chat.Q8_0.gguf", "llama", "https://huggingface.co/TheBloke/Llama-2-13B-chat-GGUF"),
     (5, 'mixtral', "TheBloke/Mixtral-8x7B-v0.1-GGUF", "mixtral-8x7b-v0.1.Q8_0.gguf", "mixtral", "https://huggingface.co/TheBloke/Mixtral-8x7B-v0.1-GGUF"),
@@ -320,7 +320,8 @@ def main(pinecone_index_name, chroma_collection_name, persist_directory, docsear
     temperature = st.slider('Temperature', 0.0, 1.0, 0.1)
     if usage == 'RAG':
         r_pinecone = st.radio('Vector store:', ('Pinecone (online)', 'Chroma (local)'))
-        k_sources = st.slider('# source(s) to print out', 0, 20, 2)
+        k_retrieval = st.slider('# source chunk(s) to retrieve', 1, 80, 20)
+        k_sources = st.slider('# source chunk(s) to print', 0, 20, 2)
         r_ingest = st.radio('Ingest file(s)?', ('Yes', 'No'))
         if r_pinecone == 'Pinecone (online)':
             use_pinecone = True
@@ -399,7 +400,7 @@ def main(pinecone_index_name, chroma_collection_name, persist_directory, docsear
     memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True, output_key='answer')
     if usage == 'RAG':
         # number of sources (split-documents when ingesting files); default is 4
-        k = min([20, n_texts])
+        k = min([k_retrieval, n_texts])
         retriever = setup_retriever(docsearch, k)
         CRqa = ConversationalRetrievalChain.from_llm(
             llm,
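
Downstream, the chain construction truncated in the last hunk is unchanged by this commit; only the retriever's `k` differs, so each question now pulls up to `k_retrieval` chunks instead of at most 20. A sketch of how the resized retriever plugs in, assuming the usual `from_llm` signature and that `return_source_documents` backs the `k_sources` printout (both assumptions, since the full call is not shown here):

from langchain.chains import ConversationalRetrievalChain
from langchain.memory import ConversationBufferMemory

memory = ConversationBufferMemory(memory_key='chat_history',
                                  return_messages=True, output_key='answer')

# Only the retriever's k changed in this commit; the chain call is as before.
CRqa = ConversationalRetrievalChain.from_llm(
    llm,                           # llm comes from the surrounding app
    retriever=retriever,
    memory=memory,
    return_source_documents=True,  # assumed, to support the k_sources slider
)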