ryanrwatkins committed
Commit f527f6a
1 Parent(s): 4ef51a0

Update app.py

Files changed (1):
  1. app.py +11 -224

app.py CHANGED
@@ -74,10 +74,11 @@ from langchain.memory import ConversationSummaryBufferMemory,ConversationBufferMemory
 from langchain.schema import Document
 
 
-# Cohere
+# Cohere (not currently in use)
 from langchain.retrievers.document_compressors import CohereRerank
 from langchain_community.llms import Cohere
 
+# Get API keys
 openai_api_key = os.environ['openai_key']
 google_api_key = os.environ['gemini_key']
 HF_key = os.environ['HF_token']
@@ -86,12 +87,12 @@ cohere_api_key = os.environ['cohere_api']
 current_dir = os.getcwd()
 
 
-
-
+# Not currently in use
 prompt_templates = {"All Needs Experts": "Respond as if you are combination of all needs assessment experts."}
 actor_description = {"All Needs Experts": "<div style='float: left;margin: 0px 5px 0px 5px;'><img src='https://na.weshareresearch.com/wp-content/uploads/2023/04/experts2.jpg' alt='needs expert image' style='width:70px;align:top;'></div>A combination of all needs assessment experts."}
 
 
+# Initiates the UI features
 
 def get_empty_state():
     return { "messages": []}
@@ -129,17 +130,6 @@ def on_prompt_template_change_description(prompt_template):
 
 
 
-
-
-
-
-
-
-
-
-
-
-
 # set to load only PDF, but could change to set to specific directory, so that other files don't get embeddings
 
 def langchain_document_loader():
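The body of langchain_document_loader is only partially visible in this diff. As a hedged illustration, a PDF-only loader built from LangChain's DirectoryLoader and PyPDFLoader might look like the sketch below (the function name suffix, directory argument, and glob pattern are assumptions, not taken from app.py):

    from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader

    def langchain_document_loader_sketch(directory):
        # Restrict loading to *.pdf so other file types never get embedded
        loader = DirectoryLoader(directory, glob="**/*.pdf", loader_cls=PyPDFLoader)
        return loader.load()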
@@ -179,23 +169,21 @@ def langchain_document_loader():
         documents.extend(doc_loader.load())
     """
     return documents
+langchain_document_loader()
 
 
-
-langchain_document_loader()
+# Text splitting of the uploaded documents, the chunks will become vectors
 
 text_splitter = RecursiveCharacterTextSplitter(
     separators = ["\n\n", "\n", " ", ""],
     chunk_size = 1500,
     chunk_overlap= 200
 )
-
-# Text splitting
 chunks = text_splitter.split_documents(documents=documents)
 
 
 
-# just FYI, does not impact anything
+# just FYI, does not impact anything; it is just for information when re-starting the app
 
 def tiktoken_tokens(documents,model="gpt-3.5-turbo"):
     """Use tiktoken (tokeniser for OpenAI models) to return a list of token lengths per document."""
@@ -204,8 +192,6 @@ def tiktoken_tokens(documents,model="gpt-3.5-turbo"):
     tokens_length = [len(encoding.encode(documents[i].page_content)) for i in range(len(documents))]
 
     return tokens_length
-
-
 chunks_length = tiktoken_tokens(chunks,model="gpt-3.5-turbo")
 
 print(f"Number of tokens - Average : {int(np.mean(chunks_length))}")
@@ -214,6 +200,7 @@ print(f"Number of tokens - 50% percentile : {int(np.quantile(chunks_length,0.5))}")
 print(f"Number of tokens - 75% percentile : {int(np.quantile(chunks_length,0.75))}")
 
 
+
 # For embeddings I am just using the free HF model so others are turned off
 
 def select_embeddings_model(LLM_service="HuggingFace"):
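The body of select_embeddings_model falls outside this hunk; a minimal sketch of selecting the free Hugging Face option through LangChain (the sentence-transformers model name is an assumption):

    from langchain_community.embeddings import HuggingFaceEmbeddings

    def select_embeddings_model_sketch(LLM_service="HuggingFace"):
        if LLM_service == "HuggingFace":
            # Free, locally run sentence-transformers model; name is an assumption
            return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        raise ValueError(f"Unsupported LLM_service: {LLM_service}")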
@@ -248,8 +235,7 @@ embeddings_HuggingFace = select_embeddings_model(LLM_service="HuggingFace")
 
 
 
-# Creates the DB that will hold the embedding vectors
-
+# Creates the Database that will hold the embedding vectors
 def create_vectorstore(embeddings,documents,vectorstore_name):
     """Create a Chroma vector database."""
     persist_directory = (current_dir + "/" + vectorstore_name)
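The rest of create_vectorstore is elided by the hunk; building a persistent Chroma index from chunks usually reduces to Chroma.from_documents, roughly as sketched here (assuming the function is a thin wrapper):

    from langchain_community.vectorstores import Chroma

    def create_vectorstore_sketch(embeddings, documents, persist_directory):
        # Embed every chunk and write the index to disk so it can be reloaded later
        return Chroma.from_documents(
            documents=documents,
            embedding=embeddings,
            persist_directory=persist_directory,
        )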
@@ -263,11 +249,9 @@ def create_vectorstore(embeddings,documents,vectorstore_name):
     return vector_store
 
 
-
 create_vectorstores = True # change to True to create vectorstores
 
 # Then we tell it to store the embeddings in the VectorStore (sticking with HF for this)
-
 if create_vectorstores:
     """
     vector_store_OpenAI,_ = create_vectorstore(
@@ -312,8 +296,6 @@ vector_store_google = Chroma(
 print("vector_store_google:",vector_store_google._collection.count(),"chunks.")
 """
 
-
-
 vector_store_HF = Chroma(
     persist_directory = current_dir + "/Vit_All_HF_Embeddings",
     embedding_function=embeddings_HuggingFace)
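With the persisted store reloaded as vector_store_HF, the vector-store-backed retriever referenced by the later hunks is typically obtained with as_retriever; a sketch with assumed search parameters:

    # Expose the Chroma store as a retriever returning the top-k similar chunks
    base_retriever_sketch = vector_store_HF.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 10},
    )
    docs = base_retriever_sketch.get_relevant_documents("What is a needs assessment?")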
@@ -434,121 +416,9 @@ def CohereRerank_retriever(
 
 
 
-# Don't have to use this, but it brings all the above pieces together into a single function
-
-'''
-def retrieval_blocks(
-    create_vectorstore=True,# if True a Chroma vectorstore is created, else the Chroma vectorstore will be loaded
-    LLM_service="HuggingFace",
-    vectorstore_name="Vit_All_HF_Embeddings",
-    chunk_size = 1600, chunk_overlap=200, # parameters of the RecursiveCharacterTextSplitter
-    retriever_type="Vectorstore_backed_retriever",
-    base_retriever_search_type="similarity", base_retriever_k=10, base_retriever_score_threshold=None,
-    compression_retriever_k=16,
-    cohere_api_key="***", cohere_model="rerank-multilingual-v2.0", cohere_top_n=8,
-):
-    print("retrieval blocks started")
-    """
-    Retrieval includes: document loaders, text splitter, vectorstore and retriever.
-
-    Parameters:
-        create_vectorstore (boolean): If True, a new Chroma vectorstore will be created. Otherwise, an existing vectorstore will be loaded.
-        LLM_service: OpenAI, Google or HuggingFace.
-        vectorstore_name (str): the name of the vectorstore.
-        chunk_size and chunk_overlap: parameters of the RecursiveCharacterTextSplitter, default = (1600,200).
-
-        retriever_type (str): in [Vectorstore_backed_retriever,Contextual_compression,Cohere_reranker]
-
-        base_retriever_search_type: search_type in ["similarity", "mmr", "similarity_score_threshold"], default = similarity.
-        base_retriever_k: The most similar vectors to retrieve (default k = 10).
-        base_retriever_score_threshold: score_threshold used by the base retriever, default = None.
-
-        compression_retriever_k: top k documents returned by the compression retriever, default=16
-
-        cohere_api_key: Cohere API key
-        cohere_model (str): The Cohere model can be either 'rerank-english-v2.0' or 'rerank-multilingual-v2.0', with the latter being the default.
-        cohere_top_n: top n results returned by Cohere rerank, default = 8.
-
-    Output:
-        retriever.
-    """
-    try:
-        # Create new Vectorstore (Chroma index)
-        if create_vectorstore:
-            # 1. load documents
-            documents = langchain_document_loader(current_dir)
-
-            # 2. Text Splitter: split documents to chunks
-            text_splitter = RecursiveCharacterTextSplitter(
-                separators = ["\n\n", "\n", " ", ""],
-                chunk_size = chunk_size,
-                chunk_overlap= chunk_overlap
-            )
-            chunks = text_splitter.split_documents(documents=documents)
-
-            # 3. Embeddings
-            embeddings = select_embeddings_model(LLM_service=LLM_service)
-
-            # 4. Vectorstore: create Chroma index
-            vector_store = create_vectorstore(
-                embeddings=embeddings,
-                documents = chunks,
-                vectorstore_name=vectorstore_name,
-            )
-
-        # 5. Load a Vectorstore (Chroma index)
-        else:
-            embeddings = select_embeddings_model(LLM_service=LLM_service)
-            vector_store = Chroma(
-                persist_directory = current_dir + "/" + vectorstore_name,
-                embedding_function=embeddings
-            )
-
-
-        # 6. base retriever: Vector store-backed retriever
-        base_retriever = Vectorstore_backed_retriever(
-            vector_store,
-            search_type=base_retriever_search_type,
-            k=base_retriever_k,
-            score_threshold=base_retriever_score_threshold
-        )
-        retriever = None
-        if retriever_type=="Vectorstore_backed_retriever":
-            retriever = base_retriever
-
-        # 7. Contextual Compression Retriever
-        if retriever_type=="Contextual_compression":
-            retriever = create_compression_retriever(
-                embeddings=embeddings,
-                base_retriever=base_retriever,
-                k=compression_retriever_k,
-            )
-
-        # 8. CohereRerank retriever
-        if retriever_type=="Cohere_reranker":
-            retriever = CohereRerank_retriever(
-                base_retriever=base_retriever,
-                cohere_api_key=cohere_api_key,
-                cohere_model=cohere_model,
-                top_n=cohere_top_n
-            )
-
-        print(f"\n{retriever_type} is created successfully!")
-        print(f"Relevant documents will be retrieved from vectorstore ({vectorstore_name}) which uses {LLM_service} embeddings \
-and has {vector_store._collection.count()} chunks.")
-        print("retrieval blocks done")
-        return retriever
-    except Exception as e:
-        print(e)
-'''
-
-
-
-
 
 # Can use any of these LLMs for responses, for now I am Gemini-Pro for the bot (this is for responses now, not embeddings)
 
-
 def instantiate_LLM(LLM_provider,api_key,temperature=0.8,top_p=0.95,model_name=None):
     """Instantiate LLM in Langchain.
     Parameters:
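The CohereRerank_retriever helper named in the hunk header pairs a base retriever with Cohere's reranker through LangChain's ContextualCompressionRetriever; its body is not shown in the diff, so this is a hedged sketch of the usual wiring:

    from langchain.retrievers import ContextualCompressionRetriever
    from langchain.retrievers.document_compressors import CohereRerank

    def CohereRerank_retriever_sketch(base_retriever, cohere_api_key,
                                      cohere_model="rerank-multilingual-v2.0", top_n=8):
        # Rerank the base retriever's candidates and keep only the top_n best matches
        compressor = CohereRerank(
            cohere_api_key=cohere_api_key,
            model=cohere_model,
            top_n=top_n,
        )
        return ContextualCompressionRetriever(
            base_compressor=compressor,
            base_retriever=base_retriever,
        )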
@@ -631,7 +501,6 @@ memory = create_memory(model_name='gemini-pro',memory_max_token=None)
 #memory = create_memory(model_name='gpt-3.5-turbo',memory_max_token=20)
 
 # save history as context for the conversation
-
 memory.save_context(
     inputs={"question":"sample"},
     outputs={"answer":"sample"}
@@ -679,12 +548,11 @@ def answer_template(language="english"):
     """
     return template
 
-
-
 answer_prompt = ChatPromptTemplate.from_template(answer_template())
 
 
 
+# This begins the whole process and gives the parameters
 
 chain = ConversationalRetrievalChain.from_llm(
     condense_question_prompt=PromptTemplate(
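The from_llm call is truncated by the hunk; the commented-out create_ConversationalRetrievalChain removed further down shows the full argument set, and the typical shape is sketched here (llm, condense_llm, retriever, memory, and answer_prompt stand in for the objects built earlier in app.py):

    from langchain.chains import ConversationalRetrievalChain
    from langchain.prompts import PromptTemplate

    standalone_question_prompt = PromptTemplate(
        input_variables=["chat_history", "question"],
        template=(
            "Given the following conversation and a follow up question, "
            "rephrase the follow up question to be a standalone question.\n\n"
            "Chat History:\n{chat_history}\nFollow Up Input: {question}\nStandalone question:"
        ),
    )

    chain_sketch = ConversationalRetrievalChain.from_llm(
        llm=llm,                                  # answering model (Gemini-Pro above)
        condense_question_llm=condense_llm,       # model that rewrites the follow-up question
        condense_question_prompt=standalone_question_prompt,
        combine_docs_chain_kwargs={"prompt": answer_prompt},
        retriever=retriever,                      # e.g. the compression retriever
        memory=memory,
        chain_type="stuff",                       # stuff all retrieved chunks into one prompt
        return_source_documents=True,
    )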
@@ -707,78 +575,6 @@ chain = ConversationalRetrievalChain.from_llm(
 
 
 
-
-# As above, this is not in use but it brings all the above elements together into a single function
-
-'''
-def create_ConversationalRetrievalChain(
-    llm,condense_question_llm,
-    retriever,
-    chain_type= 'stuff',
-    language="english",
-    model_name='gemini-pro'
-    #model_name='gpt-3.5-turbo'
-):
-    """Create a ConversationalRetrievalChain.
-    First, it passes the follow-up question along with the chat history to an LLM which rephrases
-    the question and generates a standalone query.
-    This query is then sent to the retriever, which fetches relevant documents (context)
-    and passes them along with the standalone question and chat history to an LLM to answer.
-    """
-
-    # 1. Define the standalone_question prompt.
-    # Pass the follow-up question along with the chat history to the `condense_question_llm`
-    # which rephrases the question and generates a standalone question.
-
-    standalone_question_prompt = PromptTemplate(
-        input_variables=['chat_history', 'question'],
-        template="""Given the following conversation and a follow up question,
-rephrase the follow up question to be a standalone question, in its original language.\n\n
-Chat History:\n{chat_history}\n
-Follow Up Input: {question}\n
-Standalone question: {question}""")
-
-    # 2. Define the answer_prompt
-    # Pass the standalone question + the chat history + the context (retrieved documents) to the `LLM` which will answer
-
-    answer_prompt = ChatPromptTemplate.from_template(answer_template(language='English'))
-
-    # 3. Add ConversationSummaryBufferMemory for gpt-3.5, and ConversationBufferMemory for the other models
-
-    memory = create_memory(model_name)
-
-    # 4. Create the ConversationalRetrievalChain
-
-    chain = ConversationalRetrievalChain.from_llm(
-        condense_question_prompt=standalone_question_prompt,
-        combine_docs_chain_kwargs={'prompt': answer_prompt},
-        #condense_question_llm=condense_question_llm,
-        condense_question_llm=instantiate_LLM(
-            LLM_provider="Google",api_key=google_api_key,temperature=0.1,
-            model_name="gemini-pro"),
-
-        memory=memory,
-        retriever = compression_retriever_HF,
-        #retriever = base_retriever_HF, #changed this
-        #retriever = retriever,
-        #llm=llm, #changed this
-        llm=instantiate_LLM(
-            LLM_provider="Google",api_key=google_api_key,temperature=0.5,
-            model_name="gemini-pro"),
-        chain_type= "stuff",
-        #chain_type= chain_type,
-        verbose= True,
-        return_source_documents=True
-    )
-
-    print("Conversational retriever chain created successfully!")
-
-    return chain,memory
-
-'''
-
-
-
 # This below is for the interface
 
 def submit_message(prompt, prompt_template, temperature, max_tokens, context_length, state):
@@ -786,7 +582,7 @@ def submit_message(prompt, prompt_template, temperature, max_tokens, context_length, state):
 
     history = state['messages']
 
-
+    # this could be used later if I want to let users set it to different experts and use different documents based on preferred expert
     #global prompt_template_name
     #prompt_template_name = prompt_template
     #print(prompt_template) # prints who is responding if I move to multiple experts
@@ -795,19 +591,13 @@
 
 
     completion = chain.invoke({"question":prompt})
-
-
-
 
     chain.memory.load_memory_variables({})
 
-
    get_empty_state()
 
-
    state['content'] = completion
 
-
    #state.append(completion.copy())
 
    completion = { "content": completion }
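chain.invoke returns a dict keyed by the chain's output variables; with return_source_documents=True a ConversationalRetrievalChain normally exposes 'answer' and 'source_documents', so reading the completion might look like this (the question string is invented):

    completion = chain.invoke({"question": "What is a needs assessment?"})

    print(completion["answer"])              # the model's reply
    for doc in completion.get("source_documents", []):
        print(doc.metadata)                  # provenance of each retrieved chunk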
@@ -852,9 +642,6 @@ def clear_conversation():
 
 
 
-
-
-
 css = """
 #col-container {max-width: 80%; margin-left: auto; margin-right: auto;}
 #chatbox {min-height: 400px;}
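The css string targets element ids in the Gradio Blocks layout; hooking it up generally looks like this minimal sketch (the actual app defines many more components):

    import gradio as gr

    with gr.Blocks(css=css) as demo:
        with gr.Column(elem_id="col-container"):
            chatbot = gr.Chatbot(elem_id="chatbox")

    demo.launch()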
 