ryanrwatkins committed
Commit 0ca4304
1 Parent(s): 93d7318

Update app.py

Files changed (1)
  1. app.py +34 -101
app.py CHANGED
@@ -140,7 +140,7 @@ def on_prompt_template_change_description(prompt_template):
 
 
 
-
+# set to load only PDF, but could change to a specific directory so that other files don't get embeddings
 
 def langchain_document_loader():
     """
@@ -180,6 +180,8 @@ def langchain_document_loader():
     """
     return documents
 
+
+
 langchain_document_loader()
 
 text_splitter = RecursiveCharacterTextSplitter(
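The splitter's arguments are cut off in this hunk; a typical configuration is sketched below, where chunk_size and chunk_overlap are placeholder values, not the app's real settings.

from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.schema import Document

text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1600,    # placeholder value
    chunk_overlap=200,  # placeholder value
)
# split_documents takes a list of Document objects and returns smaller chunks
chunks = text_splitter.split_documents(documents=[Document(page_content="some long text ...")])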
@@ -193,6 +195,7 @@ chunks = text_splitter.split_documents(documents=documents)
 
 
 
+# just FYI; does not impact anything
 
 def tiktoken_tokens(documents,model="gpt-3.5-turbo"):
     """Use tiktoken (tokeniser for OpenAI models) to return a list of token lengths per document."""
@@ -211,6 +214,7 @@ print(f"Number of tokens - 50% percentile : {int(np.quantile(chunks_length,0.5))}")
 print(f"Number of tokens - 75% percentile : {int(np.quantile(chunks_length,0.75))}")
 
 
+# For embeddings I am just using the free HF model, so the others are turned off
 
 def select_embeddings_model(LLM_service="HuggingFace"):
     """Connect to the embeddings API endpoint by specifying
@@ -244,6 +248,7 @@ embeddings_HuggingFace = select_embeddings_model(LLM_service="HuggingFace")
 
 
 
+# Creates the DB that will hold the embedding vectors
 
 def create_vectorstore(embeddings,documents,vectorstore_name):
     """Create a Chroma vector database."""
@@ -261,6 +266,8 @@ def create_vectorstore(embeddings,documents,vectorstore_name):
 
 create_vectorstores = True # change to True to create vectorstores
 
+# Then we tell it to store the embeddings in the VectorStore (sticking with HF for this)
+
 if create_vectorstores:
     """
     vector_store_OpenAI,_ = create_vectorstore(
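The HuggingFace call itself sits outside this hunk (only the commented-out OpenAI/Google calls are shown), but following the same pattern it would presumably be something like the sketch below.

if create_vectorstores:
    vector_store_HF, _ = create_vectorstore(
        embeddings=embeddings_HuggingFace,
        documents=chunks,
        vectorstore_name="Vit_All_HF_Embeddings",
    )
    print("vector_store_HF:", vector_store_HF._collection.count(), "chunks.")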
@@ -288,6 +295,9 @@ if create_vectorstores:
 
     print("")
 
+
+# Now we tell it to keep the chromadb persistent so that it can be referenced at any time
+
 """
 vector_store_OpenAI = Chroma(
     persist_directory = LOCAL_VECTOR_STORE_DIR.as_posix() + "/Vit_All_OpenAI_Embeddings",
@@ -302,13 +312,15 @@ vector_store_google = Chroma(
 print("vector_store_google:",vector_store_google._collection.count(),"chunks.")
 """
 
+
+
 vector_store_HF = Chroma(
     persist_directory = current_dir + "/Vit_All_HF_Embeddings",
     embedding_function=embeddings_HuggingFace)
 print("vector_store_HF:",vector_store_HF._collection.count(),"chunks.")
-# Create a new file
 
 
+# Now we create the code to retrieve embeddings from the vectorstore (again, sticking with HF)
 
 def Vectorstore_backed_retriever(
     vectorstore,search_type="similarity",k=10,score_threshold=None
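Based on the signature shown, the retriever wrapper is presumably a thin layer over Chroma's as_retriever; a sketch, with the k value in the usage line assumed (it is cut off in the hunk header):

def Vectorstore_backed_retriever(vectorstore, search_type="similarity", k=10, score_threshold=None):
    """Return a retriever backed by the given vectorstore (sketch)."""
    search_kwargs = {"k": k}
    if score_threshold is not None:
        search_kwargs["score_threshold"] = score_threshold
    return vectorstore.as_retriever(search_type=search_type, search_kwargs=search_kwargs)

# usage, matching the call in the hunk header above (k assumed)
base_retriever_HF = Vectorstore_backed_retriever(vector_store_HF, "similarity", k=10)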
@@ -341,6 +353,8 @@ base_retriever_HF = Vectorstore_backed_retriever(vector_store_HF,"similarity",k=
 
 
 
+# This next code takes the retrieved documents, removes redundant ones, filters out non-useful information, and returns a shorter set for use
+
 def create_compression_retriever(embeddings, base_retriever, chunk_size=500, k=16, similarity_threshold=None):
     """Build a ContextualCompressionRetriever.
     We wrap the base_retriever (a vectorstore-backed retriever) into a ContextualCompressionRetriever.
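The compressor pipeline itself is below this hunk; a typical LangChain implementation of what the comment describes (split, drop redundant chunks, keep only the most relevant ones) is sketched here. The specific filters, separator, and import paths are assumptions.

from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import DocumentCompressorPipeline, EmbeddingsFilter
from langchain_community.document_transformers import EmbeddingsRedundantFilter
from langchain.text_splitter import CharacterTextSplitter

def create_compression_retriever(embeddings, base_retriever, chunk_size=500, k=16, similarity_threshold=None):
    # split retrieved documents into smaller pieces
    splitter = CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=0, separator=". ")
    # drop pieces that are near-duplicates of each other
    redundant_filter = EmbeddingsRedundantFilter(embeddings=embeddings)
    # keep only the k pieces most similar to the query
    relevant_filter = EmbeddingsFilter(embeddings=embeddings, k=k, similarity_threshold=similarity_threshold)
    pipeline = DocumentCompressorPipeline(transformers=[splitter, redundant_filter, relevant_filter])
    return ContextualCompressionRetriever(base_compressor=pipeline, base_retriever=base_retriever)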
@@ -389,6 +403,8 @@ compression_retriever_HF = create_compression_retriever(
     k=16)
 
 
+# Can use the following to rank the returned documents in order of relevance, but all are used anyway so I am skipping it for now (can test later)
+
 '''
 def CohereRerank_retriever(
     base_retriever,
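For reference, a Cohere re-ranking retriever of the kind this commented-out block names would be wired roughly as follows; the parameter names, model string, and import path vary across langchain versions and are assumptions here (requires the cohere package).

from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import CohereRerank

def CohereRerank_retriever(base_retriever, cohere_api_key, cohere_model="rerank-multilingual-v2.0", top_n=10):
    # re-rank the base retriever's results and keep the top_n most relevant
    compressor = CohereRerank(cohere_api_key=cohere_api_key, model=cohere_model, top_n=top_n)
    return ContextualCompressionRetriever(base_compressor=compressor, base_retriever=base_retriever)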
@@ -417,6 +433,9 @@ def CohereRerank_retriever(
 '''
 
 
+
+# Don't have to use this, but it brings all the above pieces together in a single function (probably not worth implementing since I have the pieces already)
+
 '''
 def retrieval_blocks(
     create_vectorstore=True, # if True a Chroma vectorstore is created, else the Chroma vectorstore will be loaded
@@ -527,7 +546,7 @@ and has {vector_store._collection.count()} chunks.")
 
 
 
-
+# Can use any of these LLMs for responses; for now I am using Gemini-Pro for the bot (this is for responses, not embeddings)
 
 
 def instantiate_LLM(LLM_provider,api_key,temperature=0.7,top_p=0.95,model_name=None):
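Only the signature appears in this hunk; the Gemini branch presumably looks something like the sketch below. The parameter mapping and the use of langchain_google_genai are assumptions.

from langchain_google_genai import ChatGoogleGenerativeAI

def instantiate_LLM(LLM_provider, api_key, temperature=0.7, top_p=0.95, model_name=None):
    if LLM_provider == "Google":
        return ChatGoogleGenerativeAI(
            google_api_key=api_key,
            model=model_name or "gemini-pro",
            temperature=temperature,
            top_p=top_p,
            convert_system_message_to_human=True,  # assumed setting for system prompts
        )
    raise NotImplementedError(f"{LLM_provider} branch not shown in this sketch")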
@@ -577,19 +596,9 @@ def instantiate_LLM(LLM_provider,api_key,temperature=0.7,top_p=0.95,model_name=None):
     )
     return llm
 
-"""
-def get_environment_variable(key):
-    if key in os.environ:
-        value = os.environ.get(key)
-        print(f"\n[INFO]: {key} retrieved successfully.")
-    else :
-        print(f"\n[ERROR]: {key} is not found in your environment variables.")
-        value = getpass(f"Insert your {key}")
-    return value
-"""
-
 
 
+# This creates a history (memory) of prior questions. I am using Gemini for this, but I left the GPT code in case I decide to switch later on.
 
 def create_memory(model_name='gemini-pro',memory_max_token=None):
 #def create_memory(model_name='gpt-3.5-turbo',memory_max_token=None):
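A memory factory consistent with the two commented signatures might look like this; the key names are assumptions based on how ConversationalRetrievalChain usually maps question, answer, and chat_history, and the GPT path is only mentioned in a comment.

from langchain.memory import ConversationBufferMemory

def create_memory(model_name='gemini-pro', memory_max_token=None):
    """Sketch: plain buffer memory for Gemini; a GPT variant could instead use
    ConversationSummaryBufferMemory with max_token_limit=memory_max_token."""
    return ConversationBufferMemory(
        memory_key="chat_history",
        input_key="question",
        output_key="answer",
        return_messages=True,
    )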
@@ -621,21 +630,18 @@ def create_memory(model_name='gemini-pro',memory_max_token=None):
 memory = create_memory(model_name='gemini-pro',memory_max_token=None)
 #memory = create_memory(model_name='gpt-3.5-turbo',memory_max_token=20)
 
-# save context
+# save history as context for the conversation
+
 memory.save_context(
-    inputs={"question":"what does DTC stand for?"},
-    outputs={"answer":"""Diffuse to Choose (DTC) is a novel diffusion inpainting approach designed for the Vit-All application,
-    which allows users to virtually place any e-commerce item in any setting, ensuring detailed, semantically coherent blending with realistic
-    lighting and shadows. It effectively incorporates fine-grained cues from the reference image into the main U-Net decoder
-    using a secondary U-Net encoder.
-    DTC can handle a variety of e-commerce products and can generate images using in-the-wild images & references.
-    It is superior to existing zero-shot personalization methods, especially in preserving the fine-grained details of items."""}
+    inputs={"question":"."},
+    outputs={"answer":"""."""}
 )
 
-
+# loads the saved context above
 memory.load_memory_variables({})
 
 
+# Create the prompt template for the conversation
 
 standalone_question_template = """Given the following conversation and a follow up question,
 rephrase the follow up question to be a standalone question, in the English language.\n\n
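Only the first two lines of the template are visible in the diff; the remainder below follows the standard LangChain condense-question prompt and is an assumption, as is wrapping it in a PromptTemplate.

from langchain.prompts import PromptTemplate

standalone_question_template = """Given the following conversation and a follow up question,
rephrase the follow up question to be a standalone question, in the English language.\n\n
Chat History:\n{chat_history}\n\nFollow Up Input: {question}\n\nStandalone question:"""

standalone_question_prompt = PromptTemplate(
    input_variables=["chat_history", "question"],
    template=standalone_question_template,
)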
@@ -671,48 +677,6 @@ def answer_template(language="english"):
 answer_prompt = ChatPromptTemplate.from_template(answer_template())
 
 
-"""
-# invoke the ChatPromptTemplate
-answer_prompt.invoke(
-    {"question":"plaese ",
-     "context":[Document(page_content="include...")], # the context is a list of retrieved documents.
-     "chat_history":memory.chat_memory}
-)
-
-"""
-
-
-
-
-"""
-# Instantiate the retriever and the ConversationalRetrievalChain :
-
-retriever_Google = retrieval_blocks(
-    create_vectorstore=False,
-    LLM_service="Google",
-    vectorstore_name="Vit_All_Google_Embeddings",
-    retriever_type="Cohere_reranker",
-    base_retriever_search_type="similarity", base_retriever_k=12,
-    compression_retriever_k=16,
-    cohere_api_key=cohere_api_key,cohere_top_n=10,
-)
-
-
-chain_gemini,memory_gemini = custom_ConversationalRetrievalChain(
-    llm = instantiate_LLM(
-        LLM_provider="Google",api_key=google_api_key,temperature=0.5,model_name="gemini-pro"
-    ),
-    condense_question_llm = instantiate_LLM(
-        LLM_provider="Google",api_key=google_api_key,temperature=0.1,model_name="gemini-pro"),
-    retriever=retriever_Google,
-    language="english",
-    llm_provider="Google",
-    model_name="gemini-pro"
-)
-
-
-memory_gemini.clear()
-"""
 
 
 chain = ConversationalRetrievalChain.from_llm(
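The keyword arguments of this call are cut off in the diff; a plausible wiring that reuses the pieces defined earlier in app.py (instantiate_LLM, compression_retriever_HF, memory, answer_prompt) is sketched below. Every argument shown is an assumption about how those pieces fit together, not the app's exact call.

from langchain.chains import ConversationalRetrievalChain

chain = ConversationalRetrievalChain.from_llm(
    llm=instantiate_LLM("Google", api_key=google_api_key, temperature=0.5, model_name="gemini-pro"),
    condense_question_llm=instantiate_LLM("Google", api_key=google_api_key, temperature=0.1, model_name="gemini-pro"),
    condense_question_prompt=standalone_question_prompt,   # the prompt built from standalone_question_template
    retriever=compression_retriever_HF,
    memory=memory,
    combine_docs_chain_kwargs={"prompt": answer_prompt},
    chain_type="stuff",
    return_source_documents=False,
)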
@@ -733,37 +697,10 @@ chain = ConversationalRetrievalChain.from_llm(
 )
 
 
-"""
-# let's invoke the chain
-response = chain.invoke({"question":"what does Google stand for?"})
-print(response['answer'])
-
 
-chain.memory.load_memory_variables({})
 
-follow_up_question = "plaese give more details about it, including its use cases and implementation."
 
-chain.invoke({"question":follow_up_question})['answer']
-"""
-
-
-
-"""
-# let's invoke the chain
-response = chain.invoke({"question":"what does Google stand for?"})
-print(response['answer'])
-
-
-chain.memory.load_memory_variables({})
-
-follow_up_question = "plaese give more details about it, including its use cases and implementation."
-
-chain.invoke({"question":follow_up_question})['answer'])
-"""
-
-
-
-# It is not clear to me if this is running. If you take it out, it still provides answers but also using different promptTemplate
+# It
 
 def create_ConversationalRetrievalChain(
     llm,condense_question_llm,
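For reference, the invocation pattern that the removed blocks above exercised (with the typo and the stray closing parenthesis fixed) looks like this:

# ask a first question
response = chain.invoke({"question": "what does Google stand for?"})
print(response["answer"])

# the memory now carries the first turn, so a follow-up can rely on it
follow_up_question = "Please give more details about it, including its use cases and implementation."
print(chain.invoke({"question": follow_up_question})["answer"])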
@@ -838,22 +775,18 @@ def submit_message(prompt, prompt_template, temperature, max_tokens, context_len
 
     history = state['messages']
 
-    #if not prompt:
-    #    return gr.update(value=''), [(history[i]['content'], history[i+1]['content']) for i in range(0, len(history)-1, 2)], state
-
-    #prompt_template = prompt_templates[prompt_template]
+
     global prompt_template_name
     prompt_template_name = prompt_template
-    print(prompt_template)
+    print(prompt_template) # prints who is responding if I move to multiple experts
     print(prompt_templates[prompt_template])
 
 
 
     completion = chain.invoke({"question":prompt})
-    #print("completion")
+
     #print(completion)
-    #chain = load_qa_chain(ChatOpenAI(temperature=temperature, max_tokens=max_tokens, model_name="gpt-3.5-turbo"), chain_type="stuff")
-    #completion = chain.run(input_documents=docs, question=query)
+
 
 
     chain.memory.load_memory_variables({})
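The rest of submit_message is not shown in this commit. A hypothetical continuation, echoing the pair-building list comprehension in the lines removed above, would hand the answer back to the Gradio Chatbot roughly like this (return value and keys assumed):

    # hypothetical continuation of submit_message, not code from app.py
    history.append({"role": "user", "content": prompt})
    history.append({"role": "assistant", "content": completion["answer"]})
    chat_pairs = [(history[i]["content"], history[i + 1]["content"]) for i in range(0, len(history) - 1, 2)]
    # return gr.update(value=''), chat_pairs, state   # mirrors the removed return line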
 