ryanrwatkins committed
Commit f527f6a
1 Parent(s): 4ef51a0

Update app.py

Files changed (1):
  1. app.py +11 -224

app.py CHANGED
@@ -74,10 +74,11 @@ from langchain.memory import ConversationSummaryBufferMemory,ConversationBufferMemory
 from langchain.schema import Document
 
 
-# Cohere
+# Cohere (not currently in use)
 from langchain.retrievers.document_compressors import CohereRerank
 from langchain_community.llms import Cohere
 
+# Get API keys
 openai_api_key = os.environ['openai_key']
 google_api_key = os.environ['gemini_key']
 HF_key = os.environ['HF_token']
@@ -86,12 +87,12 @@ cohere_api_key = os.environ['cohere_api']
 current_dir = os.getcwd()
 
 
-
-
+# Not currently in use
 prompt_templates = {"All Needs Experts": "Respond as if you are combination of all needs assessment experts."}
 actor_description = {"All Needs Experts": "<div style='float: left;margin: 0px 5px 0px 5px;'><img src='https://na.weshareresearch.com/wp-content/uploads/2023/04/experts2.jpg' alt='needs expert image' style='width:70px;align:top;'></div>A combination of all needs assessment experts."}
 
 
+# Initiates the UI features
 
 def get_empty_state():
     return { "messages": []}
@@ -129,17 +130,6 @@ def on_prompt_template_change_description(prompt_template):
 
 
 
-
-
-
-
-
-
-
-
-
-
-
 # set to load only PDF, but could change to set to specific directory, so that other files don't get embeddings
 
 def langchain_document_loader():
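The body of langchain_document_loader is only partially visible in this diff. As a hedged illustration, a PDF-only loader built from LangChain's DirectoryLoader and PyPDFLoader might look like the sketch below (the function name suffix, directory argument, and glob pattern are assumptions, not taken from app.py):

    from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader

    def langchain_document_loader_sketch(directory):
        # Restrict loading to *.pdf so other file types never get embedded
        loader = DirectoryLoader(directory, glob="**/*.pdf", loader_cls=PyPDFLoader)
        return loader.load()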
@@ -179,23 +169,21 @@ def langchain_document_loader():
         documents.extend(doc_loader.load())
     """
     return documents
+langchain_document_loader()
 
 
-
-langchain_document_loader()
+# Text splitting of the uploaded documents, the chunks will become vectors
 
 text_splitter = RecursiveCharacterTextSplitter(
     separators = ["\n\n", "\n", " ", ""],
     chunk_size = 1500,
     chunk_overlap= 200
 )
-
-# Text splitting
 chunks = text_splitter.split_documents(documents=documents)
 
 
 
-# just FYI, does not impact anything
+# just FYI, does not impact anything; it is just for information when re-starting the app
 
 def tiktoken_tokens(documents,model="gpt-3.5-turbo"):
     """Use tiktoken (tokeniser for OpenAI models) to return a list of token lengths per document."""
@@ -204,8 +192,6 @@ def tiktoken_tokens(documents,model="gpt-3.5-turbo"):
     tokens_length = [len(encoding.encode(documents[i].page_content)) for i in range(len(documents))]
 
     return tokens_length
-
-
 chunks_length = tiktoken_tokens(chunks,model="gpt-3.5-turbo")
 
 print(f"Number of tokens - Average : {int(np.mean(chunks_length))}")
@@ -214,6 +200,7 @@ print(f"Number of tokens - 50% percentile : {int(np.quantile(chunks_length,0.5))}")
 print(f"Number of tokens - 75% percentile : {int(np.quantile(chunks_length,0.75))}")
 
 
+
 # For embeddings I am just using the free HF model so others are turned off
 
 def select_embeddings_model(LLM_service="HuggingFace"):
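The body of select_embeddings_model falls outside this hunk; a minimal sketch of selecting the free Hugging Face option through LangChain (the sentence-transformers model name is an assumption):

    from langchain_community.embeddings import HuggingFaceEmbeddings

    def select_embeddings_model_sketch(LLM_service="HuggingFace"):
        if LLM_service == "HuggingFace":
            # Free, locally run sentence-transformers model; name is an assumption
            return HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
        raise ValueError(f"Unsupported LLM_service: {LLM_service}")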
@@ -248,8 +235,7 @@ embeddings_HuggingFace = select_embeddings_model(LLM_service="HuggingFace")
 
 
 
-# Creates the DB that will hold the embedding vectors
-
+# Creates the Database that will hold the embedding vectors
 def create_vectorstore(embeddings,documents,vectorstore_name):
     """Create a Chroma vector database."""
     persist_directory = (current_dir + "/" + vectorstore_name)
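The rest of create_vectorstore is elided by the hunk; building a persistent Chroma index from chunks usually reduces to Chroma.from_documents, roughly as sketched here (assuming the function is a thin wrapper):

    from langchain_community.vectorstores import Chroma

    def create_vectorstore_sketch(embeddings, documents, persist_directory):
        # Embed every chunk and write the index to disk so it can be reloaded later
        return Chroma.from_documents(
            documents=documents,
            embedding=embeddings,
            persist_directory=persist_directory,
        )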
@@ -263,11 +249,9 @@ def create_vectorstore(embeddings,documents,vectorstore_name):
     return vector_store
 
 
-
 create_vectorstores = True # change to True to create vectorstores
 
 # Then we tell it to store the embeddings in the VectorStore (sticking with HF for this)
-
 if create_vectorstores:
     """
     vector_store_OpenAI,_ = create_vectorstore(
@@ -312,8 +296,6 @@ vector_store_google = Chroma(
 print("vector_store_google:",vector_store_google._collection.count(),"chunks.")
 """
 
-
-
 vector_store_HF = Chroma(
     persist_directory = current_dir + "/Vit_All_HF_Embeddings",
     embedding_function=embeddings_HuggingFace)
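With the persisted store reloaded as vector_store_HF, the vector-store-backed retriever referenced by the later hunks is typically obtained with as_retriever; a sketch with assumed search parameters:

    # Expose the Chroma store as a retriever returning the top-k similar chunks
    base_retriever_sketch = vector_store_HF.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 10},
    )
    docs = base_retriever_sketch.get_relevant_documents("What is a needs assessment?")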
@@ -434,121 +416,9 @@ def CohereRerank_retriever(
 
 
 
-# Don't have to use this, but it brings all the above pieces together into a single function
-
-'''
-def retrieval_blocks(
-    create_vectorstore=True,# if True a Chroma vectorstore is created, else the Chroma vectorstore will be loaded
-    LLM_service="HuggingFace",
-    vectorstore_name="Vit_All_HF_Embeddings",
-    chunk_size = 1600, chunk_overlap=200, # parameters of the RecursiveCharacterTextSplitter
-    retriever_type="Vectorstore_backed_retriever",
-    base_retriever_search_type="similarity", base_retriever_k=10, base_retriever_score_threshold=None,
-    compression_retriever_k=16,
-    cohere_api_key="***", cohere_model="rerank-multilingual-v2.0", cohere_top_n=8,
-):
-    print("retrieval blocks started")
-    """
-    Retrieval includes: document loaders, text splitter, vectorstore and retriever.
-
-    Parameters:
-        create_vectorstore (boolean): If True, a new Chroma vectorstore will be created. Otherwise, an existing vectorstore will be loaded.
-        LLM_service: OpenAI, Google or HuggingFace.
-        vectorstore_name (str): the name of the vectorstore.
-        chunk_size and chunk_overlap: parameters of the RecursiveCharacterTextSplitter, default = (1600,200).
-
-        retriever_type (str): in [Vectorstore_backed_retriever,Contextual_compression,Cohere_reranker]
-
-        base_retriever_search_type: search_type in ["similarity", "mmr", "similarity_score_threshold"], default = similarity.
-        base_retriever_k: The most similar vectors to retrieve (default k = 10).
-        base_retriever_score_threshold: score_threshold used by the base retriever, default = None.
-
-        compression_retriever_k: top k documents returned by the compression retriever, default=16
-
-        cohere_api_key: Cohere API key
-        cohere_model (str): The Cohere model can be either 'rerank-english-v2.0' or 'rerank-multilingual-v2.0', with the latter being the default.
-        cohere_top_n: top n results returned by Cohere rerank, default = 8.
-
-    Output:
-        retriever.
-    """
-    try:
-        # Create new Vectorstore (Chroma index)
-        if create_vectorstore:
-            # 1. load documents
-            documents = langchain_document_loader(current_dir)
-
-            # 2. Text Splitter: split documents to chunks
-            text_splitter = RecursiveCharacterTextSplitter(
-                separators = ["\n\n", "\n", " ", ""],
-                chunk_size = chunk_size,
-                chunk_overlap= chunk_overlap
-            )
-            chunks = text_splitter.split_documents(documents=documents)
-
-            # 3. Embeddings
-            embeddings = select_embeddings_model(LLM_service=LLM_service)
-
-            # 4. Vectorstore: create Chroma index
-            vector_store = create_vectorstore(
-                embeddings=embeddings,
-                documents = chunks,
-                vectorstore_name=vectorstore_name,
-            )
-
-        # 5. Load a Vectorstore (Chroma index)
-        else:
-            embeddings = select_embeddings_model(LLM_service=LLM_service)
-            vector_store = Chroma(
-                persist_directory = current_dir + "/" + vectorstore_name,
-                embedding_function=embeddings
-            )
-
-
-        # 6. base retriever: Vector store-backed retriever
-        base_retriever = Vectorstore_backed_retriever(
-            vector_store,
-            search_type=base_retriever_search_type,
-            k=base_retriever_k,
-            score_threshold=base_retriever_score_threshold
-        )
-        retriever = None
-        if retriever_type=="Vectorstore_backed_retriever":
-            retriever = base_retriever
-
-        # 7. Contextual Compression Retriever
-        if retriever_type=="Contextual_compression":
-            retriever = create_compression_retriever(
-                embeddings=embeddings,
-                base_retriever=base_retriever,
-                k=compression_retriever_k,
-            )
-
-        # 8. CohereRerank retriever
-        if retriever_type=="Cohere_reranker":
-            retriever = CohereRerank_retriever(
-                base_retriever=base_retriever,
-                cohere_api_key=cohere_api_key,
-                cohere_model=cohere_model,
-                top_n=cohere_top_n
-            )
-
-        print(f"\n{retriever_type} is created successfully!")
-        print(f"Relevant documents will be retrieved from vectorstore ({vectorstore_name}) which uses {LLM_service} embeddings \
-and has {vector_store._collection.count()} chunks.")
-        print("retrieval blocks done")
-        return retriever
-    except Exception as e:
-        print(e)
-'''
-
-
-
-
 
 # Can use any of these LLMs for responses, for now I am Gemini-Pro for the bot (this is for responses now, not embeddings)
 
-
 def instantiate_LLM(LLM_provider,api_key,temperature=0.8,top_p=0.95,model_name=None):
     """Instantiate LLM in Langchain.
     Parameters:
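The CohereRerank_retriever helper named in the hunk header pairs a base retriever with Cohere's reranker through LangChain's ContextualCompressionRetriever; its body is not shown in the diff, so this is a hedged sketch of the usual wiring:

    from langchain.retrievers import ContextualCompressionRetriever
    from langchain.retrievers.document_compressors import CohereRerank

    def CohereRerank_retriever_sketch(base_retriever, cohere_api_key,
                                      cohere_model="rerank-multilingual-v2.0", top_n=8):
        # Rerank the base retriever's candidates and keep only the top_n best matches
        compressor = CohereRerank(
            cohere_api_key=cohere_api_key,
            model=cohere_model,
            top_n=top_n,
        )
        return ContextualCompressionRetriever(
            base_compressor=compressor,
            base_retriever=base_retriever,
        )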
@@ -631,7 +501,6 @@ memory = create_memory(model_name='gemini-pro',memory_max_token=None)
 #memory = create_memory(model_name='gpt-3.5-turbo',memory_max_token=20)
 
 # save history as context for the conversation
-
 memory.save_context(
     inputs={"question":"sample"},
     outputs={"answer":"sample"}
@@ -679,12 +548,11 @@ def answer_template(language="english"):
     """
     return template
 
-
-
 answer_prompt = ChatPromptTemplate.from_template(answer_template())
 
 
 
+# This begins the whole process and gives the parameters
 
 chain = ConversationalRetrievalChain.from_llm(
     condense_question_prompt=PromptTemplate(
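The from_llm call is truncated by the hunk; the commented-out create_ConversationalRetrievalChain removed further down shows the full argument set, and the typical shape is sketched here (llm, condense_llm, retriever, memory, and answer_prompt stand in for the objects built earlier in app.py):

    from langchain.chains import ConversationalRetrievalChain
    from langchain.prompts import PromptTemplate

    standalone_question_prompt = PromptTemplate(
        input_variables=["chat_history", "question"],
        template=(
            "Given the following conversation and a follow up question, "
            "rephrase the follow up question to be a standalone question.\n\n"
            "Chat History:\n{chat_history}\nFollow Up Input: {question}\nStandalone question:"
        ),
    )

    chain_sketch = ConversationalRetrievalChain.from_llm(
        llm=llm,                                  # answering model (Gemini-Pro above)
        condense_question_llm=condense_llm,       # model that rewrites the follow-up question
        condense_question_prompt=standalone_question_prompt,
        combine_docs_chain_kwargs={"prompt": answer_prompt},
        retriever=retriever,                      # e.g. the compression retriever
        memory=memory,
        chain_type="stuff",                       # stuff all retrieved chunks into one prompt
        return_source_documents=True,
    )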
@@ -707,78 +575,6 @@ chain = ConversationalRetrievalChain.from_llm(
 
 
 
-
-# As above, this is not in use but it brings all the above elements together into a single function
-
-'''
-def create_ConversationalRetrievalChain(
-    llm,condense_question_llm,
-    retriever,
-    chain_type= 'stuff',
-    language="english",
-    model_name='gemini-pro'
-    #model_name='gpt-3.5-turbo'
-):
-    """Create a ConversationalRetrievalChain.
-    First, it passes the follow-up question along with the chat history to an LLM which rephrases
-    the question and generates a standalone query.
-    This query is then sent to the retriever, which fetches relevant documents (context)
-    and passes them along with the standalone question and chat history to an LLM to answer.
-    """
-
-    # 1. Define the standalone_question prompt.
-    # Pass the follow-up question along with the chat history to the `condense_question_llm`
-    # which rephrases the question and generates a standalone question.
-
-    standalone_question_prompt = PromptTemplate(
-        input_variables=['chat_history', 'question'],
-        template="""Given the following conversation and a follow up question,
-rephrase the follow up question to be a standalone question, in its original language.\n\n
-Chat History:\n{chat_history}\n
-Follow Up Input: {question}\n
-Standalone question: {question}""")
-
-    # 2. Define the answer_prompt
-    # Pass the standalone question + the chat history + the context (retrieved documents) to the `LLM` which will answer
-
-    answer_prompt = ChatPromptTemplate.from_template(answer_template(language='English'))
-
-    # 3. Add ConversationSummaryBufferMemory for gpt-3.5, and ConversationBufferMemory for the other models
-
-    memory = create_memory(model_name)
-
-    # 4. Create the ConversationalRetrievalChain
-
-    chain = ConversationalRetrievalChain.from_llm(
-        condense_question_prompt=standalone_question_prompt,
-        combine_docs_chain_kwargs={'prompt': answer_prompt},
-        #condense_question_llm=condense_question_llm,
-        condense_question_llm=instantiate_LLM(
-            LLM_provider="Google",api_key=google_api_key,temperature=0.1,
-            model_name="gemini-pro"),
-
-        memory=memory,
-        retriever = compression_retriever_HF,
-        #retriever = base_retriever_HF, #changed this
-        #retriever = retriever,
-        #llm=llm, #changed this
-        llm=instantiate_LLM(
-            LLM_provider="Google",api_key=google_api_key,temperature=0.5,
-            model_name="gemini-pro"),
-        chain_type= "stuff",
-        #chain_type= chain_type,
-        verbose= True,
-        return_source_documents=True
-    )
-
-    print("Conversational retriever chain created successfully!")
-
-    return chain,memory
-
-'''
-
-
-
 # This below is for the interface
 
 def submit_message(prompt, prompt_template, temperature, max_tokens, context_length, state):
@@ -786,7 +582,7 @@ def submit_message(prompt, prompt_template, temperature, max_tokens, context_length, state):
 
     history = state['messages']
 
-
+    # this could be used later if I want to let users set it to different experts and use different documents based on preferred expert
     #global prompt_template_name
     #prompt_template_name = prompt_template
     #print(prompt_template) # prints who is responding if I move to multiple experts
@@ -795,19 +591,13 @@
 
 
     completion = chain.invoke({"question":prompt})
-
-
-
 
     chain.memory.load_memory_variables({})
 
-
    get_empty_state()
 
-
    state['content'] = completion
 
-
    #state.append(completion.copy())
 
    completion = { "content": completion }
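chain.invoke returns a dict keyed by the chain's output variables; with return_source_documents=True a ConversationalRetrievalChain normally exposes 'answer' and 'source_documents', so reading the completion might look like this (the question string is invented):

    completion = chain.invoke({"question": "What is a needs assessment?"})

    print(completion["answer"])              # the model's reply
    for doc in completion.get("source_documents", []):
        print(doc.metadata)                  # provenance of each retrieved chunk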
@@ -852,9 +642,6 @@ def clear_conversation():
 
 
 
-
-
-
 css = """
 #col-container {max-width: 80%; margin-left: auto; margin-right: auto;}
 #chatbox {min-height: 400px;}
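The css string targets element ids in the Gradio Blocks layout; hooking it up generally looks like this minimal sketch (the actual app defines many more components):

    import gradio as gr

    with gr.Blocks(css=css) as demo:
        with gr.Column(elem_id="col-container"):
            chatbot = gr.Chatbot(elem_id="chatbox")

    demo.launch()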
 