# NOTE: the page-status text "Spaces: Running" that preceded this header was scrape residue, not code.
# Based on:
# https://medium.com/thedeephub/rag-chatbot-powered-by-langchain-openai-google-generative-ai-and-hugging-face-apis-6a9b9d7d59db
# https://github.com/AlaGrine/RAG_chatabot_with_Langchain/blob/main/RAG_notebook.ipynb
from langchain_community.document_loaders import ( | |
PyPDFLoader, | |
TextLoader, | |
DirectoryLoader, | |
CSVLoader, | |
UnstructuredExcelLoader, | |
Docx2txtLoader, | |
) | |
from langchain.text_splitter import RecursiveCharacterTextSplitter, CharacterTextSplitter | |
import tiktoken | |
import gradio as gr | |
import csv | |
import os, tempfile, glob, random | |
from pathlib import Path | |
#from IPython.display import Markdown | |
from PIL import Image | |
from getpass import getpass | |
import numpy as np | |
from itertools import combinations | |
import pypdf | |
import requests | |
# LLM: openai and google_genai | |
import openai | |
from langchain_openai import OpenAI, OpenAIEmbeddings, ChatOpenAI | |
from langchain_google_genai import ChatGoogleGenerativeAI | |
from langchain_google_genai import GoogleGenerativeAIEmbeddings | |
# LLM: HuggingFace | |
from langchain_community.embeddings import HuggingFaceInferenceAPIEmbeddings | |
from langchain_community.llms import HuggingFaceHub | |
# langchain prompts, memory, chains... | |
from langchain.prompts import PromptTemplate, ChatPromptTemplate | |
from langchain.chains import ConversationalRetrievalChain | |
from langchain_community.chat_message_histories import StreamlitChatMessageHistory | |
from operator import itemgetter | |
from langchain_core.runnables import RunnableLambda, RunnableParallel, RunnablePassthrough | |
from langchain.schema import Document, format_document | |
from langchain_core.messages import AIMessage, HumanMessage, get_buffer_string | |
from langchain_google_genai import ( | |
ChatGoogleGenerativeAI, | |
HarmBlockThreshold, | |
HarmCategory, | |
) | |
# OutputParser | |
from langchain_core.output_parsers import StrOutputParser | |
# Chroma: vectorstore | |
from langchain_community.vectorstores import Chroma | |
# Contextual Compression | |
from langchain.retrievers.document_compressors import DocumentCompressorPipeline | |
from langchain.text_splitter import CharacterTextSplitter | |
from langchain_community.document_transformers import EmbeddingsRedundantFilter,LongContextReorder | |
from langchain.retrievers.document_compressors import EmbeddingsFilter | |
from langchain.retrievers import ContextualCompressionRetriever | |
from langchain.retrievers import ContextualCompressionRetriever | |
from langchain.retrievers.document_compressors import CohereRerank | |
from langchain_community.llms import Cohere | |
from langchain.memory import ConversationSummaryBufferMemory,ConversationBufferMemory | |
from langchain.schema import Document | |
# Cohere | |
from langchain.retrievers.document_compressors import CohereRerank | |
from langchain_community.llms import Cohere | |
# --- API credentials ---------------------------------------------------------
# Each key is read from the environment at import time; a missing variable
# raises KeyError right here, before any model or UI object is built.
openai_api_key = os.environ["openai_key"]
google_api_key = os.environ["gemini_key"]
HF_key = os.environ["HF_token"]
cohere_api_key = os.environ["cohere_api"]

# Working directory: scanned for PDFs and used as the parent folder of the
# persisted Chroma store.
current_dir = os.getcwd()

# Built-in expert persona. download_prompt_templates() merges more entries
# into both dicts, keyed by the same expert name.
prompt_templates = {
    "All Needs Experts": "Respond as if you are combination of all needs assessment experts.",
}
actor_description = {
    "All Needs Experts": "<div style='float: left;margin: 0px 5px 0px 5px;'><img src='https://na.weshareresearch.com/wp-content/uploads/2023/04/experts2.jpg' alt='needs expert image' style='width:70px;align:top;'></div>A combination of all needs assessment experts.",
}
def get_empty_state():
    """Return a fresh session-state dict whose only entry is an empty message list."""
    return dict(messages=[])
def download_prompt_templates():
    """Download extra expert personas and refresh the expert dropdown.

    The remote file is parsed as CSV with a header row followed by rows of
    ``act, prompt[, description]``. Every usable row is merged into the
    module-level ``prompt_templates`` and ``actor_description`` dicts.

    Returns:
        A ``gr.update`` selecting the first choice and listing all known
        experts (the first entry stays first, the rest are sorted). When the
        download fails, the error is printed and the update is built from the
        entries already present, so the dropdown is never left empty.
    """
    url = "https://huggingface.co/spaces/ryanrwatkins/needs/raw/main/gurus.txt"
    try:
        # A timeout keeps a stalled request from blocking the page load.
        response = requests.get(url, timeout=10)
        # Treat HTTP errors as failures instead of parsing an error page as CSV.
        response.raise_for_status()
    except requests.exceptions.RequestException as e:
        print(f"An error occurred while downloading prompt templates: {e}")
    else:
        reader = csv.reader(response.text.splitlines())
        next(reader, None)  # skip the header row; tolerate an empty body
        for row in reader:
            if len(row) < 2:
                continue
            act = row[0].strip('"')
            prompt_templates[act] = row[1].strip('"')
            # The description column is optional: the original guard admitted
            # two-column rows but then indexed row[2] unconditionally.
            actor_description[act] = row[2].strip('"') if len(row) > 2 else ""

    choices = list(prompt_templates)
    choices = choices[:1] + sorted(choices[1:])
    return gr.update(value=choices[0], choices=choices)
def on_prompt_template_change(prompt_template):
    """Return the prompt text registered for the selected expert name.

    A non-string selection yields ``None``; an unknown name raises ``KeyError``.
    """
    if isinstance(prompt_template, str):
        return prompt_templates[prompt_template]
    return None
def on_prompt_template_change_description(prompt_template):
    """Return the description registered for the selected expert name.

    A non-string selection yields ``None``; an unknown name raises ``KeyError``.
    """
    if isinstance(prompt_template, str):
        return actor_description[prompt_template]
    return None
# set to load only PDF, but could change to set to specific directory, so that other files don't get embeddings | |
def langchain_document_loader():
    """Load every PDF that sits directly in ``current_dir``.

    The module-level ``documents`` list is reset and refilled on each call,
    and the same list is returned.

    Only the PDF loader is active. The txt, csv and docx loaders are disabled
    and kept below for reference.
    """
    global documents
    documents = []

    # Disabled loaders (TMP_DIR is not defined in the visible code):
    #   txt : DirectoryLoader(TMP_DIR.as_posix(), glob="**/*.txt",
    #                         loader_cls=TextLoader, show_progress=True)
    #   csv : DirectoryLoader(TMP_DIR.as_posix(), glob="**/*.csv",
    #                         loader_cls=CSVLoader, show_progress=True,
    #                         loader_kwargs={"encoding": "utf8"})
    #   docx: DirectoryLoader(current_dir, glob="**/*.docx",
    #                         loader_cls=Docx2txtLoader, show_progress=True)
    pdf_loader = DirectoryLoader(
        current_dir,
        glob="*.pdf",
        loader_cls=PyPDFLoader,
        show_progress=True,
    )
    documents += pdf_loader.load()
    return documents
# Load the PDFs once at start-up; the call also sets the module-level `documents`.
langchain_document_loader()

# Split the loaded documents into chunks (chunk_size=1500, chunk_overlap=200),
# trying paragraph, line, word and finally character boundaries in that order.
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1500,
    chunk_overlap=200,
    separators=["\n\n", "\n", " ", ""],
)
chunks = text_splitter.split_documents(documents=documents)
# just FYI, does not impact anything | |
def tiktoken_tokens(documents, model="gpt-3.5-turbo"):
    """Return one token count per document, measured on ``page_content``.

    Parameters:
        documents: sequence of objects exposing a ``page_content`` string.
        model: model name whose tiktoken encoding is used for counting.
    """
    encoder = tiktoken.encoding_for_model(model)
    return [len(encoder.encode(doc.page_content)) for doc in documents]
# Informational only: print the token-length distribution of the chunks.
chunks_length = tiktoken_tokens(chunks, model="gpt-3.5-turbo")
if chunks_length:
    print(f"Number of tokens - Average : {int(np.mean(chunks_length))}")
    for pct in (25, 50, 75):
        print(f"Number of tokens - {pct}% percentile : {int(np.quantile(chunks_length, pct / 100))}")
else:
    # With no chunks np.mean() yields NaN and int(NaN) raises ValueError, so
    # skip the statistics instead of crashing on purely diagnostic output.
    print("Number of tokens - no chunks available (no documents were loaded)")
# For embeddings I am just using the free HF model so others are turned off | |
def select_embeddings_model(LLM_service="HuggingFace"):
    """Return the embeddings client for ``LLM_service``.

    Only ``"HuggingFace"`` is enabled; it uses the Hugging Face Inference API
    with the ``sentence-transformers/all-MiniLM-l6-v2`` model.

    Disabled alternatives, kept for reference:
        OpenAI: OpenAIEmbeddings(model='text-embedding-ada-002', api_key=openai_api_key)
        Google: GoogleGenerativeAIEmbeddings(model="models/embedding-001",
                                             google_api_key=google_api_key)

    Raises:
        ValueError: if ``LLM_service`` is not an enabled service. Previously
            this case failed with an UnboundLocalError on ``return``.
    """
    if LLM_service != "HuggingFace":
        raise ValueError(
            f"Unsupported embeddings service {LLM_service!r}; only 'HuggingFace' is enabled."
        )

    embeddings = HuggingFaceInferenceAPIEmbeddings(
        api_key=HF_key,
        # model_name="thenlper/gte-large"
        model_name="sentence-transformers/all-MiniLM-l6-v2",
    )
    print("embedding model selected")
    return embeddings
# Only the Hugging Face embeddings are instantiated; the other two are disabled.
# embeddings_OpenAI = select_embeddings_model(LLM_service="OpenAI")
# embeddings_google = select_embeddings_model(LLM_service="Google")
embeddings_HuggingFace = select_embeddings_model("HuggingFace")
# Creates the DB that will hold the embedding vectors | |
def create_vectorstore(embeddings, documents, vectorstore_name):
    """Embed ``documents`` into a Chroma store persisted under ``current_dir``.

    Parameters:
        embeddings: embeddings client used to vectorise the documents.
        documents: the chunks to index.
        vectorstore_name: folder name, created inside ``current_dir``.

    Returns:
        The populated Chroma vector store.
    """
    store_path = f"{current_dir}/{vectorstore_name}"
    vector_store = Chroma.from_documents(
        documents=documents,
        embedding=embeddings,
        persist_directory=store_path,
    )
    print("created Chroma vector database")
    return vector_store
# When True the chunks are embedded and written to the store at start-up;
# set to False to skip that and only reopen the persisted store further down.
create_vectorstores = True

# Store the embeddings in the vector store (sticking with Hugging Face for this).
if create_vectorstores:
    # Disabled variants, kept for reference:
    #   vector_store_OpenAI, _ = create_vectorstore(
    #       embeddings=embeddings_OpenAI, documents=chunks,
    #       vectorstore_name="Vit_All_OpenAI_Embeddings")
    #   print("vector_store_OpenAI:", vector_store_OpenAI._collection.count(), "chunks.")
    #   vector_store_google, new_vectorstore_name = create_vectorstore(
    #       embeddings=embeddings_google, documents=chunks,
    #       vectorstore_name="Vit_All_Google_Embeddings")
    #   print("vector_store_google:", vector_store_google._collection.count(), "chunks.")
    vector_store_HF = create_vectorstore(
        documents=chunks,
        embeddings=embeddings_HuggingFace,
        vectorstore_name="Vit_All_HF_Embeddings",
    )
    print("vector_store_HF:", vector_store_HF._collection.count(), "chunks.")
    print("")
# Reopen the persisted Chroma store so it can be queried for the rest of the run.
# Disabled variants, kept for reference:
#   vector_store_OpenAI = Chroma(
#       persist_directory=LOCAL_VECTOR_STORE_DIR.as_posix() + "/Vit_All_OpenAI_Embeddings",
#       embedding_function=embeddings_OpenAI)
#   print("vector_store_OpenAI:", vector_store_OpenAI._collection.count(), "chunks.")
#   vector_store_google = Chroma(
#       persist_directory=current_dir + "/Vit_All_Google_Embeddings",
#       embedding_function=embeddings_google)
#   print("vector_store_google:", vector_store_google._collection.count(), "chunks.")
vector_store_HF = Chroma(
    embedding_function=embeddings_HuggingFace,
    persist_directory=f"{current_dir}/Vit_All_HF_Embeddings",
)
print("vector_store_HF:", vector_store_HF._collection.count(), "chunks.")
# Now we create the code to retrieve embeddings from the vectorstore (again, sticking with HF) | |
def Vectorstore_backed_retriever(
    vectorstore, search_type="similarity", k=10, score_threshold=None
):
    """Build a retriever on top of ``vectorstore``.

    Parameters:
        vectorstore: object exposing ``as_retriever(search_type=..., search_kwargs=...)``.
        search_type: passed through unchanged; "similarity" by default
            ("mmr" and "similarity_score_threshold" are the other documented values).
        k: number of documents to return (default 10); omitted from the search
            arguments when None.
        score_threshold: minimum relevance threshold; omitted when None.

    Returns:
        The retriever, which is also stored in the module-level ``retriever``.
    """
    print("vector_backed retriever started")
    global retriever

    # Forward only the options that were actually supplied.
    candidate_kwargs = {"k": k, "score_threshold": score_threshold}
    search_kwargs = {
        name: value for name, value in candidate_kwargs.items() if value is not None
    }

    retriever = vectorstore.as_retriever(
        search_type=search_type, search_kwargs=search_kwargs
    )
    print("vector_backed retriever done")
    return retriever
# Similarity search over the Hugging Face store, 10 hits per query.
# base_retriever_OpenAI = Vectorstore_backed_retriever(vector_store_OpenAI, "similarity", k=10)
# base_retriever_google = Vectorstore_backed_retriever(vector_store_google, "similarity", k=10)
base_retriever_HF = Vectorstore_backed_retriever(
    vector_store_HF, search_type="similarity", k=10
)
# This next code takes the retrieved embeddings, gets rid of redundant ones, takes out non-useful information, and provides back a shorter embedding for use | |
def create_compression_retriever(embeddings, base_retriever, chunk_size=500, k=16, similarity_threshold=None):
    """Wrap ``base_retriever`` in a ContextualCompressionRetriever.

    Retrieved documents pass through a four-stage pipeline, in this order:
    a CharacterTextSplitter (split on ". ", no overlap), an
    EmbeddingsRedundantFilter, an EmbeddingsFilter and a LongContextReorder.

    Parameters:
        embeddings: embeddings client shared by both embedding-based filters.
        base_retriever: the vectorstore-backed retriever to wrap.
        chunk_size (int): chunk size for the splitter stage (default 500).
        k (int): ``k`` passed to the EmbeddingsFilter (default 16).
        similarity_threshold: threshold passed to the EmbeddingsFilter (default None).

    Returns:
        The configured ContextualCompressionRetriever.
    """
    print("compression retriever started")

    # Stage order matters: split, de-duplicate, keep the relevant, reorder.
    # Reordering reference:
    # https://python.langchain.com/docs/modules/data_connection/retrievers/long_context_reorder
    stages = [
        CharacterTextSplitter(chunk_size=chunk_size, chunk_overlap=0, separator=". "),
        EmbeddingsRedundantFilter(embeddings=embeddings),
        EmbeddingsFilter(embeddings=embeddings, k=k, similarity_threshold=similarity_threshold),
        LongContextReorder(),
    ]
    compression_retriever = ContextualCompressionRetriever(
        base_compressor=DocumentCompressorPipeline(transformers=stages),
        base_retriever=base_retriever,
    )

    print("compression retriever done")
    return compression_retriever
# Compression retriever used by the chain: HF embeddings over the HF base retriever.
compression_retriever_HF = create_compression_retriever(
    base_retriever=base_retriever_HF,
    embeddings=embeddings_HuggingFace,
    k=16,
)
# Can use the following to rank the returned embeddings in order of relevance but all are used anyway so I am skipping for now (can test later) | |
''' | |
def CohereRerank_retriever( | |
base_retriever, | |
cohere_api_key,cohere_model="rerank-multilingual-v2.0", top_n=8 | |
): | |
"""Build a ContextualCompressionRetriever using Cohere Rerank endpoint to reorder the results based on relevance. | |
Parameters: | |
base_retriever: a Vectorstore-backed retriever | |
cohere_api_key: the Cohere API key | |
cohere_model: The Cohere model can be either 'rerank-english-v2.0' or 'rerank-multilingual-v2.0', with the latter being the default. | |
top_n: top n results returned by Cohere rerank, default = 8. | |
""" | |
print("cohere rerank started") | |
compressor = CohereRerank( | |
cohere_api_key=cohere_api_key, | |
model=cohere_model, | |
top_n=top_n | |
) | |
retriever_Cohere = ContextualCompressionRetriever( | |
base_compressor=compressor, | |
base_retriever=base_retriever | |
) | |
print("cohere rerank done") | |
return retriever_Cohere | |
''' | |
# Don't have to use this, but it brings all the above pieces together into a single function | |
''' | |
def retrieval_blocks( | |
create_vectorstore=True,# if True a Chroma vectorstore is created, else the Chroma vectorstore will be loaded | |
LLM_service="HuggingFace", | |
vectorstore_name="Vit_All_HF_Embeddings", | |
chunk_size = 1600, chunk_overlap=200, # parameters of the RecursiveCharacterTextSplitter | |
retriever_type="Vectorstore_backed_retriever", | |
base_retriever_search_type="similarity", base_retriever_k=10, base_retriever_score_threshold=None, | |
compression_retriever_k=16, | |
cohere_api_key="***", cohere_model="rerank-multilingual-v2.0", cohere_top_n=8, | |
): | |
print("retrieval blocks started") | |
""" | |
Rertieval includes: document loaders, text splitter, vectorstore and retriever. | |
Parameters: | |
create_vectorstore (boolean): If True, a new Chroma vectorstore will be created. Otherwise, an existing vectorstore will be loaded. | |
LLM_service: OpenAI, Google or HuggingFace. | |
vectorstore_name (str): the name of the vectorstore. | |
chunk_size and chunk_overlap: parameters of the RecursiveCharacterTextSplitter, default = (1600,200). | |
retriever_type (str): in [Vectorstore_backed_retriever,Contextual_compression,Cohere_reranker] | |
base_retriever_search_type: search_type in ["similarity", "mmr", "similarity_score_threshold"], default = similarity. | |
base_retriever_k: The most similar vectors to retrieve (default k = 10). | |
base_retriever_score_threshold: score_threshold used by the base retriever, default = None. | |
compression_retriever_k: top k documents returned by the compression retriever, default=16 | |
cohere_api_key: Cohere API key | |
cohere_model (str): The Cohere model can be either 'rerank-english-v2.0' or 'rerank-multilingual-v2.0', with the latter being the default. | |
cohere_top_n: top n results returned by Cohere rerank, default = 8. | |
Output: | |
retriever. | |
""" | |
try: | |
# Create new Vectorstore (Chroma index) | |
if create_vectorstore: | |
# 1. load documents | |
documents = langchain_document_loader(current_dir) | |
# 2. Text Splitter: split documents to chunks | |
text_splitter = RecursiveCharacterTextSplitter( | |
separators = ["\n\n", "\n", " ", ""], | |
chunk_size = chunk_size, | |
chunk_overlap= chunk_overlap | |
) | |
chunks = text_splitter.split_documents(documents=documents) | |
# 3. Embeddings | |
embeddings = select_embeddings_model(LLM_service=LLM_service) | |
# 4. Vectorsore: create Chroma index | |
vector_store = create_vectorstore( | |
embeddings=embeddings, | |
documents = chunks, | |
vectorstore_name=vectorstore_name, | |
) | |
# 5. Load a Vectorstore (Chroma index) | |
else: | |
embeddings = select_embeddings_model(LLM_service=LLM_service) | |
vector_store = Chroma( | |
persist_directory = current_dir + "/" + vectorstore_name, | |
embedding_function=embeddings | |
) | |
# 6. base retriever: Vector store-backed retriever | |
base_retriever = Vectorstore_backed_retriever( | |
vector_store, | |
search_type=base_retriever_search_type, | |
k=base_retriever_k, | |
score_threshold=base_retriever_score_threshold | |
) | |
retriever = None | |
if retriever_type=="Vectorstore_backed_retriever": | |
retriever = base_retriever | |
# 7. Contextual Compression Retriever | |
if retriever_type=="Contextual_compression": | |
retriever = create_compression_retriever( | |
embeddings=embeddings, | |
base_retriever=base_retriever, | |
k=compression_retriever_k, | |
) | |
# 8. CohereRerank retriever | |
if retriever_type=="Cohere_reranker": | |
retriever = CohereRerank_retriever( | |
base_retriever=base_retriever, | |
cohere_api_key=cohere_api_key, | |
cohere_model=cohere_model, | |
top_n=cohere_top_n | |
) | |
print(f"\n{retriever_type} is created successfully!") | |
print(f"Relevant documents will be retrieved from vectorstore ({vectorstore_name}) which uses {LLM_service} embeddings \ | |
and has {vector_store._collection.count()} chunks.") | |
print("retrieval blocks done") | |
return retriever | |
except Exception as e: | |
print(e) | |
''' | |
# Can use any of these LLMs for responses, for now I am Gemini-Pro for the bot (this is for responses now, not embeddings) | |
def instantiate_LLM(LLM_provider, api_key, temperature=0.8, top_p=0.95, model_name=None):
    """Instantiate a LangChain LLM for the given provider.

    Parameters:
        LLM_provider (str): one of "OpenAI", "Google", "HuggingFace".
        api_key (str): the provider's key (OpenAI key, Google key or Hugging
            Face hub token).
        temperature (float): sampling temperature, default 0.8.
        top_p (float): nucleus-sampling value, default 0.95.
        model_name (str | None): model to use. When None the provider default
            applies: "gpt-3.5-turbo", "gemini-pro" or
            "mistralai/Mistral-7B-Instruct-v0.2". Previously this argument was
            accepted but ignored.

    Returns:
        The configured LLM object.

    Raises:
        ValueError: for an unknown ``LLM_provider``. Previously this case
            failed with an UnboundLocalError on ``return``.
    """
    if LLM_provider == "OpenAI":
        return ChatOpenAI(
            api_key=api_key,
            model=model_name or "gpt-3.5-turbo",
            temperature=temperature,
            model_kwargs={"top_p": top_p},
        )

    if LLM_provider == "Google":
        return ChatGoogleGenerativeAI(
            google_api_key=api_key,
            model=model_name or "gemini-pro",
            temperature=temperature,
            top_p=top_p,
            convert_system_message_to_human=True,
            # Each harm category listed once (the original repeated
            # DANGEROUS_CONTENT, which a dict literal silently collapses).
            safety_settings={
                HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
                HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
            },
        )

    if LLM_provider == "HuggingFace":
        return HuggingFaceHub(
            repo_id=model_name or "mistralai/Mistral-7B-Instruct-v0.2",
            huggingfacehub_api_token=api_key,
            model_kwargs={
                "temperature": temperature,
                "top_p": top_p,
                "do_sample": True,
                "max_new_tokens": 1024,
            },
        )

    raise ValueError(
        f"Unknown LLM_provider {LLM_provider!r}; expected 'OpenAI', 'Google' or 'HuggingFace'."
    )
# This creates history (memory) of prior questions. I am using Gemini for this but I left the code if I decide to go to GPT later on. | |
def create_memory(model_name='gemini-pro', memory_max_token=None):
    # def create_memory(model_name='gpt-3.5-turbo', memory_max_token=None):
    """Build the conversation memory for ``model_name``.

    "gpt-3.5-turbo" gets a ConversationSummaryBufferMemory summarised by a
    gpt-3.5-turbo ChatOpenAI instance, limited to ``memory_max_token`` tokens
    (1024 when None). Every other model gets a plain ConversationBufferMemory
    and ``memory_max_token`` is not used.
    """
    # Keys shared by both memory flavours.
    shared = dict(
        return_messages=True,
        memory_key='chat_history',
        output_key="answer",
        input_key="question",
    )

    if model_name != "gpt-3.5-turbo":
        return ConversationBufferMemory(**shared)

    # max_tokens for 'gpt-3.5-turbo' = 4096
    token_limit = 1024 if memory_max_token is None else memory_max_token
    summarizer = ChatOpenAI(
        model_name="gpt-3.5-turbo", openai_api_key=openai_api_key, temperature=0.1
    )
    return ConversationSummaryBufferMemory(
        max_token_limit=token_limit, llm=summarizer, **shared
    )
# Module-level memory for the Gemini model. The disabled gpt-3.5-turbo variant
# uses a small memory_max_token to show how older messages get summarised once
# the limit is exceeded.
memory = create_memory(memory_max_token=None, model_name='gemini-pro')
# memory = create_memory(model_name='gpt-3.5-turbo', memory_max_token=20)

# Record one placeholder exchange as conversation context, then read the
# stored variables back (the returned value is not kept).
memory.save_context({"question": "sample"}, {"answer": "sample"})
memory.load_memory_variables({})
# Prompt for the condense-question step: the LLM receives the chat history and
# the follow-up question and must write a standalone version of the question.
# The template ends with an open "Standalone question:" slot for the model to
# complete. It previously ended with "{question}", which pre-filled the slot
# with the unrephrased follow-up and defeated the rephrasing step.
standalone_question_template = """Given the following conversation and a follow up question,
rephrase the follow up question to be a standalone question, in the English language.\n\n
Chat History:\n{chat_history}\n
Follow Up Input: {question}\n
Standalone question:"""
#standalone_question_prompt = PromptTemplate(
#    input_variables=['chat_history', 'question'],
#    template=standalone_question_template
#)
def answer_template(language="english"):
    """Return the answer-prompt template text for ``language``.

    ``language`` is substituted immediately. ``{chat_history}``, ``{context}``
    and ``{question}`` are left as placeholders for the prompt template that
    consumes the returned string.
    """
    lines = [
        "You are a professor who is an expert in needs assessment.",
        f"Answer the question at the end (convert the queestion to {language} language if it is not).",
        "Use only the following context (delimited by <context></context>) in responding to the question.",
        "Be polite and end by asking if you can answer any other questions.",
        "If you can't answer the question, then you should say that it is not within your knowledge base and that you can only answer needs assessment related questions.",
        "Your answer must be in the language at the end.",
        "<context>",
        "{chat_history}",
        "{context}",
        "</context>",
        "Question: {question}",
        f"Language: {language}.",
        "",  # the template ends with a newline
    ]
    return "\n".join(lines)
# Answer prompt built from the default (english) template.
answer_prompt = ChatPromptTemplate.from_template(answer_template())

# Conversational RAG chain: a Gemini model condenses the follow-up question
# (temperature 0.3), the compression retriever supplies the context, and a
# second Gemini model writes the answer (temperature 0.8).
chain = ConversationalRetrievalChain.from_llm(
    # --- question condensing ---
    condense_question_prompt=PromptTemplate(
        template=standalone_question_template,
        input_variables=['chat_history', 'question'],
    ),
    combine_docs_chain_kwargs={'prompt': answer_prompt},
    condense_question_llm=instantiate_LLM(
        LLM_provider="Google",
        api_key=google_api_key,
        temperature=0.3,
        model_name="gemini-pro",
    ),
    # --- memory and answering model ---
    memory=create_memory("gemini-pro"),
    llm=instantiate_LLM(
        LLM_provider="Google",
        api_key=google_api_key,
        temperature=0.8,
        model_name="gemini-pro",
    ),
    # --- retrieval ---
    retriever=compression_retriever_HF,
    # retriever=base_retriever_HF,
    chain_type="stuff",
    verbose=True,
    return_source_documents=True,
)
# As above, this is not in use but it brings all the above elements together into a single function | |
''' | |
def create_ConversationalRetrievalChain( | |
llm,condense_question_llm, | |
retriever, | |
chain_type= 'stuff', | |
language="english", | |
model_name='gemini-pro' | |
#model_name='gpt-3.5-turbo' | |
): | |
"""Create a ConversationalRetrievalChain. | |
First, it passes the follow-up question along with the chat history to an LLM which rephrases | |
the question and generates a standalone query. | |
This query is then sent to the retriever, which fetches relevant documents (context) | |
and passes them along with the standalone question and chat history to an LLM to answer. | |
""" | |
# 1. Define the standalone_question prompt. | |
# Pass the follow-up question along with the chat history to the `condense_question_llm` | |
# which rephrases the question and generates a standalone question. | |
standalone_question_prompt = PromptTemplate( | |
input_variables=['chat_history', 'question'], | |
template="""Given the following conversation and a follow up question, | |
rephrase the follow up question to be a standalone question, in its original language.\n\n | |
Chat History:\n{chat_history}\n | |
Follow Up Input: {question}\n | |
Standalone question: {question}""") | |
# 2. Define the answer_prompt | |
# Pass the standalone question + the chat history + the context (retrieved documents) to the `LLM` wihch will answer | |
answer_prompt = ChatPromptTemplate.from_template(answer_template(language='English')) | |
# 3. Add ConversationSummaryBufferMemory for gpt-3.5, and ConversationBufferMemory for the other models | |
memory = create_memory(model_name) | |
# 4. Create the ConversationalRetrievalChain | |
chain = ConversationalRetrievalChain.from_llm( | |
condense_question_prompt=standalone_question_prompt, | |
combine_docs_chain_kwargs={'prompt': answer_prompt}, | |
#condense_question_llm=condense_question_llm, | |
condense_question_llm=instantiate_LLM( | |
LLM_provider="Google",api_key=google_api_key,temperature=0.1, | |
model_name="gemini-pro"), | |
memory=memory, | |
retriever = compression_retriever_HF, | |
#retriever = base_retriever_HF, #changed this | |
#retriever = retriever, | |
#llm=llm, #changed this | |
llm=instantiate_LLM( | |
LLM_provider="Google",api_key=google_api_key,temperature=0.5, | |
model_name="gemini-pro"), | |
chain_type= "stuff", | |
#chain_type= chain_type, | |
verbose= True, | |
return_source_documents=True | |
) | |
print("Conversational retriever chain created successfully!") | |
return chain,memory | |
''' | |
# This below is for the interface | |
def _query_similarity_score(document):
    """Return the query similarity score attached to ``document``, or None if it has none."""
    doc_state = getattr(document, "state", None) or {}
    return doc_state.get('query_similarity_score')


def submit_message(prompt, prompt_template, temperature, max_tokens, context_length, state):
    """Answer one user question with the retrieval chain and format it for the chatbot.

    Parameters:
        prompt: the question typed by the user.
        prompt_template, temperature, max_tokens, context_length: values of the
            UI controls; accepted to match the event signature but not used.
        state: per-session dict; the raw chain result is stored under "content".

    Returns:
        A 3-tuple ``('', chat_messages, state)``: an empty string for the
        textbox, a one-item list holding ``(question, answer + lead source)``,
        and the updated state.

    NOTE(review): only the latest exchange is returned to the chatbot, as in
    the original code; ``state['messages']`` is never appended to.
    """
    completion = chain.invoke({"question": prompt})
    state['content'] = completion

    answer = completion['answer']
    source_documents = completion['source_documents']

    print("Prompt/question:", prompt)
    print("Answer:", answer)
    print("Embeddings utlized:")

    # Single pass: log every source document and remember the best-scoring one.
    # Documents without a similarity score (e.g. from a retriever that does not
    # attach one) are logged with None and never selected, instead of crashing.
    selected_document = None
    highest_similarity_score = None
    for document in source_documents:
        similarity_score = _query_similarity_score(document)
        print("Embedding_content:", document.page_content)
        print("Metadata:", document.metadata)
        print("Similarity_score:", similarity_score)
        print("")
        if similarity_score is None:
            continue
        if highest_similarity_score is None or similarity_score > highest_similarity_score:
            highest_similarity_score = similarity_score
            selected_document = document

    if selected_document is not None:
        metadata = selected_document.metadata
        # Remove the "/home/user/app/" part and the extension from the document name.
        modified_source = (
            metadata.get('source', 'unknown')
            .replace('/home/user/app/', '')
            .replace('.pdf', '')
        )
        source_info = f"\n**Lead source:** {modified_source}, **Page:** {metadata.get('page', 'n/a')} "
    else:
        # Leading newline keeps the note from being glued onto the answer text.
        source_info = "\nLead source: not determined"

    chat_messages = [(prompt, answer + source_info)]
    return '', chat_messages, state
def clear_conversation():
    """Reset the interface and the chain's memory for a new conversation.

    Returns:
        Three values, one per output wired to the "Start New Conversation"
        button (textbox, chatbot, state): an update that empties and shows the
        textbox, ``None`` to clear the chatbot, and a fresh state dict.
        The original returned a fourth value (``""``) with no matching output.
    """
    # Forget the previous exchange so the next question is not condensed
    # against the old chat history.
    chain.memory.clear()
    return gr.update(value=None, visible=True), None, get_empty_state()
# Stylesheet handed to gr.Blocks: one rule per entry, joined with newlines.
# The empty first and last entries reproduce the leading and trailing newline.
css = "\n".join(
    (
        "",
        "#col-container {max-width: 80%; margin-left: auto; margin-right: auto;}",
        "#chatbox {min-height: 400px;}",
        "#header {text-align: center;}",
        "#prompt_template_preview {padding: 1em; border-width: 1px; border-style: solid; border-color: #e0e0e0; border-radius: 4px; min-height: 150px;}",
        "#total_tokens_str {text-align: right; font-size: 0.8em; color: #666;}",
        "#label {font-size: 0.8em; padding: 0.5em; margin: 0;}",
        ".message { font-size: 1.2em; }",
        "",
    )
)
# Gradio interface: a chat column (visible) and an expert/parameters column (hidden).
# NOTE(review): `.style(...)` and `queue(concurrency_count=...)` look like
# Gradio 3.x-era APIs -- confirm against the pinned gradio version.
with gr.Blocks(css=css) as demo:
    state = gr.State(get_empty_state())

    with gr.Column(elem_id="col-container"):
        gr.Markdown(
            "## Ask questions of our *needs assessment* bot! \n"
            "\n"
            "**It is specially trained to only answer needs assessment related questions.**\n",
            elem_id="header",
        )

        with gr.Row():
            with gr.Column():
                chatbot = gr.Chatbot(elem_id="chatbox")
                input_message = gr.Textbox(
                    show_label=False,
                    placeholder="Enter your needs assessment question",
                    visible=True,
                ).style(container=False)
                btn_submit = gr.Button("Submit")
                # total_tokens_str = gr.Markdown(elem_id="total_tokens_str")
                btn_clear_conversation = gr.Button("Start New Conversation", visible=False)

            with gr.Column(visible=False):
                prompt_template = gr.Dropdown(
                    label="Choose an Expert:", choices=list(prompt_templates.keys())
                )
                prompt_template_preview = gr.Markdown(elem_id="prompt_template_preview")
                with gr.Accordion("Advanced parameters", open=False):
                    temperature = gr.Slider(minimum=0, maximum=2.0, value=0.7, step=0.1, label="Flexibility", info="Higher = More AI, Lower = More Expert")
                    max_tokens = gr.Slider(minimum=100, maximum=400, value=200, step=1, label="Length of Response.")
                    context_length = gr.Slider(minimum=1, maximum=5, value=2, step=1, label="Context Length", info="Number of previous questions you have asked.")

    # Both the button and pressing Enter in the textbox submit the question.
    submit_inputs = [input_message, prompt_template, temperature, max_tokens, context_length, state]
    submit_outputs = [input_message, chatbot, state]
    btn_submit.click(submit_message, submit_inputs, submit_outputs)
    input_message.submit(submit_message, submit_inputs, submit_outputs)

    btn_clear_conversation.click(clear_conversation, [], [input_message, chatbot, state])
    prompt_template.change(on_prompt_template_change_description, inputs=[prompt_template], outputs=[prompt_template_preview])

    # Fill the expert dropdown when the page loads. The keyword was misspelled
    # "queur", which made this call raise TypeError at start-up.
    demo.load(download_prompt_templates, inputs=None, outputs=[prompt_template], queue=False)

demo.queue(concurrency_count=10)
demo.launch(height='800px')