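# app.py -- Smart Assistant: upload a PDF and ask questions about it.
# Pipeline: PyPDFLoader -> chunking -> BGE embeddings -> Chroma, queried
# through a MultiQueryRetriever wrapped in contextual compression, with
# ChatOpenAI generating the final answer inside a Gradio UI.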
import gradio as gr
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.chat_models import ChatOpenAI
from langchain_community.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.retrievers.document_compressors import LLMChainExtractor
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.retrievers import ContextualCompressionRetriever
from langchain.prompts.chat import ChatPromptTemplate, HumanMessagePromptTemplate

# Reads the OpenAI API key from the OPENAI_API_KEY environment variable.
chat = ChatOpenAI()

# BGE embeddings on CPU, normalized so similarity search behaves like cosine similarity.
embedding_function = HuggingFaceEmbeddings(
    model_name="BAAI/bge-large-en-v1.5",
    model_kwargs={"device": "cpu"},
    encode_kwargs={"normalize_embeddings": True},
)
def add_docs(path):
    """Load the uploaded PDF, split it into chunks, and persist them to Chroma."""
    loader = PyPDFLoader(file_path=path)
    docs = loader.load_and_split(
        text_splitter=RecursiveCharacterTextSplitter(
            chunk_size=500,
            chunk_overlap=100,
            length_function=len,
            is_separator_regex=False,
        )
    )
    db = Chroma.from_documents(
        documents=docs,
        embedding=embedding_function,
        persist_directory="output/general_knowledge",
    )
    return db
def answer_query(message, chat_history):
    # Re-open the vector store persisted by add_docs.
    db = Chroma(persist_directory="output/general_knowledge", embedding_function=embedding_function)
    base_retriever = db.as_retriever()

    # MultiQueryRetriever rephrases the question several ways to widen recall;
    # LLMChainExtractor then compresses each hit down to its relevant passages.
    mq_retriever = MultiQueryRetriever.from_llm(retriever=base_retriever, llm=chat)
    base_compressor = LLMChainExtractor.from_llm(chat)
    compression_retriever = ContextualCompressionRetriever(
        base_compressor=base_compressor, base_retriever=mq_retriever
    )
    matched_docs = compression_retriever.get_relevant_documents(query=message)

    # Concatenate the retrieved chunks into a single context block.
    context = ""
    for doc in matched_docs:
        context += doc.page_content
        context += "\n\n"
    template = """
    Answer the following question using only the context given below in triple backticks; do not use any other information to answer.
    If you can't answer the question from the given context, return an empty string ('').
    Context: ```{context}```
    ----------------------------
    Question: {query}
    ----------------------------
    Answer: """
    human_message_prompt = HumanMessagePromptTemplate.from_template(template=template)
    chat_prompt = ChatPromptTemplate.from_messages([human_message_prompt])
    prompt = chat_prompt.format_prompt(query=message, context=context)
    response = chat.invoke(prompt.to_messages()).content

    # Append the turn; the empty string clears the question textbox.
    chat_history.append((message, response))
    return "", chat_history
with gr.Blocks() as demo:
    gr.HTML("<h1 align='center'>Smart Assistant</h1>")
    gr.HTML("<h2 align='center'>Upload any PDF and ask your questions.</h2>")
    with gr.Row():
        upload_files = gr.File(label="Upload a PDF", file_types=[".pdf"], file_count="single")
    chatbot = gr.Chatbot()
    msg = gr.Textbox(label="Enter your question here")

    # Index the PDF on upload (gr.File passes the uploaded file's path);
    # answer questions on submit.
    upload_files.upload(add_docs, upload_files)
    msg.submit(answer_query, [msg, chatbot], [msg, chatbot])

if __name__ == "__main__":
    demo.launch()
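# ---------------------------------------------------------------------------
# A minimal dependency sketch (the original Space does not pin versions, so
# the exact list is an assumption): gradio, langchain, langchain-community,
# chromadb, pypdf, sentence-transformers, openai.
# OPENAI_API_KEY must be set in the environment before launching.
# ---------------------------------------------------------------------------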