|
|
|
|
|
|
|
import os |
|
import gradio as gr |
|
|
|
from operator import itemgetter |
|
|
|
|
|
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder |
|
from langchain_core.runnables import RunnableParallel,RunnablePassthrough,RunnableLambda |
|
from langchain_core.output_parsers import StrOutputParser |
|
from langchain_core.messages import AIMessage, HumanMessage, get_buffer_string |
|
from langchain.prompts.prompt import PromptTemplate |
|
from langchain.schema import format_document |
|
from langchain.memory import ConversationBufferMemory |
|
|
|
|
|
from langchain_community.embeddings import HuggingFaceEmbeddings |
|
|
|
|
|
from langchain_google_genai import ChatGoogleGenerativeAI |
|
|
|
|
|
from langchain_groq import ChatGroq |
|
|
|
|
|
from pinecone import Pinecone, ServerlessSpec |
|
from langchain_pinecone import PineconeVectorStore |
|
|
|
from dotenv import load_dotenv |
|
load_dotenv()  # read API keys / model names from a local .env file into the environment


# Name of the Pinecone index queried below (see pc.Index(setid)).
setid = "global"
|
|
|
def pipeLog(x):
    """Pass-through debug tap: echo *x* prefixed with '***' and return it unchanged.

    NOTE(review): shadowed by the two-argument pipeLog redefined later in
    this file, so this version is effectively dead code — confirm and remove.
    """
    value = x
    print("***", value)
    return value
|
|
|
# Embedding model used to vectorize queries (and, presumably, the documents
# already indexed in Pinecone — the two must match; TODO confirm).
# The model name is read from the EMBEDDINGS_MODEL environment variable;
# NOTE(review): os.getenv returns None if unset — verify the .env is configured.
embeddings = HuggingFaceEmbeddings(model_name=os.getenv("EMBEDDINGS_MODEL"))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Chat LLM served via Groq (presumably authenticates with GROQ_API_KEY from
# the environment — no key is passed here; verify).
# NOTE(review): "mixtral-8x7b-32768" has been retired from Groq's model
# lineup — confirm this id is still served and migrate if necessary.
model = ChatGroq(model_name='mixtral-8x7b-32768')
|
|
|
|
|
# --- Pinecone vector store / retriever setup ---------------------------------
pc = Pinecone(api_key=os.getenv("PINECONE_API_KEY"))

index = pc.Index(setid)

# "text" is the metadata key under which each chunk's raw text is stored.
vectorstore = PineconeVectorStore(index, embeddings, "text")

# BUG FIX: the original passed `kwargs={"k": 5}`, which as_retriever does not
# recognize — the intended top-k of 5 was never applied. `search_kwargs` is
# the documented parameter for per-search options on VectorStore.as_retriever.
retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
|
|
|
|
|
# Prompt for the final answer step: restricts the model to the retrieved
# context only ({context} is filled by _combine_documents below).
template_no_history = """Answer the question based only on the following context:

{context}



Question: {question}

"""

ANSWER_PROMPT = ChatPromptTemplate.from_template(template_no_history)

# Prompt for the condense step: rewrite a follow-up into a self-contained
# question so it can be embedded and searched without the chat history.
template_with_history = """Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question, in its original language.



Chat History:

{chat_history}

Follow Up Input: {question}

Standalone question:"""

CONDENSE_QUESTION_PROMPT = ChatPromptTemplate.from_template(template_with_history)

# How each retrieved Document is rendered before being joined into {context}:
# just its raw page_content, no metadata.
DEFAULT_DOCUMENT_PROMPT = PromptTemplate.from_template(template="{page_content}")
|
|
|
def _combine_documents(docs, document_prompt=DEFAULT_DOCUMENT_PROMPT, document_separator="\n\n"):
    """Render each retrieved document with *document_prompt* and join the
    results into a single string separated by *document_separator*."""
    rendered = (format_document(doc, document_prompt) for doc in docs)
    return document_separator.join(rendered)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Conversation memory keyed on "question" (input) / "answer" (output);
# return_messages=True makes it yield message objects rather than one string.
# NOTE(review): ConversationBufferMemory is deprecated in recent LangChain
# releases — confirm the pinned version still supports it.
memory = ConversationBufferMemory(return_messages=True, output_key="answer", input_key="question")


# Chain stage 1: pass the input dict through, adding a "chat_history" key.
# load_memory_variables returns {"history": [...]} (hence the itemgetter).
loaded_memory = RunnablePassthrough.assign(
    chat_history=RunnableLambda(memory.load_memory_variables) | itemgetter("history"),
)
|
|
|
|
|
# Chain stage 2: condense (question + chat history) into a standalone
# question. The inner dict is coerced to a RunnableParallel feeding the
# condense prompt; get_buffer_string flattens the message list into a
# plain-text transcript for the prompt.
standalone_question = {
    "standalone_question": {
        "question": lambda x: x["question"],
        "chat_history": lambda x: get_buffer_string(x["chat_history"]),
    }
    | CONDENSE_QUESTION_PROMPT
    | model
    | StrOutputParser(),
}
|
|
|
|
|
# Chain stage 3: run the condensed question through the retriever.
retrieved_documents = {
    # Pull the standalone question string and retrieve matching Documents.
    "docs": itemgetter("standalone_question") | retriever,
    # Keep the question itself alongside the docs for the answer prompt.
    "question": lambda x: x["standalone_question"],
}
|
|
|
|
|
# Chain stage 4: shape the exact inputs expected by ANSWER_PROMPT.
final_inputs = {
    # Flatten the retrieved Documents into one prompt-ready context string.
    "context": lambda step: _combine_documents(step["docs"]),
    # Forward the standalone question unchanged.
    "question": lambda step: step["question"],
}
|
|
|
|
|
# Chain stage 5: generate the answer, surfacing the source docs alongside it.
answer = {
    # NOTE: "answer" is the raw chat-model message object, not a string —
    # callers must read .content (see rag_query below).
    "answer": final_inputs | ANSWER_PROMPT | model,
    "docs": itemgetter("docs"),
}
|
|
|
|
|
# Full RAG pipeline: load memory -> condense question -> retrieve -> answer.
final_chain = loaded_memory | standalone_question | retrieved_documents | answer
|
|
|
|
|
def pipeLog(s: str, x):
    """Labelled debug tap: print *x* prefixed with label *s*, return *x* unchanged.

    NOTE(review): this redefines (shadows) the one-argument pipeLog defined
    earlier in the file — consider deleting the earlier copy.
    """
    payload = x
    print(s, payload)
    return payload
|
# Debug taps that can be spliced into an LCEL chain (e.g. step1 | pipe_a | step2)
# to print intermediate values. NOTE(review): not referenced by final_chain —
# they appear to be leftover debugging aids.
pipe_a = RunnableLambda(lambda x: pipeLog("a:",x))

pipe_b = RunnableLambda(lambda x: pipeLog("b:",x))
|
|
|
|
|
|
|
def rag_query(question: str, history: list[list[str]]) -> str:
    """Answer *question* via the RAG chain and return the answer text.

    The gradio-supplied *history* argument is ignored: conversation state is
    tracked in the module-level memory object instead.
    """
    payload = {'question': question}
    result = final_chain.invoke(payload)

    # The chain returns the raw model message; extract its text once.
    answer_text = result['answer'].content

    # Persist this turn so the next call can condense follow-up questions.
    memory.save_context(payload, {"answer": answer_text})

    return answer_text
|
|
|
|
|
def test_query(question):
    """Manual smoke test: print a question and its RAG answer to stdout."""
    print('QUESTION:', question)
    print('ANSWER: ', rag_query(question, None), '\n')
|
|
|
|
|
|
|
|
|
|
|
|
|
# Build and serve the chat UI: ChatInterface calls rag_query(message, history)
# for each user turn; launch() starts the gradio server (blocking call).
# NOTE(review): consider wrapping this in `if __name__ == "__main__":` so the
# module can be imported without starting a web server.
gr.ChatInterface(
    rag_query,
    title="RAG Chatbot demo",
    description="A chatbot doing Retrieval Augmented Generation, backed by a Pinecone vector database"
).launch()
|
|
|
|
|
|