# deltav2/chatbotmemory.py — "Upload 6 files" by rohanshaw, commit 5479033 (3.86 kB)
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
from langchain_pinecone import PineconeVectorStore
from langchain.prompts import MessagesPlaceholder, ChatPromptTemplate
from langchain_google_genai import GoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from dotenv import load_dotenv, find_dotenv
import os
from pinecone import Pinecone, PodSpec
# Load API keys (GEMINI_API_KEY, PINECONE_API_KEY) from a .env file, if one exists.
load_dotenv(find_dotenv())
class ChatbotMemory():
    """Retrieval-augmented (RAG) Gemini chatbot for GDSC Silver Oak University.

    Builds (or reuses) a Pinecone vector index from ``dataset.txt`` and
    exposes a LangChain ``rag_chain`` that answers questions grounded in the
    retrieved chunks, rewriting follow-up questions into standalone ones
    when a chat history is supplied.

    NOTE(review): every statement below runs in the *class body*, i.e. at
    import time — file loading, embedding, and Pinecone network calls are
    all side effects of importing this module, and every name here is a
    shared class attribute, not per-instance state.
    """

    # Knowledge base: a single local text file; encoding is auto-detected.
    loader = TextLoader('dataset.txt', autodetect_encoding=True)
    documents = loader.load()
    # Split into 512-char chunks; the 103-char (~20%) overlap preserves
    # context across chunk boundaries for retrieval.
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=103)
    docs = text_splitter.split_documents(documents)
    # Gemini embedding model, used both to index documents and to embed
    # queries. NOTE(review): task_type="retrieval_query" is also applied
    # when embedding the documents below — confirm "retrieval_document"
    # was not intended for the indexing side.
    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001", task_type="retrieval_query", google_api_key=os.getenv("GEMINI_API_KEY"))
    pinecone = Pinecone(
        api_key=os.environ.get("PINECONE_API_KEY")
    )
    index_name = "gdscsou-chatbot"
    # First run: create the index (dimension=768 matches embedding-001)
    # and upload the chunks. Subsequent runs: reuse the populated index
    # without re-uploading.
    if index_name not in pinecone.list_indexes().names():
        pinecone.create_index(name=index_name, metric="cosine", dimension=768, spec=PodSpec(environment="gcp-starter"))
        docsearch = PineconeVectorStore.from_documents(docs, embeddings, index_name=index_name)
    else:
        docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)
    # Answer-generation model (plain LLM interface, not the chat variant).
    llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=os.getenv("GEMINI_API_KEY"))

    def contextualized_question(input: dict):
        """Return either a question-rewriting chain or the raw question.

        When ``input`` carries a non-empty ``chat_history``, returns a
        Runnable that reformulates the latest question into a standalone
        one (LangChain invokes a Runnable returned from a lambda with the
        same input dict). Otherwise the question string passes through
        unchanged.
        """
        if input.get("chat_history"):
            # Class-body names are not an enclosing scope for functions
            # defined in the class body, so the LLM — and the prompt and
            # chain below — are re-created on every call here.
            llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=os.getenv("GEMINI_API_KEY"))
            contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is."""
            contextualize_q_prompt = ChatPromptTemplate.from_messages(
                [
                    ("system", contextualize_q_system_prompt),
                    MessagesPlaceholder(variable_name="chat_history"),
                    ("human", "{question}"),
                ]
            )
            contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()
            return contextualize_q_chain
        else:
            return input["question"]

    # System prompt for the final answer: {context} receives the retrieved
    # chunks, {question} the (possibly rewritten) user question.
    template = """
INSTRUCTION: Act as Delta a community support chatbot for Google Developer Student Clubs, Silver Oak University alias GDSC SOU, this is conversation \
to a community member. Use the CONTEXT to answer in a helpful manner to the QUESTION. \
Don't forget you are a Community support chatbot for Google Developer Student Clubs, Silver Oak University. \
If you don't know any ANSWER, say you don't know \
Always follow general guardrails before generating any response. \
Always try to keep the conversation in context to GDSC SOU. Keep your replies short \
compassionate and informative.\
Give the answer from the CONTEXT\
You should help user to get his query solved and also try to increase engagement for GDSC SOU by also promoting GDSC SOU.\
CONTEXT: {context}
QUESTION: {question}
ANSWER:
"""
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", template),
            MessagesPlaceholder(variable_name="chat_history"),
            ("human", "{question}"),
        ]
    )
    # Full pipeline: RunnablePassthrough.assign adds a "context" key
    # (standalone question -> vector-store retriever) to the input dict,
    # then formats the prompt and generates the answer with Gemini.
    rag_chain = (
        RunnablePassthrough.assign(
            context=contextualized_question | docsearch.as_retriever()
        )
        | prompt
        | llm
    )