# Standard library
import os

# Environment / infrastructure
from dotenv import load_dotenv, find_dotenv
from pinecone import Pinecone, PodSpec

# LangChain core + integrations
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_google_genai import GoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_pinecone import PineconeVectorStore

# Populate os.environ from the nearest .env file (GEMINI_API_KEY / PINECONE_API_KEY).
load_dotenv(find_dotenv())
class ChatbotMemory:
    """Retrieval-augmented chatbot pipeline for GDSC Silver Oak University.

    Everything below runs in the class body, so the dataset load, index
    creation/lookup, and chain construction all execute once at import time
    and are exposed as class attributes (``docsearch``, ``llm``, ``rag_chain``).

    NOTE(review): class-definition-time network and file I/O means importing
    this module requires 'dataset.txt', a reachable Pinecone project, and
    valid GEMINI_API_KEY / PINECONE_API_KEY — confirm this is intentional
    before moving it into an __init__ or factory.
    """

    # --- Knowledge base: load and chunk the source document ------------------
    loader = TextLoader('dataset.txt', autodetect_encoding=True)
    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=103)
    docs = text_splitter.split_documents(documents)

    # --- Embeddings + vector store -------------------------------------------
    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        task_type="retrieval_query",
        google_api_key=os.getenv("GEMINI_API_KEY"),
    )
    pinecone = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))
    index_name = "gdscsou-chatbot"
    if index_name not in pinecone.list_indexes().names():
        # First run: create the index and upsert the chunked documents.
        pinecone.create_index(
            name=index_name,
            metric="cosine",
            dimension=768,  # matches the embedding model's output dimension
            spec=PodSpec(environment="gcp-starter"),
        )
        docsearch = PineconeVectorStore.from_documents(docs, embeddings, index_name=index_name)
    else:
        # Index already exists: attach to it without re-embedding the corpus.
        docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)

    # --- LLM ------------------------------------------------------------------
    llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=os.getenv("GEMINI_API_KEY"))

    # --- History-condensing chain --------------------------------------------
    # Built ONCE here instead of inside contextualized_question(): the original
    # constructed a fresh GoogleGenerativeAI client, prompt, and chain on every
    # call.  Class-body names are not visible inside nested functions, so the
    # chain is handed to the function via a default argument bound at def time.
    contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is."""
    contextualize_q_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", contextualize_q_system_prompt),
            MessagesPlaceholder(variable_name="chat_history"),
            ("human", "{question}"),
        ]
    )
    contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()

    def contextualized_question(payload: dict, _chain=contextualize_q_chain):
        """Route the incoming request to a standalone question.

        When chat history is present, return the condensing chain (LCEL will
        invoke it with the same input to rewrite the question); otherwise pass
        the question through unchanged.  ``payload`` replaces the original
        parameter name ``input``, which shadowed the builtin.
        """
        if payload.get("chat_history"):
            return _chain
        return payload["question"]

    # --- Answering prompt -----------------------------------------------------
    template = """
INSTRUCTION: Act as Delta a community support chatbot for Google Developer Student Clubs, Silver Oak University alias GDSC SOU, this is conversation \
to a community member. Use the CONTEXT to answer in a helpful manner to the QUESTION. \
Don't forget you are a Community support chatbot for Google Developer Student Clubs, Silver Oak University. \
If you don't know any ANSWER, say you don't know \
Always follow general guardrails before generating any response. \
Always try to keep the conversation in context to GDSC SOU. Keep your replies short \
compassionate and informative.\
Give the answer from the CONTEXT\
You should help user to get his query solved and also try to increase engagement for GDSC SOU by also promoting GDSC SOU.\
CONTEXT: {context}
QUESTION: {question}
ANSWER:
"""
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", template),
            MessagesPlaceholder(variable_name="chat_history"),
            ("human", "{question}"),
        ]
    )

    # --- Full RAG chain -------------------------------------------------------
    # 1. Condense (question + history) -> standalone question, retrieve context.
    # 2. Fill the answering prompt.  3. Generate with Gemini.
    rag_chain = (
        RunnablePassthrough.assign(
            context=contextualized_question | docsearch.as_retriever()
        )
        | prompt
        | llm
    )