# Standard library
import os

# Environment / infrastructure
from dotenv import load_dotenv, find_dotenv
from pinecone import Pinecone, PodSpec

# LangChain core + integrations
from langchain.prompts import ChatPromptTemplate, MessagesPlaceholder
from langchain.schema.output_parser import StrOutputParser
from langchain.schema.runnable import RunnablePassthrough
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import TextLoader
from langchain_google_genai import GoogleGenerativeAI, GoogleGenerativeAIEmbeddings
from langchain_pinecone import PineconeVectorStore

# Populate os.environ from the nearest .env file (GEMINI_API_KEY / PINECONE_API_KEY).
load_dotenv(find_dotenv())
class ChatbotMemory:
    """Retrieval-augmented chatbot pipeline for GDSC Silver Oak University.

    Everything below runs in the class body, so the dataset load, index
    creation/lookup, and chain construction all execute once at import time
    and are exposed as class attributes (``docsearch``, ``llm``, ``rag_chain``).

    NOTE(review): class-definition-time network and file I/O means importing
    this module requires 'dataset.txt', a reachable Pinecone project, and
    valid GEMINI_API_KEY / PINECONE_API_KEY — confirm this is intentional
    before moving it into an __init__ or factory.
    """

    # --- Knowledge base: load and chunk the source document ------------------
    loader = TextLoader('dataset.txt', autodetect_encoding=True)
    documents = loader.load()
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=512, chunk_overlap=103)
    docs = text_splitter.split_documents(documents)

    # --- Embeddings + vector store -------------------------------------------
    embeddings = GoogleGenerativeAIEmbeddings(
        model="models/embedding-001",
        task_type="retrieval_query",
        google_api_key=os.getenv("GEMINI_API_KEY"),
    )
    pinecone = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))
    index_name = "gdscsou-chatbot"
    if index_name not in pinecone.list_indexes().names():
        # First run: create the index and upsert the chunked documents.
        pinecone.create_index(
            name=index_name,
            metric="cosine",
            dimension=768,  # matches the embedding model's output dimension
            spec=PodSpec(environment="gcp-starter"),
        )
        docsearch = PineconeVectorStore.from_documents(docs, embeddings, index_name=index_name)
    else:
        # Index already exists: attach to it without re-embedding the corpus.
        docsearch = PineconeVectorStore.from_existing_index(index_name, embeddings)

    # --- LLM ------------------------------------------------------------------
    llm = GoogleGenerativeAI(model="gemini-pro", google_api_key=os.getenv("GEMINI_API_KEY"))

    # --- History-condensing chain --------------------------------------------
    # Built ONCE here instead of inside contextualized_question(): the original
    # constructed a fresh GoogleGenerativeAI client, prompt, and chain on every
    # call.  Class-body names are not visible inside nested functions, so the
    # chain is handed to the function via a default argument bound at def time.
    contextualize_q_system_prompt = """Given a chat history and the latest user question \
which might reference context in the chat history, formulate a standalone question \
which can be understood without the chat history. Do NOT answer the question, \
just reformulate it if needed and otherwise return it as is."""
    contextualize_q_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", contextualize_q_system_prompt),
            MessagesPlaceholder(variable_name="chat_history"),
            ("human", "{question}"),
        ]
    )
    contextualize_q_chain = contextualize_q_prompt | llm | StrOutputParser()

    def contextualized_question(payload: dict, _chain=contextualize_q_chain):
        """Route the incoming request to a standalone question.

        When chat history is present, return the condensing chain (LCEL will
        invoke it with the same input to rewrite the question); otherwise pass
        the question through unchanged.  ``payload`` replaces the original
        parameter name ``input``, which shadowed the builtin.
        """
        if payload.get("chat_history"):
            return _chain
        return payload["question"]

    # --- Answering prompt -----------------------------------------------------
    template = """
INSTRUCTION: Act as Delta a community support chatbot for Google Developer Student Clubs, Silver Oak University alias GDSC SOU, this is conversation \
to a community member. Use the CONTEXT to answer in a helpful manner to the QUESTION. \
Don't forget you are a Community support chatbot for Google Developer Student Clubs, Silver Oak University. \
If you don't know any ANSWER, say you don't know \
Always follow general guardrails before generating any response. \
Always try to keep the conversation in context to GDSC SOU. Keep your replies short \
compassionate and informative.\
Give the answer from the CONTEXT\
You should help user to get his query solved and also try to increase engagement for GDSC SOU by also promoting GDSC SOU.\
CONTEXT: {context}
QUESTION: {question}
ANSWER:
"""
    prompt = ChatPromptTemplate.from_messages(
        [
            ("system", template),
            MessagesPlaceholder(variable_name="chat_history"),
            ("human", "{question}"),
        ]
    )

    # --- Full RAG chain -------------------------------------------------------
    # 1. Condense (question + history) -> standalone question, retrieve context.
    # 2. Fill the answering prompt.  3. Generate with Gemini.
    rag_chain = (
        RunnablePassthrough.assign(
            context=contextualized_question | docsearch.as_retriever()
        )
        | prompt
        | llm
    )