# chatdocs / mainapp.py
# hubsnippetai's picture
# Rename app.py to mainapp.py
# 1b6d6b8 verified
# raw
# history blame contribute delete
# No virus
# 4.96 kB
from langchain_community.document_loaders import UnstructuredPDFLoader, TextLoader # type: ignore
from langchain_community.vectorstores import FAISS
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import PyPDFLoader
from langchain_cohere import ChatCohere
from langchain_core.messages import HumanMessage
import dotenv
from langchain_core.output_parsers import StrOutputParser
# from langchain_community.vectorstores import Chroma
from langchain.schema.runnable import RunnablePassthrough
from langchain_cohere import CohereEmbeddings
from langchain_core.prompts import PromptTemplate, ChatPromptTemplate
from langchain.memory.summary_buffer import ConversationSummaryBufferMemory
from langchain.chains import ConversationChain
from langchain_core.prompts.chat import MessagesPlaceholder
from langchain.agents import AgentExecutor, create_tool_calling_agent
import os
#from langchain_community.utilities import GoogleSearchAPIWrapper
from langchain_core.tools import Tool
from langchain_google_community import GoogleSearchAPIWrapper
# Load variables from a local .env file into the process environment so the
# API keys read further down with os.getenv are available.
dotenv.load_dotenv()
#file_path = ( "/home/hubsnippet/Downloads/papers/2205.11916v4.pdf")
#load the file to memory
#loader = PyPDFLoader(file_path)
#load the file content to data variable
#data = loader.load_and_split()
# embed the file data in a vector store
#print(data[0])
def parse_document(docs: str, question: str) -> str:
    """Return the chunk of ``docs`` that is most similar to ``question``.

    The text is split into chunks of at most 1000 characters (100 characters
    of overlap), the chunks are embedded with Cohere and indexed in an
    in-memory FAISS store, and the single nearest chunk is returned.

    Args:
        docs: Raw document text to search.
        question: Query used for the similarity search.

    Returns:
        The text of the best-matching chunk, or an empty string when ``docs``
        is empty or whitespace-only, i.e. when there is nothing to index.
    """
    # Nothing to index: return early instead of failing later on an empty
    # chunk list, and avoid creating any API client for it.
    if not docs or not docs.strip():
        return ""

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=1000,
        chunk_overlap=100,
    )
    chunks = text_splitter.split_text(docs)
    if not chunks:
        return ""

    # Embedding model used to build and query the vector store.
    embeddings = CohereEmbeddings(model="embed-english-light-v3.0")
    faiss_vs = FAISS.from_texts(chunks, embeddings)

    # Retrieval is a plain vector similarity search, so no chat model is
    # created here; k=1 keeps only the single closest chunk.
    retriever = faiss_vs.as_retriever(search_kwargs={"k": 1})
    matches = retriever.invoke(question)
    return matches[0].page_content if matches else ""
#os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY")
#os.environ["GOOGLE_CSE_ID"] = os.getenv("GOOGLE_CSE_ID")
#COHERE_API_KEY = os.getenv("COHERE_API_KEY")
# Credentials come from the process environment; a variable that is not set
# is read as None.
COHERE_API_KEY = os.environ.get("COHERE_API_KEY")
GOOGLE_CSE_ID = os.environ.get("GOOGLE_CSE_ID")
GOOGLE_API_KEY = os.environ.get("GOOGLE_API_KEY")
# Chat model that drives the search agent defined further down.
llm = ChatCohere(cohere_api_key=COHERE_API_KEY, model="command-r-plus")
# history = MessagesPlaceholder(variable_name="history")
#question = ""
#url = ""
# Message templates for a URL-grounded search prompt. Uses the ``url`` and
# ``question`` variables plus the ``agent_scratchpad`` placeholder.
# Adjacent string literals are used instead of backslash continuations so the
# spacing of the final text does not depend on source indentation.
prompt_template = [
    (
        "system",
        "You are a search engine for a corpus of documentation. "
        "you will be provided with a url {url} "
        "the url is your only source of information, "
        "so you should search the url pages by key words "
        "you should only ground your responses with the url. "
        "If {question} has no related content from the url, "
        "simply respond 'no related content to your question'",
    ),
    ("human", "{question}"),
    ("placeholder", "{agent_scratchpad}"),
    ("ai", ""),
]
# prompt_template = prompt_template.format(url=url, question=question)
# Chat prompt for the tool-calling agent: a system instruction, the user's
# question, and the placeholder the agent fills with its intermediate steps.
# Conversation history is not part of the prompt (no "history" placeholder).
prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        # The trailing space keeps the two sentences from running together
        # when the adjacent literals are concatenated.
        "You are a helpful virtual assistant. "
        "You should only use the google_search_name agent tool to search for information when necessary.",
    ),
    ("human", "{question}"),
    ("placeholder", "{agent_scratchpad}"),
])
# Build a chat prompt template from the URL-grounded message list.
# NOTE(review): the agent below is created with `prompt`, not this template.
prompt_text = ChatPromptTemplate.from_messages(prompt_template)
# print(prompt_text)
# prompt template input variables
# prompt_text.input_variables = ["question", "url"], input_variables = ["question", "url"]
# Google search client configured with the credentials read from the environment.
search = GoogleSearchAPIWrapper(
    google_cse_id=GOOGLE_CSE_ID,
    google_api_key=GOOGLE_API_KEY,
)
# Wrap the search client's run method as the single tool offered to the agent.
tool = Tool(
    func=search.run,
    name="google_search_name",
    description="The model should use this tool when it needs more information from the internet.",
)
# Tool-calling agent built from the chat model, the search tool and `prompt`
# (the alternative `prompt_text` template is not used here).
agent = create_tool_calling_agent(llm=llm, tools=[tool], prompt=prompt)
# Executor that runs the agent with the same tool; verbose output is disabled.
agent_executor = AgentExecutor(tools=[tool], agent=agent, verbose=False)
def parse_url(question: str) -> dict:
    """Run the module-level search agent on ``question``.

    Args:
        question: The user's question; it fills the ``{question}`` variable
            of the agent prompt.

    Returns:
        The value returned by ``agent_executor.invoke``, passed through
        unchanged. NOTE(review): with LangChain's AgentExecutor this is
        expected to be a mapping whose "output" entry holds the answer
        text -- confirm against the installed version.
    """
    return agent_executor.invoke(input={"question": question})
# message = HumanMessage(content="inurl: https://learn.microsoft.com 'what are cloud security best practices'")
# print(parse_url(message))