"""Gradio chat front-end for a GBV-in-emergencies question-answering agent.

The script wires together:

* a Chroma vector store, either loaded from disk or rebuilt from the files
  under ``./Reports/`` plus two web pages,
* a ``RetrievalQAWithSourcesChain`` over that store, exposed to the agent as a
  tool next to a Wikipedia lookup tool,
* a conversational ReAct agent with buffer memory, and
* a Gradio ``Blocks`` UI that shows the agent's reply character by character.

Everything runs at import time; the Gradio app is launched on the last line.
"""
# Import order is kept as in the original script: the wildcard import below
# may (re)define names, so moving it relative to the others could change
# which object a name ends up bound to.
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import Chroma
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.agents import AgentExecutor, Tool, load_tools
from langchain.chains import RetrievalQA, RetrievalQAWithSourcesChain, LLMChain, LLMMathChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import DirectoryLoader
from langchain.memory import ConversationBufferMemory
from langchain.utilities import WikipediaAPIWrapper
from langchain.agents import initialize_agent, AgentType
from langchain.document_loaders import WebBaseLoader
import gradio as gr

# Add presets for Gradio theme (provides `small_and_beautiful_theme` used below)
from app_modules.presets import *

import os

# The OpenAI client reads OPENAI_API_KEY; the deployment stores the secret
# under OPENAI_TOKEN. A missing OPENAI_TOKEN raises KeyError here.
os.environ["OPENAI_API_KEY"] = os.environ["OPENAI_TOKEN"]

# Flag to load chroma store: True reuses the persisted store, False rebuilds it
flag_chroma = True

# Define the LLM chat model ('gpt-4' is the alternative that was tried)
model = 'gpt-3.5-turbo'
temperature = 0
llm = ChatOpenAI(temperature=temperature, model=model)

# Both branches below use the same on-disk location and embedding function,
# so they are defined once instead of once per branch.
persist_dir = "./chroma"
embeddings = OpenAIEmbeddings()

# Check flag to load vectorstore
if flag_chroma:
    # Load an existing database. Nothing is added to the store in this
    # branch, so no persist() call is made.
    vectorstore = Chroma(persist_directory=persist_dir, embedding_function=embeddings)
else:
    # Document and sources loader
    pdf_loader = DirectoryLoader('./Reports/', glob="**/*.pdf")
    txt_loader = DirectoryLoader('./Reports/', glob="**/*.txt")
    word_loader = DirectoryLoader('./Reports/', glob="**/*.docx")
    web_based_loader = WebBaseLoader([
        "https://www.unwomen.org/en/what-we-do/ending-violence-against-women/faqs/types-of-violence",
        "https://2021.gho.unocha.org/global-trends/gender-and-gender-based-violence-humanitarian-action/",
    ])

    loaders = [pdf_loader, txt_loader, word_loader, web_based_loader]
    docs = []
    for loader in loaders:
        docs.extend(loader.load())

    # Text splitter
    ## If chunks are bigger than 1000, it recursively splits them until fitting them within size
    text_splitter = RecursiveCharacterTextSplitter(
        separators=["\n\n", "\n", ".", "!", "?", ",", " ", ""],
        chunk_size=1000,
        chunk_overlap=50,
    )
    documents = text_splitter.split_documents(docs)

    # Embed documents in Vectorstore and write the new store to disk
    vectorstore = Chroma.from_documents(documents, embeddings, persist_directory=persist_dir)
    vectorstore.persist()

# Create Retrieval Chain with sources
## It returns a dictionary with at least the 'answer' and the 'sources'
qa = RetrievalQAWithSourcesChain.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vectorstore.as_retriever(),
    return_source_documents=True,
)


def ask_gbv_knowledge_base(question):
    """Run the retrieval chain on ``question`` and return only its outputs.

    This is the callable behind the "GBV Q&A Bot System" tool; the agent
    passes the tool input as ``question``.
    """
    return qa({"question": question}, return_only_outputs=True)


# Define tools
wikipedia = WikipediaAPIWrapper()

tools = [
    Tool(
        name="GBV Q&A Bot System",
        func=ask_gbv_knowledge_base,
        description="Useful for when you need to answer questions about the aspects asked. Input may be a partial or fully formed question.",
    ),
    Tool(
        name='Wikipedia',
        func=wikipedia.run,
        description='You must only use this tool if you cannot find answers with the other tools. Useful for when you need to look for answers in the Wikipedia.',
    ),
]

# Create Buffer Memory.
# NOTE: this single object is created at module level and handed to the one
# agent executor below, so every call to `bot` reads and writes the same
# conversation history.
memory = ConversationBufferMemory(
    memory_key="chat_history",
    input_key='input',
    output_key="output",
    return_messages=True,
)

# Initialize Re-Act agent and create Agent Executor Chain
react = initialize_agent(
    tools,
    llm,
    agent=AgentType.CHAT_CONVERSATIONAL_REACT_DESCRIPTION,
    verbose=True,
    max_iterations=3,
    early_stopping_method='generate',
    memory=memory,
)

# NOTE(review): `return_source_documents` does not look like an AgentExecutor
# option - confirm whether it has any effect here. It is kept as in the
# original call.
agent_chain = AgentExecutor.from_agent_and_tools(
    agent=react.agent,
    tools=tools,
    verbose=True,
    memory=memory,
    return_intermediate_steps=True,
    return_source_documents=True,
    handle_parsing_errors=True,
)

# Add custom CSS
with open("assets/custom.css", "r", encoding="utf-8") as f:
    customCSS = f.read()

with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:

    gr.Markdown(
        """
        # 🦜🔗 Ask the GBV in Emergencies Q&A Bot!
        This generative model has been trained on various sources covering themes on Gender-Based Violence response in Humanitarian Settings thematic. The IA might complement the replies with additional information retrieved from Wikipedia sources. You can maitaing a natural language conversation in order to retrieve information on this subject.

        Example questions:
        - What is GBV?
        - Who is in charge of the GBV Area in Emergencies?
        - Please prepare a strategy to minimize GBV risks in a temporary settlement
        """
    )

    # Start chatbot with welcome from bot
    chatbot = gr.Chatbot([(None,'How can I help you?')]).style(height=600)
    msg = gr.Textbox()
    clear = gr.ClearButton([msg, chatbot])

    def user(user_message, history):
        """Queue the submitted text as a new chat turn awaiting a reply.

        Returns an update that empties and disables the textbox, plus the
        history extended with ``[user_message, None]``. A ``None`` history is
        treated as an empty one so the concatenation cannot fail.
        """
        pending_turn = [user_message, None]
        return gr.update(value="", interactive=False), (history or []) + [pending_turn]

    def bot(history):
        """Answer the latest user message and stream the reply into ``history``.

        The agent is called once with the text of the last history entry;
        its ``'output'`` is then revealed one character per yield.
        """
        user_message = history[-1][0]  # text of the most recent history element
        response = agent_chain(user_message)
        bot_message = response['output']
        history[-1][1] = ""
        for character in bot_message:
            history[-1][1] += character
            yield history
        if not bot_message:
            # The loop above yields nothing for an empty reply; yield once so
            # the empty string set above is actually sent to the chat window.
            yield history

    def reset_memory():
        """Forget the stored conversation so a cleared chat starts fresh."""
        memory.clear()

    response = msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False).then(
        bot, chatbot, chatbot
    )
    # Re-enable the textbox once the bot step has finished.
    response.then(lambda: gr.update(interactive=True), None, [msg], queue=False)
    # The clear button is only wired to the textbox and chat window above;
    # without this handler the agent memory would keep the old transcript.
    clear.click(reset_memory, None, None, queue=False)

demo.queue()
demo.launch()