import os

# Chroma needs a newer sqlite3 than many systems ship; swap in pysqlite3
# before anything imports the stock sqlite3 module.
__import__("pysqlite3")
import sys
sys.modules["sqlite3"] = sys.modules.pop("pysqlite3")

import gradio as gr
from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import Docx2txtLoader, PyPDFLoader, TextLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma

# The OpenAI client reads OPENAI_API_KEY; copy it from the OPENAPIKEY variable.
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAPIKEY")

# Load every PDF, Word, and plain-text document from the input directory.
docs = []
for f in os.listdir("input/multiple_docs"):
    path = os.path.join("input/multiple_docs", f)
    if f.endswith(".pdf"):
        docs.extend(PyPDFLoader(path).load())
    elif f.endswith(".docx") or f.endswith(".doc"):
        docs.extend(Docx2txtLoader(path).load())
    elif f.endswith(".txt"):
        docs.extend(TextLoader(path).load())

# Split the documents into overlapping chunks.
splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
docs = splitter.split_documents(docs)

# Convert the document chunks to embeddings and save them to the vector store.
vectorstore = Chroma.from_documents(
    docs, embedding=OpenAIEmbeddings(), persist_directory="./data"
)
vectorstore.persist()

# Conversational retrieval chain: fetches the 6 most relevant chunks and
# passes them, together with the chat history, to the chat model.
chain = ConversationalRetrievalChain.from_llm(
    ChatOpenAI(temperature=0.7, model_name="gpt-3.5-turbo"),
    retriever=vectorstore.as_retriever(search_kwargs={"k": 6}),
    return_source_documents=True,
    verbose=False,
)

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        [("", "Hello, I'm Thierry Decae's chatbot. You can ask me any "
              "recruitment-related questions, such as my previous experience, "
              "where I'm eligible to work, when I can start work, my most "
              "recent experience, what NLP skills I have, and much more!")],
        avatar_images=["./input/avatar/Guest.jpg",
                       "./input/avatar/Thierry Picture.jpg"],
    )
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(query, chat_history):
        # The Chatbot component stores history as [user, bot] pairs;
        # the chain expects a list of tuples.
        chat_history_tuples = [(m[0], m[1]) for m in chat_history]
        # Run the retrieval chain on the query plus the prior turns.
        result = chain({"question": query, "chat_history": chat_history_tuples})
        # Record the new exchange and clear the input box.
        chat_history.append((query, result["answer"]))
        return gr.update(value=""), chat_history

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False)
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch(debug=True)