Spaces:
Running
Running
File size: 3,021 Bytes
2ff0eb9 5ddd792 2ff0eb9 5ddd792 2ff0eb9 5ddd792 1bc98ee 5ddd792 1bc98ee 5ddd792 1bc98ee 5ddd792 1bc98ee 5ddd792 2ff0eb9 5ddd792 2ff0eb9 5ddd792 1bc98ee |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
import os
import sys

import openai
from langchain.chains import ConversationalRetrievalChain, RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import (
    DirectoryLoader,
    Docx2txtLoader,
    PyPDFLoader,
    TextLoader,
)
from langchain.embeddings import OpenAIEmbeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
# Register pysqlite3 under the name `sqlite3` BEFORE the Chroma vector store
# is imported, so anything importing `sqlite3` afterwards gets pysqlite3.
# The position of these three lines relative to the Chroma import matters.
__import__('pysqlite3')
import sys
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
from langchain.vectorstores import Chroma
import gradio as gr
# Re-export the API key from the OPENAPIKEY environment variable under the
# name OPENAI_API_KEY (presumably the name the OpenAI clients used below look
# up -- confirm against the library docs).
# os.getenv returns None when the variable is unset, and os.environ only
# accepts str values, so fail with an explicit message instead of an opaque
# "TypeError: str expected, not NoneType".
_openai_api_key = os.getenv("OPENAPIKEY")
if not _openai_api_key:
    raise RuntimeError(
        "Environment variable OPENAPIKEY is not set; it must contain the "
        "OpenAI API key."
    )
os.environ["OPENAI_API_KEY"] = _openai_api_key
# Load every supported file found in input/multiple_docs into `docs`,
# choosing the loader from the file extension. Files with any other
# extension are skipped.
docs = []
for f in os.listdir("input/multiple_docs"):
    path = os.path.join("./input/multiple_docs", f)
    if f.endswith(".pdf"):
        loader = PyPDFLoader(path)
    elif f.endswith((".docx", ".doc")):
        # NOTE(review): legacy binary .doc files are routed to the .docx
        # loader here -- confirm it can actually parse them.
        loader = Docx2txtLoader(path)
    elif f.endswith(".txt"):
        loader = TextLoader(path)
    else:
        continue
    docs.extend(loader.load())

# Split the loaded documents into chunks (chunk_size=1000, chunk_overlap=10)
# before they are embedded.
splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
docs = splitter.split_documents(docs)
# Embed the document chunks with OpenAI embeddings and store them in a Chroma
# vector store whose data directory is ./data, then persist the store.
embedding_model = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(
    docs,
    embedding=embedding_model,
    persist_directory="./data",
)
vectorstore.persist()
# Build the conversational question-answering chain: a gpt-3.5-turbo chat
# model combined with a retriever over the vector store (search_kwargs k=6).
# Source documents are returned alongside each answer.
llm = ChatOpenAI(model_name='gpt-3.5-turbo', temperature=0.7)
retriever = vectorstore.as_retriever(search_kwargs={'k': 6})
chain = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=retriever,
    return_source_documents=True,
    verbose=False,
)
# Kept as a module-level name for backward compatibility. The conversation
# state used at runtime is the Chatbot component's value, which is passed
# into `user` on every submit.
chat_history = []

with gr.Blocks() as demo:
    # The chatbot starts with a single greeting turn (empty user message).
    chatbot = gr.Chatbot(
        [
            (
                "",
                "Hello, I'm Thierry Decae's chatbot, you can ask me any recruitment related questions such as my previous experience, where i'm eligible to work, when I can start work, my most recent experience, what NLP skills I have, and much more!",
            )
        ],
        avatar_images=[
            "./input/avatar/Guest.jpg",
            "./input/avatar/Thierry Picture.jpg",
        ],
    )
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(query, chat_history):
        """Answer `query` with the QA chain and append the turn to the history.

        Args:
            query: Text submitted from the textbox.
            chat_history: Current chatbot value, a sequence of
                (user message, bot message) pairs. None is treated as empty.

        Returns:
            A pair of (update that clears the textbox, updated chat history).
        """
        # Defensive: treat a missing history as an empty conversation.
        chat_history = chat_history or []
        # The chain is given the history as a list of 2-tuples.
        history_pairs = [(turn[0], turn[1]) for turn in chat_history]
        result = chain({"question": query, "chat_history": history_pairs})
        chat_history.append((query, result["answer"]))
        return gr.update(value=""), chat_history

    # Submitting the textbox runs `user`, which clears the textbox and
    # refreshes the chatbot. The Clear button resets the chatbot value.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False)
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch(debug=True)