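"""Document Q&A chatbot.

Loads PDF, Word, and plain-text files, embeds them into a persistent Chroma
vector store with OpenAI embeddings, and serves a Gradio chat interface
backed by a LangChain ConversationalRetrievalChain. Written against the
legacy (pre-0.1) LangChain package layout.
"""
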
import os

from langchain.chains import ConversationalRetrievalChain
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import Docx2txtLoader, PyPDFLoader, TextLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.text_splitter import CharacterTextSplitter

# Chroma needs a newer sqlite3 than many systems provide; swap in the
# pysqlite3 module before Chroma is imported.
__import__('pysqlite3')
import sys
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')

from langchain.vectorstores import Chroma
import gradio as gr

# LangChain's OpenAI clients read OPENAI_API_KEY; the key lives in the
# OPENAPIKEY environment variable here, so copy it across.
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAPIKEY")

docs = []

# Gather every supported document (PDF, Word, plain text) from the input folder.
for f in os.listdir("input/multiple_docs"):
    path = os.path.join("input/multiple_docs", f)
    if f.endswith(".pdf"):
        loader = PyPDFLoader(path)
        docs.extend(loader.load())
    elif f.endswith(".docx") or f.endswith(".doc"):
        loader = Docx2txtLoader(path)
        docs.extend(loader.load())
    elif f.endswith(".txt"):
        loader = TextLoader(path)
        docs.extend(loader.load())

# Split the documents into ~1,000-character chunks with a 10-character overlap.
splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
docs = splitter.split_documents(docs)

# Embed the document chunks and save them to a persistent Chroma vector store.
vectorstore = Chroma.from_documents(docs, embedding=OpenAIEmbeddings(), persist_directory="./data")
vectorstore.persist()
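# persist() flushes the index to ./data; a later run could skip re-embedding
# and reload it with Chroma(persist_directory="./data",
# embedding_function=OpenAIEmbeddings()).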

# Build the conversational retrieval chain: each question is condensed with
# the chat history, the six most relevant chunks are retrieved from Chroma,
# and gpt-3.5-turbo generates the answer.
chain = ConversationalRetrievalChain.from_llm(
    ChatOpenAI(temperature=0.7, model_name='gpt-3.5-turbo'),
    retriever=vectorstore.as_retriever(search_kwargs={'k': 6}),
    return_source_documents=True,
    verbose=False
)
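# With return_source_documents=True, each result also carries the retrieved
# chunks under result["source_documents"] (unused below, but handy when
# checking what the model was shown).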

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        [("", "Hello, I'm Thierry Decae's chatbot. You can ask me any recruitment related questions, such as my previous experience, where I'm eligible to work, when I can start work, my most recent experience, what NLP skills I have, and much more!")],
        avatar_images=["./input/avatar/Guest.jpg", "./input/avatar/Thierry Picture.jpg"],
    )
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(query, chat_history):
        # Gradio passes the history as a list of [user, bot] pairs; the
        # chain expects a list of (user, bot) tuples.
        chat_history_tuples = [(message[0], message[1]) for message in chat_history]

        # Run the retrieval chain on the question plus the prior turns.
        result = chain({"question": query, "chat_history": chat_history_tuples})

        # Record the new exchange and clear the input textbox.
        chat_history.append((query, result["answer"]))
        return gr.update(value=""), chat_history

    # Submitting the textbox feeds (message, history) into user() and writes
    # back the cleared textbox and updated history; Clear empties the window.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False)
    clear.click(lambda: None, None, chatbot, queue=False)

# debug=True blocks the main thread and prints full error tracebacks,
# which is useful when running in a notebook.
demo.launch(debug=True)