chatbot / app.py
tdecae's picture
Update app.py
ebf2c1b verified
raw
history blame
No virus
6.61 kB
import os
import sys
import openai
from langchain.chains import ConversationalRetrievalChain, RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import DirectoryLoader, TextLoader
from langchain.embeddings import OpenAIEmbeddings
from langchain.indexes import VectorstoreIndexCreator
from langchain.indexes.vectorstore import VectorStoreIndexWrapper
from langchain.llms import OpenAI
from langchain.text_splitter import CharacterTextSplitter
__import__('pysqlite3')
import sys
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
from langchain.vectorstores import Chroma
import gradio as gr
# Mirror the deployment secret OPENAPIKEY into OPENAI_API_KEY for the OpenAI
# clients constructed further down. os.environ only accepts strings, so
# assigning os.getenv()'s result directly raises TypeError when the secret is
# unset; in that case leave any already-configured OPENAI_API_KEY untouched.
_api_key = os.getenv("OPENAPIKEY")
if _api_key:
    os.environ["OPENAI_API_KEY"] = _api_key
# PyPDFLoader and Docx2txtLoader are needed below but are missing from the
# file's import block; without this import the first .pdf/.docx/.doc file in
# the folder raises NameError.
from langchain.document_loaders import Docx2txtLoader, PyPDFLoader

# File-name suffixes handled by each loader class. Files with any other suffix
# (e.g. the avatar images stored in the same folder) are skipped.
_LOADERS_BY_SUFFIX = (
    ((".pdf",), PyPDFLoader),
    # NOTE(review): Docx2txtLoader is also applied to legacy ".doc" files, as
    # in the original code -- confirm it can actually parse that format.
    ((".docx", ".doc"), Docx2txtLoader),
    ((".txt",), TextLoader),
)

# Load every supported document found in ./multiple_docs.
docs = []
for f in os.listdir("multiple_docs"):
    for suffixes, loader_cls in _LOADERS_BY_SUFFIX:
        if f.endswith(suffixes):
            docs.extend(loader_cls("./multiple_docs/" + f).load())
            break

# Split the loaded documents into chunks of at most 1000 characters with a
# 10-character overlap before they are embedded.
splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
docs = splitter.split_documents(docs)
# Embed the document chunks with OpenAI embeddings and store them in a Chroma
# vector store persisted under ./data.
embeddings = OpenAIEmbeddings()
vectorstore = Chroma.from_documents(
    docs,
    embedding=embeddings,
    persist_directory="./data",
)
vectorstore.persist()

# Build the conversational retrieval chain: a gpt-3.5-turbo chat model
# answering over the 6 chunks the retriever returns for each question.
llm = ChatOpenAI(temperature=0.7, model_name='gpt-3.5-turbo')
retriever = vectorstore.as_retriever(search_kwargs={'k': 6})
chain = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=retriever,
    return_source_documents=True,
    verbose=False,
)
# Kept so the module still exposes this name; the handler below works on the
# history Gradio passes in from the Chatbot component, not on this global.
chat_history = []

# First bot message shown when the page loads.
GREETING = "Hello, I'm Thierry Decae's chatbot, you can ask me any recruitment related questions such as my previous or most recent experience, where I'm eligible to work, when I can start work, what NLP skills I have, and much more! you can chat with me directly in multiple languages"


def user(query, chat_history):
    """Answer one user message and return the updated conversation.

    Args:
        query: Text submitted from the textbox.
        chat_history: Current value of the Chatbot component, a sequence of
            (user message, bot message) pairs. ``None`` is treated as empty.

    Returns:
        A pair ``(textbox_update, history)``: an update that clears the
        textbox, and the conversation with the new (query, answer) pair
        appended. Blank queries return the history unchanged.
    """
    # Work on a copy so the list handed in by Gradio is never mutated.
    history = list(chat_history or [])

    # Do not spend an LLM call on an empty or whitespace-only submission.
    if not query or not query.strip():
        return gr.update(value=""), history

    # The chain expects the history as a list of (question, answer) tuples.
    chat_history_tuples = [(message[0], message[1]) for message in history]
    result = chain({"question": query, "chat_history": chat_history_tuples})

    history.append((query, result["answer"]))
    return gr.update(value=""), history


with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        [("", GREETING)],
        avatar_images=["./multiple_docs/Guest.jpg", "./multiple_docs/Thierry Picture.jpg"],
    )
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    # Submitting the textbox runs the handler and refreshes both components.
    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False)
    # "Clear" resets the chatbot component by sending it None.
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch(debug=True)
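# NOTE(review): everything below is a disabled (fully commented-out)
# alternative implementation that uses SentenceTransformer embeddings and a
# local Hugging Face text-generation pipeline instead of OpenAI. It is never
# executed.
# import os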
# import sys
# from langchain.chains import ConversationalRetrievalChain
# from langchain.document_loaders import PyPDFLoader, Docx2txtLoader, TextLoader
# from langchain.text_splitter import CharacterTextSplitter
# from langchain.vectorstores import Chroma
# import gradio as gr
# from transformers import pipeline
# from sentence_transformers import SentenceTransformer
# __import__('pysqlite3')
# sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
# docs = []
# for f in os.listdir("multiple_docs"):
# if f.endswith(".pdf"):
# pdf_path = "./multiple_docs/" + f
# loader = PyPDFLoader(pdf_path)
# docs.extend(loader.load())
# elif f.endswith('.docx') or f.endswith('.doc'):
# doc_path = "./multiple_docs/" + f
# loader = Docx2txtLoader(doc_path)
# docs.extend(loader.load())
# elif f.endswith('.txt'):
# text_path = "./multiple_docs/" + f
# loader = TextLoader(text_path)
# docs.extend(loader.load())
# splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
# docs = splitter.split_documents(docs)
# # Extract the content from documents and create embeddings
# embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
# texts = [doc.page_content for doc in docs]
# embeddings = embedding_model.encode(texts).tolist() # Convert numpy arrays to lists
# # Create a Chroma vector store and add documents and their embeddings
# vectorstore = Chroma(persist_directory="./db", embedding_function=embedding_model.encode)
# vectorstore.add_texts(texts=texts, metadatas=[{"id": i} for i in range(len(texts))], embeddings=embeddings)
# vectorstore.persist()
# # Load the Hugging Face model for text generation
# generator = pipeline("text-generation", model="EleutherAI/gpt-neo-2.7B")
# class HuggingFaceLLMWrapper:
# def __init__(self, generator):
# self.generator = generator
# def __call__(self, prompt, max_length=512):
# result = self.generator(prompt, max_length=max_length, num_return_sequences=1)
# return result[0]['generated_text']
# llm = HuggingFaceLLMWrapper(generator)
# chain = ConversationalRetrievalChain.from_llm(
# llm,
# retriever=vectorstore.as_retriever(search_kwargs={'k': 6}),
# return_source_documents=True,
# verbose=False
# )
# chat_history = []
# with gr.Blocks() as demo:
# chatbot = gr.Chatbot([("", "Hello, I'm Thierry Decae's chatbot, you can ask me any recruitment related questions such as my previous or most recent experience, where I'm eligible to work, when I can start work, what NLP skills I have, and much more! you can chat with me directly in multiple languages")], avatar_images=["./multiple_docs/Guest.jpg","./multiple_docs/Thierry Picture.jpg"])
# msg = gr.Textbox()
# clear = gr.Button("Clear")
# chat_history = []
# def user(query, chat_history):
# # Convert chat history to list of tuples
# chat_history_tuples = []
# for message in chat_history:
# chat_history_tuples.append((message[0], message[1]))
# # Get result from QA chain
# result = chain({"question": query, "chat_history": chat_history_tuples})
# # Append user message and response to chat history
# chat_history.append((query, result["answer"]))
# return gr.update(value=""), chat_history
# msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False)
# clear.click(lambda: None, None, chatbot, queue=False)
# demo.launch(debug=True)