Spaces:
Running
Running
# import os | |
# import sys | |
# import openai | |
# from langchain.chains import ConversationalRetrievalChain, RetrievalQA | |
# from langchain.chat_models import ChatOpenAI | |
# from langchain.document_loaders import DirectoryLoader, TextLoader | |
# from langchain.embeddings import OpenAIEmbeddings | |
# from langchain.indexes import VectorstoreIndexCreator | |
# from langchain.indexes.vectorstore import VectorStoreIndexWrapper | |
# from langchain.llms import OpenAI | |
# from langchain.text_splitter import CharacterTextSplitter | |
# __import__('pysqlite3') | |
# import sys | |
# sys.modules['sqlite3'] = sys.modules.pop('pysqlite3') | |
# from langchain.vectorstores import Chroma | |
# import gradio as gr | |
# os.environ["OPENAI_API_KEY"] = os.getenv("OPENAPIKEY") | |
# docs = [] | |
# for f in os.listdir("multiple_docs"): | |
# if f.endswith(".pdf"): | |
# pdf_path = "./multiple_docs/" + f | |
# loader = PyPDFLoader(pdf_path) | |
# docs.extend(loader.load()) | |
# elif f.endswith('.docx') or f.endswith('.doc'): | |
# doc_path = "./multiple_docs/" + f | |
# loader = Docx2txtLoader(doc_path) | |
# docs.extend(loader.load()) | |
# elif f.endswith('.txt'): | |
# text_path = "./multiple_docs/" + f | |
# loader = TextLoader(text_path) | |
# docs.extend(loader.load()) | |
# splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10) | |
# docs = splitter.split_documents(docs) | |
# # Convert the document chunks to embedding and save them to the vector store | |
# vectorstore = Chroma.from_documents(docs, embedding=OpenAIEmbeddings(), persist_directory="./data") | |
# vectorstore.persist() | |
# chain = ConversationalRetrievalChain.from_llm( | |
# ChatOpenAI(temperature=0.7, model_name='gpt-3.5-turbo'), | |
# retriever=vectorstore.as_retriever(search_kwargs={'k': 6}), | |
# return_source_documents=True, | |
# verbose=False | |
# ) | |
# chat_history = [] | |
# with gr.Blocks() as demo: | |
# chatbot = gr.Chatbot([("", "Hello, I'm Thierry Decae's chatbot, you can ask me any recruitment related questions such as my previous or most recent experience, where I'm eligible to work, when I can start work, what NLP skills I have, and much more! you can chat with me directly in multiple languages")],avatar_images=["./multiple_docs/Guest.jpg","./multiple_docs/Thierry Picture.jpg"]) | |
# msg = gr.Textbox() | |
# clear = gr.Button("Clear") | |
# chat_history = [] | |
# def user(query, chat_history): | |
# # print("User query:", query) | |
# # print("Chat history:", chat_history) | |
# # Convert chat history to list of tuples | |
# chat_history_tuples = [] | |
# for message in chat_history: | |
# chat_history_tuples.append((message[0], message[1])) | |
# # Get result from QA chain | |
# result = chain({"question": query, "chat_history": chat_history_tuples}) | |
# # Append user message and response to chat history | |
# chat_history.append((query, result["answer"])) | |
# # print("Updated chat history:", chat_history) | |
# return gr.update(value=""), chat_history | |
# msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False) | |
# clear.click(lambda: None, None, chatbot, queue=False) | |
# demo.launch(debug=True) | |
import os
import sys

import gradio as gr
from langchain.chains import ConversationalRetrievalChain
from langchain.document_loaders import PyPDFLoader, Docx2txtLoader, TextLoader
from langchain.llms import HuggingFacePipeline
from langchain.text_splitter import CharacterTextSplitter
from langchain.vectorstores import Chroma
from sentence_transformers import SentenceTransformer
from transformers import pipeline

# Register pysqlite3 under the name "sqlite3" so that any later
# `import sqlite3` resolves to it instead of the stdlib module.
# NOTE(review): presumably needed because the vector store backend requires a
# newer SQLite than the host provides - confirm.
__import__('pysqlite3')
sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
# Directory scanned for source documents. The "./" prefix is kept so the paths
# handed to the loaders stay of the form "./multiple_docs/<file name>".
DOCS_DIR = "./multiple_docs"

# (file-name suffixes, loader class) pairs; the first matching entry wins.
# NOTE(review): ".doc" is routed to Docx2txtLoader exactly as before; whether
# that loader can read legacy binary .doc files should be confirmed.
_LOADERS_BY_SUFFIX = (
    ((".pdf",), PyPDFLoader),
    ((".docx", ".doc"), Docx2txtLoader),
    ((".txt",), TextLoader),
)


def _load_documents(directory):
    """Load every supported file found directly inside *directory*.

    Files whose name does not end in one of the known suffixes (images, for
    example) are skipped. Suffix matching is case-insensitive so that a file
    such as ``CV.PDF`` is not silently ignored.

    Args:
        directory: Path of the folder to scan (not recursive).

    Returns:
        A list with the documents produced by each loader's ``load()``,
        in sorted file-name order.
    """
    loaded = []
    # os.listdir() order is arbitrary; sorting keeps the chunk order (and the
    # positional ids assigned to chunks later) reproducible between runs.
    for file_name in sorted(os.listdir(directory)):
        lowered = file_name.lower()
        loader_cls = next(
            (cls for suffixes, cls in _LOADERS_BY_SUFFIX if lowered.endswith(suffixes)),
            None,
        )
        if loader_cls is None:
            continue
        loaded.extend(loader_cls(os.path.join(directory, file_name)).load())
    return loaded


docs = _load_documents(DOCS_DIR)

# Split the loaded documents into ~1000-character chunks with a 10-character
# overlap between consecutive chunks.
splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
docs = splitter.split_documents(docs)
class _SentenceTransformerEmbeddings:
    """Expose a SentenceTransformer through ``embed_documents``/``embed_query``.

    The LangChain Chroma wrapper calls these two methods on whatever object it
    receives as ``embedding_function``; a bare ``encode`` method does not
    provide them, so the model is wrapped in this adapter.
    """

    def __init__(self, model):
        self.model = model

    def embed_documents(self, texts):
        """Return one embedding (list of floats) per input text."""
        # .tolist() converts the numpy array returned by encode() to plain lists.
        return self.model.encode(list(texts)).tolist()

    def embed_query(self, text):
        """Return the embedding (list of floats) of a single query string."""
        return self.model.encode([text])[0].tolist()


# Sentence-embedding model used both to index the chunks and to embed queries.
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
texts = [doc.page_content for doc in docs]

# Create the persistent Chroma store. The store computes the embeddings itself
# through the adapter, so the chunks are no longer pre-encoded here.
vectorstore = Chroma(
    persist_directory="./db",
    embedding_function=_SentenceTransformerEmbeddings(embedding_model),
)
if texts:
    # Each chunk is tagged with its position in `texts` as its only metadata.
    vectorstore.add_texts(
        texts=texts,
        metadatas=[{"id": i} for i in range(len(texts))],
    )
vectorstore.persist()
# Upper bound on the number of tokens generated per answer. Set on the pipeline
# itself so it applies however the pipeline is invoked; unlike `max_length`,
# it does not count the (long) retrieval prompt. Tune as needed.
MAX_NEW_TOKENS = 256

# Load the Hugging Face model for text generation.
generator = pipeline(
    "text-generation",
    model="EleutherAI/gpt-neo-2.7B",
    max_new_tokens=MAX_NEW_TOKENS,
)


class HuggingFaceLLMWrapper:
    """Minimal callable around a text-generation pipeline.

    Kept for direct ``wrapper(prompt)`` use. It is a plain Python object, not
    a LangChain language model, so it is not what the retrieval chain below
    is built from.
    """

    def __init__(self, generator):
        """Store the pipeline callable used to generate text."""
        self.generator = generator

    def __call__(self, prompt, max_length=512):
        """Generate one sequence for *prompt* and return its text.

        Args:
            prompt: Text handed to the pipeline.
            max_length: Forwarded to the pipeline as ``max_length``.

        Returns:
            The ``generated_text`` field of the first returned sequence.
        """
        result = self.generator(prompt, max_length=max_length, num_return_sequences=1)
        return result[0]['generated_text']


# ConversationalRetrievalChain needs a LangChain language model, so the
# pipeline is wrapped in LangChain's own HuggingFacePipeline adapter.
llm = HuggingFacePipeline(pipeline=generator)

# Retrieval chain: fetches the 6 nearest chunks for each question and returns
# the source documents alongside the answer.
chain = ConversationalRetrievalChain.from_llm(
    llm,
    retriever=vectorstore.as_retriever(search_kwargs={'k': 6}),
    return_source_documents=True,
    verbose=False,
)
# Opening bot message shown before the visitor has typed anything.
GREETING = "Hello, I'm Thierry Decae's chatbot, you can ask me any recruitment related questions such as my previous or most recent experience, where I'm eligible to work, when I can start work, what NLP skills I have, and much more! you can chat with me directly in multiple languages"  # noqa: E501

with gr.Blocks() as demo:
    chatbot = gr.Chatbot(
        [("", GREETING)],
        avatar_images=["./multiple_docs/Guest.jpg", "./multiple_docs/Thierry Picture.jpg"],
    )
    msg = gr.Textbox()
    clear = gr.Button("Clear")

    def user(query, chat_history):
        """Answer *query* with the retrieval chain and extend the chat log.

        Args:
            query: Text submitted through the textbox.
            chat_history: Current chatbot value, a sequence of
                (user message, bot message) pairs; ``None`` is treated as
                an empty history.

        Returns:
            A pair ``(textbox update, chat history)``: the update clears the
            textbox, and the history has the new (query, answer) turn
            appended. A blank query leaves the history unchanged.
        """
        history = list(chat_history or [])

        # Nothing to ask: skip the retrieval + generation round-trip.
        if not query or not query.strip():
            return gr.update(value=""), history

        # The chain expects the history as a list of (question, answer) tuples.
        history_pairs = [(turn[0], turn[1]) for turn in history]
        result = chain({"question": query, "chat_history": history_pairs})

        history.append((query, result["answer"]))
        return gr.update(value=""), history

    msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False)
    # Clearing resets the chatbot component to an empty value.
    clear.click(lambda: None, None, chatbot, queue=False)

demo.launch(debug=True)