Spaces:

tdecae
/

chatbot

Running

App Files Community

tdecae committed on May 23

Commit

a895164

•

1 Parent(s): e4d6780

Update app.py

Browse files

Files changed (1) hide show

app.py +95 -15

app.py CHANGED Viewed

@@ -1,13 +1,93 @@
 import os
 import sys
-import openai
-from langchain.chains import ConversationalRetrievalChain, RetrievalQA
-from langchain.chat_models import ChatOpenAI
 from langchain.document_loaders import DirectoryLoader, TextLoader
-from langchain.embeddings import OpenAIEmbeddings
 from langchain.indexes import VectorstoreIndexCreator
 from langchain.indexes.vectorstore import VectorStoreIndexWrapper
-from langchain.llms import OpenAI
 from langchain.text_splitter import CharacterTextSplitter
 __import__('pysqlite3')
@@ -16,8 +96,7 @@ sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
 from langchain.vectorstores import Chroma
 import gradio as gr
-os.environ["OPENAI_API_KEY"] = os.getenv("OPENAPIKEY")
 docs = []
@@ -39,11 +118,14 @@ splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
 docs = splitter.split_documents(docs)
 # Convert the document chunks to embedding and save them to the vector store
-vectorstore = Chroma.from_documents(docs, embedding=OpenAIEmbeddings(), persist_directory="./data")
 vectorstore.persist()
 chain = ConversationalRetrievalChain.from_llm(
-    ChatOpenAI(temperature=0.7, model_name='gpt-3.5-turbo'),
     retriever=vectorstore.as_retriever(search_kwargs={'k': 6}),
     return_source_documents=True,
     verbose=False
@@ -52,15 +134,12 @@ chain = ConversationalRetrievalChain.from_llm(
 chat_history = []
 with gr.Blocks() as demo:
-    chatbot = gr.Chatbot([("", "Hello, I'm Thierry Decae's chatbot, you can ask me any recruitment related questions such as my previous or most recent experience, where I'm eligible to work, when I can start work, what NLP skills I have, and much more! you can chat with me directly in multiple languages")],avatar_images=["./multiple_docs/Guest.jpg","./multiple_docs/Thierry Picture.jpg"])
     msg = gr.Textbox()
     clear = gr.Button("Clear")
     chat_history = []
     def user(query, chat_history):
-        # print("User query:", query)
-        # print("Chat history:", chat_history)
         # Convert chat history to list of tuples
         chat_history_tuples = []
         for message in chat_history:
@@ -71,11 +150,12 @@ with gr.Blocks() as demo:
         # Append user message and response to chat history
         chat_history.append((query, result["answer"]))
-        # print("Updated chat history:", chat_history)
         return gr.update(value=""), chat_history
     msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False)
     clear.click(lambda: None, None, chatbot, queue=False)
-demo.launch(debug=True)

+# import os
+# import sys
+# import openai
+# from langchain.chains import ConversationalRetrievalChain, RetrievalQA
+# from langchain.chat_models import ChatOpenAI
+# from langchain.document_loaders import DirectoryLoader, TextLoader
+# from langchain.embeddings import OpenAIEmbeddings
+# from langchain.indexes import VectorstoreIndexCreator
+# from langchain.indexes.vectorstore import VectorStoreIndexWrapper
+# from langchain.llms import OpenAI
+# from langchain.text_splitter import CharacterTextSplitter
+# __import__('pysqlite3')
+# import sys
+# sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
+# from langchain.vectorstores import Chroma
+# import gradio as gr
+# os.environ["OPENAI_API_KEY"] = os.getenv("OPENAPIKEY")
+# docs = []
+# for f in os.listdir("multiple_docs"):
+#     if f.endswith(".pdf"):
+#         pdf_path = "./multiple_docs/" + f
+#         loader = PyPDFLoader(pdf_path)
+#         docs.extend(loader.load())
+#     elif f.endswith('.docx') or f.endswith('.doc'):
+#         doc_path = "./multiple_docs/" + f
+#         loader = Docx2txtLoader(doc_path)
+#         docs.extend(loader.load())
+#     elif f.endswith('.txt'):
+#         text_path = "./multiple_docs/" + f
+#         loader = TextLoader(text_path)
+#         docs.extend(loader.load())
+# splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
+# docs = splitter.split_documents(docs)
+# # Convert the document chunks to embedding and save them to the vector store
+# vectorstore = Chroma.from_documents(docs, embedding=OpenAIEmbeddings(), persist_directory="./data")
+# vectorstore.persist()
+# chain = ConversationalRetrievalChain.from_llm(
+#     ChatOpenAI(temperature=0.7, model_name='gpt-3.5-turbo'),
+#     retriever=vectorstore.as_retriever(search_kwargs={'k': 6}),
+#     return_source_documents=True,
+#     verbose=False
+# )
+# chat_history = []
+# with gr.Blocks() as demo:
+#     chatbot = gr.Chatbot([("", "Hello, I'm Thierry Decae's chatbot, you can ask me any recruitment related questions such as my previous or most recent experience, where I'm eligible to work, when I can start work, what NLP skills I have, and much more! you can chat with me directly in multiple languages")],avatar_images=["./multiple_docs/Guest.jpg","./multiple_docs/Thierry Picture.jpg"])
+#     msg = gr.Textbox()
+#     clear = gr.Button("Clear")
+#     chat_history = []
+#     def user(query, chat_history):
+#         # print("User query:", query)
+#         # print("Chat history:", chat_history)
+#         # Convert chat history to list of tuples
+#         chat_history_tuples = []
+#         for message in chat_history:
+#             chat_history_tuples.append((message[0], message[1]))
+#         # Get result from QA chain
+#         result = chain({"question": query, "chat_history": chat_history_tuples})
+#         # Append user message and response to chat history
+#         chat_history.append((query, result["answer"]))
+#         # print("Updated chat history:", chat_history)
+#         return gr.update(value=""), chat_history
+#     msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False)
+#     clear.click(lambda: None, None, chatbot, queue=False)
+# demo.launch(debug=True)
 import os
 import sys
+from langchain.chains import ConversationalRetrievalChain
 from langchain.document_loaders import DirectoryLoader, TextLoader
+from langchain.embeddings import HuggingFaceEmbeddings
 from langchain.indexes import VectorstoreIndexCreator
 from langchain.indexes.vectorstore import VectorStoreIndexWrapper
+from langchain.llms import HuggingFaceLLM
 from langchain.text_splitter import CharacterTextSplitter
 __import__('pysqlite3')
 from langchain.vectorstores import Chroma
 import gradio as gr
+from transformers import pipeline
 docs = []
 docs = splitter.split_documents(docs)
 # Convert the document chunks to embedding and save them to the vector store
+vectorstore = Chroma.from_documents(docs, embedding=HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"), persist_directory="./data")
 vectorstore.persist()
+# Load the Hugging Face model
+llm = HuggingFaceLLM(pipeline("text-generation", model="EleutherAI/gpt-neo-2.7B"))
 chain = ConversationalRetrievalChain.from_llm(
+    llm,
     retriever=vectorstore.as_retriever(search_kwargs={'k': 6}),
     return_source_documents=True,
     verbose=False
 chat_history = []
 with gr.Blocks() as demo:
+    chatbot = gr.Chatbot([("", "Hello, I'm Thierry Decae's chatbot, you can ask me any recruitment related questions such as my previous or most recent experience, where I'm eligible to work, when I can start work, what NLP skills I have, and much more! you can chat with me directly in multiple languages")], avatar_images=["./multiple_docs/Guest.jpg","./multiple_docs/Thierry Picture.jpg"])
     msg = gr.Textbox()
     clear = gr.Button("Clear")
     chat_history = []
     def user(query, chat_history):
         # Convert chat history to list of tuples
         chat_history_tuples = []
         for message in chat_history:
         # Append user message and response to chat history
         chat_history.append((query, result["answer"]))
         return gr.update(value=""), chat_history
     msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False)
     clear.click(lambda: None, None, chatbot, queue=False)
+demo.launch(debug=True)