tdecae committed on
Commit
ebf2c1b
1 Parent(s): a2e71cc

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +111 -111
app.py CHANGED
@@ -1,98 +1,24 @@
1
- # import os
2
- # import sys
3
- # import openai
4
- # from langchain.chains import ConversationalRetrievalChain, RetrievalQA
5
- # from langchain.chat_models import ChatOpenAI
6
- # from langchain.document_loaders import DirectoryLoader, TextLoader
7
- # from langchain.embeddings import OpenAIEmbeddings
8
- # from langchain.indexes import VectorstoreIndexCreator
9
- # from langchain.indexes.vectorstore import VectorStoreIndexWrapper
10
- # from langchain.llms import OpenAI
11
- # from langchain.text_splitter import CharacterTextSplitter
12
-
13
- # __import__('pysqlite3')
14
- # import sys
15
- # sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
16
-
17
- # from langchain.vectorstores import Chroma
18
- # import gradio as gr
19
-
20
- # os.environ["OPENAI_API_KEY"] = os.getenv("OPENAPIKEY")
21
-
22
- # docs = []
23
-
24
- # for f in os.listdir("multiple_docs"):
25
- # if f.endswith(".pdf"):
26
- # pdf_path = "./multiple_docs/" + f
27
- # loader = PyPDFLoader(pdf_path)
28
- # docs.extend(loader.load())
29
- # elif f.endswith('.docx') or f.endswith('.doc'):
30
- # doc_path = "./multiple_docs/" + f
31
- # loader = Docx2txtLoader(doc_path)
32
- # docs.extend(loader.load())
33
- # elif f.endswith('.txt'):
34
- # text_path = "./multiple_docs/" + f
35
- # loader = TextLoader(text_path)
36
- # docs.extend(loader.load())
37
-
38
- # splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
39
- # docs = splitter.split_documents(docs)
40
-
41
- # # Convert the document chunks to embedding and save them to the vector store
42
- # vectorstore = Chroma.from_documents(docs, embedding=OpenAIEmbeddings(), persist_directory="./data")
43
- # vectorstore.persist()
44
-
45
- # chain = ConversationalRetrievalChain.from_llm(
46
- # ChatOpenAI(temperature=0.7, model_name='gpt-3.5-turbo'),
47
- # retriever=vectorstore.as_retriever(search_kwargs={'k': 6}),
48
- # return_source_documents=True,
49
- # verbose=False
50
- # )
51
-
52
- # chat_history = []
53
-
54
- # with gr.Blocks() as demo:
55
- # chatbot = gr.Chatbot([("", "Hello, I'm Thierry Decae's chatbot, you can ask me any recruitment related questions such as my previous or most recent experience, where I'm eligible to work, when I can start work, what NLP skills I have, and much more! you can chat with me directly in multiple languages")],avatar_images=["./multiple_docs/Guest.jpg","./multiple_docs/Thierry Picture.jpg"])
56
- # msg = gr.Textbox()
57
- # clear = gr.Button("Clear")
58
- # chat_history = []
59
-
60
- # def user(query, chat_history):
61
- # # print("User query:", query)
62
- # # print("Chat history:", chat_history)
63
-
64
- # # Convert chat history to list of tuples
65
- # chat_history_tuples = []
66
- # for message in chat_history:
67
- # chat_history_tuples.append((message[0], message[1]))
68
-
69
- # # Get result from QA chain
70
- # result = chain({"question": query, "chat_history": chat_history_tuples})
71
-
72
- # # Append user message and response to chat history
73
- # chat_history.append((query, result["answer"]))
74
- # # print("Updated chat history:", chat_history)
75
-
76
- # return gr.update(value=""), chat_history
77
-
78
- # msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False)
79
- # clear.click(lambda: None, None, chatbot, queue=False)
80
-
81
- # demo.launch(debug=True)
82
-
83
  import os
84
  import sys
85
- from langchain.chains import ConversationalRetrievalChain
86
- from langchain.document_loaders import PyPDFLoader, Docx2txtLoader, TextLoader
 
 
 
 
 
 
87
  from langchain.text_splitter import CharacterTextSplitter
88
- from langchain.vectorstores import Chroma
89
- import gradio as gr
90
- from transformers import pipeline
91
- from sentence_transformers import SentenceTransformer
92
 
93
  __import__('pysqlite3')
 
94
  sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
95
 
 
 
 
 
 
96
  docs = []
97
 
98
  for f in os.listdir("multiple_docs"):
@@ -112,31 +38,12 @@ for f in os.listdir("multiple_docs"):
112
  splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
113
  docs = splitter.split_documents(docs)
114
 
115
- # Extract the content from documents and create embeddings
116
- embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
117
- texts = [doc.page_content for doc in docs]
118
- embeddings = embedding_model.encode(texts).tolist() # Convert numpy arrays to lists
119
-
120
- # Create a Chroma vector store and add documents and their embeddings
121
- vectorstore = Chroma(persist_directory="./db", embedding_function=embedding_model.encode)
122
- vectorstore.add_texts(texts=texts, metadatas=[{"id": i} for i in range(len(texts))], embeddings=embeddings)
123
  vectorstore.persist()
124
 
125
- # Load the Hugging Face model for text generation
126
- generator = pipeline("text-generation", model="EleutherAI/gpt-neo-2.7B")
127
-
128
- class HuggingFaceLLMWrapper:
129
- def __init__(self, generator):
130
- self.generator = generator
131
-
132
- def __call__(self, prompt, max_length=512):
133
- result = self.generator(prompt, max_length=max_length, num_return_sequences=1)
134
- return result[0]['generated_text']
135
-
136
- llm = HuggingFaceLLMWrapper(generator)
137
-
138
  chain = ConversationalRetrievalChain.from_llm(
139
- llm,
140
  retriever=vectorstore.as_retriever(search_kwargs={'k': 6}),
141
  return_source_documents=True,
142
  verbose=False
@@ -145,12 +52,15 @@ chain = ConversationalRetrievalChain.from_llm(
145
  chat_history = []
146
 
147
  with gr.Blocks() as demo:
148
- chatbot = gr.Chatbot([("", "Hello, I'm Thierry Decae's chatbot, you can ask me any recruitment related questions such as my previous or most recent experience, where I'm eligible to work, when I can start work, what NLP skills I have, and much more! you can chat with me directly in multiple languages")], avatar_images=["./multiple_docs/Guest.jpg","./multiple_docs/Thierry Picture.jpg"])
149
  msg = gr.Textbox()
150
  clear = gr.Button("Clear")
151
  chat_history = []
152
 
153
  def user(query, chat_history):
 
 
 
154
  # Convert chat history to list of tuples
155
  chat_history_tuples = []
156
  for message in chat_history:
@@ -161,6 +71,7 @@ with gr.Blocks() as demo:
161
 
162
  # Append user message and response to chat history
163
  chat_history.append((query, result["answer"]))
 
164
 
165
  return gr.update(value=""), chat_history
166
 
@@ -169,6 +80,95 @@ with gr.Blocks() as demo:
169
 
170
  demo.launch(debug=True)
171
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
172
 
173
 
174
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import sys
3
+ import openai
4
+ from langchain.chains import ConversationalRetrievalChain, RetrievalQA
5
+ from langchain.chat_models import ChatOpenAI
6
+ from langchain.document_loaders import DirectoryLoader, TextLoader
7
+ from langchain.embeddings import OpenAIEmbeddings
8
+ from langchain.indexes import VectorstoreIndexCreator
9
+ from langchain.indexes.vectorstore import VectorStoreIndexWrapper
10
+ from langchain.llms import OpenAI
11
  from langchain.text_splitter import CharacterTextSplitter
 
 
 
 
12
 
13
  __import__('pysqlite3')
14
+ import sys
15
  sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
16
 
17
+ from langchain.vectorstores import Chroma
18
+ import gradio as gr
19
+
20
+ os.environ["OPENAI_API_KEY"] = os.getenv("OPENAPIKEY")
21
+
22
  docs = []
23
 
24
  for f in os.listdir("multiple_docs"):
 
38
  splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
39
  docs = splitter.split_documents(docs)
40
 
41
+ # Convert the document chunks to embedding and save them to the vector store
42
+ vectorstore = Chroma.from_documents(docs, embedding=OpenAIEmbeddings(), persist_directory="./data")
 
 
 
 
 
 
43
  vectorstore.persist()
44
 
 
 
 
 
 
 
 
 
 
 
 
 
 
45
  chain = ConversationalRetrievalChain.from_llm(
46
+ ChatOpenAI(temperature=0.7, model_name='gpt-3.5-turbo'),
47
  retriever=vectorstore.as_retriever(search_kwargs={'k': 6}),
48
  return_source_documents=True,
49
  verbose=False
 
52
  chat_history = []
53
 
54
  with gr.Blocks() as demo:
55
+ chatbot = gr.Chatbot([("", "Hello, I'm Thierry Decae's chatbot, you can ask me any recruitment related questions such as my previous or most recent experience, where I'm eligible to work, when I can start work, what NLP skills I have, and much more! you can chat with me directly in multiple languages")],avatar_images=["./multiple_docs/Guest.jpg","./multiple_docs/Thierry Picture.jpg"])
56
  msg = gr.Textbox()
57
  clear = gr.Button("Clear")
58
  chat_history = []
59
 
60
  def user(query, chat_history):
61
+ # print("User query:", query)
62
+ # print("Chat history:", chat_history)
63
+
64
  # Convert chat history to list of tuples
65
  chat_history_tuples = []
66
  for message in chat_history:
 
71
 
72
  # Append user message and response to chat history
73
  chat_history.append((query, result["answer"]))
74
+ # print("Updated chat history:", chat_history)
75
 
76
  return gr.update(value=""), chat_history
77
 
 
80
 
81
  demo.launch(debug=True)
82
 
83
+ # import os
84
+ # import sys
85
+ # from langchain.chains import ConversationalRetrievalChain
86
+ # from langchain.document_loaders import PyPDFLoader, Docx2txtLoader, TextLoader
87
+ # from langchain.text_splitter import CharacterTextSplitter
88
+ # from langchain.vectorstores import Chroma
89
+ # import gradio as gr
90
+ # from transformers import pipeline
91
+ # from sentence_transformers import SentenceTransformer
92
+
93
+ # __import__('pysqlite3')
94
+ # sys.modules['sqlite3'] = sys.modules.pop('pysqlite3')
95
+
96
+ # docs = []
97
+
98
+ # for f in os.listdir("multiple_docs"):
99
+ # if f.endswith(".pdf"):
100
+ # pdf_path = "./multiple_docs/" + f
101
+ # loader = PyPDFLoader(pdf_path)
102
+ # docs.extend(loader.load())
103
+ # elif f.endswith('.docx') or f.endswith('.doc'):
104
+ # doc_path = "./multiple_docs/" + f
105
+ # loader = Docx2txtLoader(doc_path)
106
+ # docs.extend(loader.load())
107
+ # elif f.endswith('.txt'):
108
+ # text_path = "./multiple_docs/" + f
109
+ # loader = TextLoader(text_path)
110
+ # docs.extend(loader.load())
111
+
112
+ # splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=10)
113
+ # docs = splitter.split_documents(docs)
114
+
115
+ # # Extract the content from documents and create embeddings
116
+ # embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
117
+ # texts = [doc.page_content for doc in docs]
118
+ # embeddings = embedding_model.encode(texts).tolist() # Convert numpy arrays to lists
119
+
120
+ # # Create a Chroma vector store and add documents and their embeddings
121
+ # vectorstore = Chroma(persist_directory="./db", embedding_function=embedding_model.encode)
122
+ # vectorstore.add_texts(texts=texts, metadatas=[{"id": i} for i in range(len(texts))], embeddings=embeddings)
123
+ # vectorstore.persist()
124
+
125
+ # # Load the Hugging Face model for text generation
126
+ # generator = pipeline("text-generation", model="EleutherAI/gpt-neo-2.7B")
127
+
128
+ # class HuggingFaceLLMWrapper:
129
+ # def __init__(self, generator):
130
+ # self.generator = generator
131
+
132
+ # def __call__(self, prompt, max_length=512):
133
+ # result = self.generator(prompt, max_length=max_length, num_return_sequences=1)
134
+ # return result[0]['generated_text']
135
+
136
+ # llm = HuggingFaceLLMWrapper(generator)
137
+
138
+ # chain = ConversationalRetrievalChain.from_llm(
139
+ # llm,
140
+ # retriever=vectorstore.as_retriever(search_kwargs={'k': 6}),
141
+ # return_source_documents=True,
142
+ # verbose=False
143
+ # )
144
+
145
+ # chat_history = []
146
+
147
+ # with gr.Blocks() as demo:
148
+ # chatbot = gr.Chatbot([("", "Hello, I'm Thierry Decae's chatbot, you can ask me any recruitment related questions such as my previous or most recent experience, where I'm eligible to work, when I can start work, what NLP skills I have, and much more! you can chat with me directly in multiple languages")], avatar_images=["./multiple_docs/Guest.jpg","./multiple_docs/Thierry Picture.jpg"])
149
+ # msg = gr.Textbox()
150
+ # clear = gr.Button("Clear")
151
+ # chat_history = []
152
+
153
+ # def user(query, chat_history):
154
+ # # Convert chat history to list of tuples
155
+ # chat_history_tuples = []
156
+ # for message in chat_history:
157
+ # chat_history_tuples.append((message[0], message[1]))
158
+
159
+ # # Get result from QA chain
160
+ # result = chain({"question": query, "chat_history": chat_history_tuples})
161
+
162
+ # # Append user message and response to chat history
163
+ # chat_history.append((query, result["answer"]))
164
+
165
+ # return gr.update(value=""), chat_history
166
+
167
+ # msg.submit(user, [msg, chatbot], [msg, chatbot], queue=False)
168
+ # clear.click(lambda: None, None, chatbot, queue=False)
169
+
170
+ # demo.launch(debug=True)
171
+
172
 
173
 
174