captain-awesome committed

Commit 25f639b
1 Parent(s): 0201ce9

Update app.py

Files changed (1)
  app.py +313 -257

app.py CHANGED
@@ -1,67 +1,77 @@
- from langchain.chains import ConversationalRetrievalChain
- from langchain.chains.question_answering import load_qa_chain
- from langchain.chains import RetrievalQA
- from langchain.memory import ConversationBufferMemory
- from langchain.memory import ConversationTokenBufferMemory
- from langchain.llms import HuggingFacePipeline
- # from langchain import PromptTemplate
- from langchain.prompts import PromptTemplate
- from langchain.embeddings import HuggingFaceEmbeddings
  from langchain.text_splitter import RecursiveCharacterTextSplitter
- from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
  from langchain.vectorstores import Chroma
- from chromadb.utils import embedding_functions
- from langchain.embeddings import SentenceTransformerEmbeddings
  from langchain.embeddings import HuggingFaceBgeEmbeddings
- from langchain.document_loaders import (
-     CSVLoader,
-     DirectoryLoader,
-     GitLoader,
-     NotebookLoader,
-     OnlinePDFLoader,
-     PythonLoader,
-     TextLoader,
-     UnstructuredFileLoader,
-     UnstructuredHTMLLoader,
-     UnstructuredPDFLoader,
-     UnstructuredWordDocumentLoader,
-     WebBaseLoader,
-     PyPDFLoader,
-     UnstructuredMarkdownLoader,
-     UnstructuredEPubLoader,
-     UnstructuredHTMLLoader,
-     UnstructuredPowerPointLoader,
-     UnstructuredODTLoader,
-     NotebookLoader,
-     UnstructuredFileLoader
- )
- from transformers import (
-     AutoModelForCausalLM,
-     AutoTokenizer,
-     StoppingCriteria,
-     StoppingCriteriaList,
-     pipeline,
-     GenerationConfig,
-     TextStreamer,
-     pipeline
- )
- from langchain.llms import HuggingFaceHub
- import torch
- from transformers import BitsAndBytesConfig
- import os
- from langchain.llms import CTransformers
- import streamlit as st
- from langchain.document_loaders.base import BaseLoader
- from langchain.schema import Document
- import gradio as gr
- import tempfile
- import timeit
- import textwrap
- from chromadb.utils import embedding_functions
- from tqdm import tqdm
- tqdm(disable=True, total=0)  # initialise internal lock
-
- tqdm.write("test")

  FILE_LOADER_MAPPING = {
      "csv": (CSVLoader, {"encoding": "utf-8"}),
@@ -80,206 +90,209 @@ FILE_LOADER_MAPPING = {
  # Add more mappings for other file extensions and loaders as needed
  }

- def load_model():
-     config = {'max_new_tokens': 1024,
-               'repetition_penalty': 1.1,
-               'temperature': 0.1,
-               'top_k': 50,
-               'top_p': 0.9,
-               'stream': True,
-               'threads': int(os.cpu_count() / 2)
-               }
-
-     llm = CTransformers(
-         model = "TheBloke/zephyr-7B-beta-GGUF",
-         model_file = "zephyr-7b-beta.Q4_0.gguf",
-         callbacks=[StreamingStdOutCallbackHandler()],
-         lib="avx2",  # for CPU use
-         **config
-         # model_type=model_type,
-         # max_new_tokens=max_new_tokens,  # type: ignore
-         # temperature=temperature,  # type: ignore
-     )
-     return llm
-
- def create_vector_database(loaded_documents):
-     # DB_DIR: str = os.path.join(ABS_PATH, "db")
-     """
-     Creates a vector database using document loaders and embeddings.
-     This function loads data from PDF, markdown and text files in the 'data/' directory,
-     splits the loaded documents into chunks, transforms them into embeddings using HuggingFace,
-     and finally persists the embeddings into a Chroma vector database.
-     """
-     # Split loaded documents into chunks
-     text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=30, length_function=len)
-     chunked_documents = text_splitter.split_documents(loaded_documents)
-
-     # embeddings = HuggingFaceEmbeddings(
-     #     model_name="sentence-transformers/all-MiniLM-L6-v2"
-     #     # model_name = "sentence-transformers/all-mpnet-base-v2"
-     # )
-     embeddings = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2")
-
-     # embeddings = HuggingFaceBgeEmbeddings(
-     #     model_name = "BAAI/bge-large-en"
-     # )
-
-     # model_name = "BAAI/bge-large-en"
-     # model_kwargs = {'device': 'cpu'}
-     # encode_kwargs = {'normalize_embeddings': False}
-     # embeddings = HuggingFaceBgeEmbeddings(
-     #     model_name=model_name,
-     #     model_kwargs=model_kwargs,
-     #     encode_kwargs=encode_kwargs
-     # )
-
-     persist_directory = 'db'
-     # Create and persist a Chroma vector database from the chunked documents
-     db = Chroma.from_documents(
-         documents=chunked_documents,
-         embedding=embeddings,
-         persist_directory=persist_directory
-         # persist_directory=DB_DIR,
-     )
-     db.persist()
-     # db = Chroma(persist_directory=persist_directory,
-     #             embedding_function=embedding)
-     return db


- def set_custom_prompt():
-     """
-     Prompt template for retrieval for each vectorstore
-     """
-     prompt_template = """Use the following pieces of information to answer the user's question.
- If you don't know the answer, just say that you don't know, don't try to make up an answer.
- Context: {context}
- Question: {question}
- Only return the helpful answer below and nothing else.
- Helpful answer:
- """
-
-     prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
-     return prompt

- def create_chain(llm, prompt, db):
-     """
-     Creates a Retrieval Question-Answering (QA) chain using a given language model, prompt, and database.
-     This function initializes a ConversationalRetrievalChain object with a specific chain type and configurations,
-     and returns this chain. The retriever is set up to return the top 3 results (k=3).
-     Args:
-         llm (any): The language model to be used in the RetrievalQA.
-         prompt (str): The prompt to be used in the chain type.
-         db (any): The database to be used as the retriever.
-     Returns:
-         ConversationalRetrievalChain: The initialized conversational chain.
-     """
-     memory = ConversationTokenBufferMemory(llm=llm, memory_key="chat_history", return_messages=True, input_key='question', output_key='answer')
-     # chain = ConversationalRetrievalChain.from_llm(
-     #     llm=llm,
-     #     chain_type="stuff",
-     #     retriever=db.as_retriever(search_kwargs={"k": 3}),
-     #     return_source_documents=True,
-     #     max_tokens_limit=256,
-     #     combine_docs_chain_kwargs={"prompt": prompt},
-     #     condense_question_prompt=CONDENSE_QUESTION_PROMPT,
-     #     memory=memory,
-     # )
-     # chain = RetrievalQA.from_chain_type(llm=llm,
-     #                                     chain_type='stuff',
-     #                                     retriever=db.as_retriever(search_kwargs={'k': 3}),
-     #                                     return_source_documents=True,
-     #                                     chain_type_kwargs={'prompt': prompt}
-     #                                     )
-     chain = RetrievalQA.from_chain_type(llm=llm,
-                                         chain_type='stuff',
-                                         retriever=db.as_retriever(search_kwargs={'k': 3}),
-                                         return_source_documents=True
-                                         )
-     return chain
-
- def create_retrieval_qa_bot(loaded_documents):
-     # if not os.path.exists(persist_dir):
-     #     raise FileNotFoundError(f"No directory found at {persist_dir}")
-
-     try:
-         llm = load_model()  # Assuming this function exists and works as expected
-     except Exception as e:
-         raise Exception(f"Failed to load model: {str(e)}")
-
-     try:
-         prompt = set_custom_prompt()  # Assuming this function exists and works as expected
-     except Exception as e:
-         raise Exception(f"Failed to get prompt: {str(e)}")
-
-     # try:
-     #     CONDENSE_QUESTION_PROMPT = set_custom_prompt_condense()  # Assuming this function exists and works as expected
-     # except Exception as e:
-     #     raise Exception(f"Failed to get condense prompt: {str(e)}")
-
-     try:
-         db = create_vector_database(loaded_documents)  # Assuming this function exists and works as expected
-     except Exception as e:
-         raise Exception(f"Failed to get database: {str(e)}")
-
-     try:
-         # qa = create_chain(
-         #     llm=llm, prompt=prompt, CONDENSE_QUESTION_PROMPT=CONDENSE_QUESTION_PROMPT, db=db
-         # )  # Assuming this function exists and works as expected
-         qa = create_chain(
-             llm=llm, prompt=prompt, db=db
-         )  # Assuming this function exists and works as expected
-     except Exception as e:
-         raise Exception(f"Failed to create retrieval QA chain: {str(e)}")
-
-     return qa
-
- def wrap_text_preserve_newlines(text, width=110):
-     # Split the input text into lines based on newline characters
-     lines = text.split('\n')
-
-     # Wrap each line individually
-     wrapped_lines = [textwrap.fill(line, width=width) for line in lines]
-
-     # Join the wrapped lines back together using newline characters
-     wrapped_text = '\n'.join(wrapped_lines)
-
-     return wrapped_text
-
- def retrieve_bot_answer(query, loaded_documents):
-     """
-     Retrieves the answer to a given query using a QA bot.
-     This function creates an instance of a QA bot, passes the query to it,
-     and returns the bot's response.
-     Args:
-         query (str): The question to be answered by the QA bot.
-     Returns:
-         dict: The QA bot's response, typically a dictionary with response details.
-     """
-     qa_bot_instance = create_retrieval_qa_bot(loaded_documents)
-     # bot_response = qa_bot_instance({"question": query})
-     bot_response = qa_bot_instance({"query": query})
-     # Check if the 'answer' key exists in the bot_response dictionary
-     # if 'answer' in bot_response:
-     #     # answer = bot_response['answer']
-     #     return bot_response
-     # else:
-     #     raise KeyError("Expected 'answer' key in bot_response, but it was not found.")
-     # result = bot_response['answer']
-
-     # result = bot_response['result']
-     # sources = []
-     # for source in bot_response["source_documents"]:
-     #     sources.append(source.metadata['source'])
-     # return result, sources
-
-     result = wrap_text_preserve_newlines(bot_response['result'])
-     for source in bot_response["source_documents"]:
-         sources.append(source.metadata['source'])
-     return result, sources
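Two things stand out in the removed code above. First, retrieve_bot_answer appends to sources while its initializer (`sources = []`) sits commented out two lines earlier, so this path would raise NameError. Second, set_custom_prompt() builds a PromptTemplate that the live RetrievalQA call never receives; only the commented-out variant passes it in. A minimal sketch of wiring the template into the chain, reusing the names the removed functions define (this is the pattern the commented-out variant already shows, not code from the commit):

    # Sketch: pass the custom template into the "stuff" combine step.
    prompt = set_custom_prompt()
    chain = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type='stuff',
        retriever=db.as_retriever(search_kwargs={'k': 3}),
        return_source_documents=True,
        chain_type_kwargs={'prompt': prompt},  # without this, the default QA prompt is used
    )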

  def main():

      st.title("Docuverse")
@@ -321,19 +334,62 @@ def main():
      # Load model, set prompts, create vector database, and retrieve answer
      try:
          start = timeit.default_timer()
-         llm = load_model()
-         prompt = set_custom_prompt()
          # CONDENSE_QUESTION_PROMPT = set_custom_prompt_condense()
-         db = create_vector_database(loaded_documents)
-         # st.write(f"db: {db}")
-         result, sources = retrieve_bot_answer(query, loaded_documents)
          end = timeit.default_timer()
          st.write("Elapsed time:")
          st.write(end - start)
          # st.write(f"response: {response}")
          # Display bot response
          st.write("Bot Response:")
-         st.write(result)
          st.write(sources)
      except Exception as e:
          st.error(f"An error occurred: {str(e)}")
 
+ # from langchain.chains import ConversationalRetrievalChain
+ # from langchain.chains.question_answering import load_qa_chain
+ # from langchain.chains import RetrievalQA
+ # from langchain.memory import ConversationBufferMemory
+ # from langchain.memory import ConversationTokenBufferMemory
+ # from langchain.llms import HuggingFacePipeline
+ # # from langchain import PromptTemplate
+ # from langchain.prompts import PromptTemplate
+ # from langchain.embeddings import HuggingFaceEmbeddings
+ # from langchain.text_splitter import RecursiveCharacterTextSplitter
+ # from langchain.callbacks.streaming_stdout import StreamingStdOutCallbackHandler
+ # from langchain.vectorstores import Chroma
+ # from chromadb.utils import embedding_functions
+ # from langchain.embeddings import SentenceTransformerEmbeddings
+ # from langchain.embeddings import HuggingFaceBgeEmbeddings
+ # from langchain.document_loaders import (
+ #     CSVLoader,
+ #     DirectoryLoader,
+ #     GitLoader,
+ #     NotebookLoader,
+ #     OnlinePDFLoader,
+ #     PythonLoader,
+ #     TextLoader,
+ #     UnstructuredFileLoader,
+ #     UnstructuredHTMLLoader,
+ #     UnstructuredPDFLoader,
+ #     UnstructuredWordDocumentLoader,
+ #     WebBaseLoader,
+ #     PyPDFLoader,
+ #     UnstructuredMarkdownLoader,
+ #     UnstructuredEPubLoader,
+ #     UnstructuredHTMLLoader,
+ #     UnstructuredPowerPointLoader,
+ #     UnstructuredODTLoader,
+ #     NotebookLoader,
+ #     UnstructuredFileLoader
+ # )
+ # from transformers import (
+ #     AutoModelForCausalLM,
+ #     AutoTokenizer,
+ #     StoppingCriteria,
+ #     StoppingCriteriaList,
+ #     pipeline,
+ #     GenerationConfig,
+ #     TextStreamer,
+ #     pipeline
+ # )
+ # from langchain.llms import HuggingFaceHub
+ # import torch
+ # from transformers import BitsAndBytesConfig
+ # import os
+ # from langchain.llms import CTransformers
+ # import streamlit as st
+ # from langchain.document_loaders.base import BaseLoader
+ # from langchain.schema import Document
+ # import gradio as gr
+ # import tempfile
+ # import timeit
+ # import textwrap
+ # from chromadb.utils import embedding_functions
+ # from tqdm import tqdm
+ # tqdm(disable=True, total=0)  # initialise internal lock
+
+ # tqdm.write("test")
+
+ from langchain import PromptTemplate, LLMChain
+ from langchain.llms import CTransformers
+ import os
+ import streamlit as st  # st is used throughout main(); its import was commented out above
+ import timeit           # used for the elapsed-time display in main()
+ import textwrap         # used to wrap the bot response in main()
  from langchain.text_splitter import RecursiveCharacterTextSplitter
  from langchain.vectorstores import Chroma
+ from langchain.chains import RetrievalQA
  from langchain.embeddings import HuggingFaceBgeEmbeddings
+ from io import BytesIO
+ from langchain.document_loaders import PyPDFLoader, CSVLoader  # CSVLoader is referenced in FILE_LOADER_MAPPING below

  FILE_LOADER_MAPPING = {
      "csv": (CSVLoader, {"encoding": "utf-8"}),
      ...
  # Add more mappings for other file extensions and loaders as needed
  }
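FILE_LOADER_MAPPING pairs a file extension with a loader class and its keyword arguments. The code that consumes the mapping is not shown in this diff, so as a hedged sketch only, a hypothetical dispatch helper might look like:

    # Hypothetical helper (not part of this commit): pick a loader by extension.
    def load_document(file_path):
        ext = file_path.rsplit(".", 1)[-1].lower()
        if ext not in FILE_LOADER_MAPPING:
            raise ValueError(f"Unsupported file extension: {ext}")
        loader_class, loader_kwargs = FILE_LOADER_MAPPING[ext]
        loader = loader_class(file_path, **loader_kwargs)
        return loader.load()  # a list of langchain Document objects

Note that the new import block only brings in PyPDFLoader (plus CSVLoader, re-added above), so any other loader still referenced in the collapsed part of the mapping would need its import restored as well.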
 
+
+ # def load_model():
+ #     config = {'max_new_tokens': 1024,
+ #               'repetition_penalty': 1.1,
+ #               'temperature': 0.1,
+ #               'top_k': 50,
+ #               'top_p': 0.9,
+ #               'stream': True,
+ #               'threads': int(os.cpu_count() / 2)
+ #               }
+
+ #     llm = CTransformers(
+ #         model = "TheBloke/zephyr-7B-beta-GGUF",
+ #         model_file = "zephyr-7b-beta.Q4_0.gguf",
+ #         callbacks=[StreamingStdOutCallbackHandler()],
+ #         lib="avx2",  # for CPU use
+ #         **config
+ #         # model_type=model_type,
+ #         # max_new_tokens=max_new_tokens,  # type: ignore
+ #         # temperature=temperature,  # type: ignore
+ #     )
+ #     return llm
+
+ # def create_vector_database(loaded_documents):
+ #     # DB_DIR: str = os.path.join(ABS_PATH, "db")
+ #     """
+ #     Creates a vector database using document loaders and embeddings.
+ #     This function loads data from PDF, markdown and text files in the 'data/' directory,
+ #     splits the loaded documents into chunks, transforms them into embeddings using HuggingFace,
+ #     and finally persists the embeddings into a Chroma vector database.
+ #     """
+ #     # Split loaded documents into chunks
+ #     text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=30, length_function=len)
+ #     chunked_documents = text_splitter.split_documents(loaded_documents)
+
+ #     # embeddings = HuggingFaceEmbeddings(
+ #     #     model_name="sentence-transformers/all-MiniLM-L6-v2"
+ #     #     # model_name = "sentence-transformers/all-mpnet-base-v2"
+ #     # )
+ #     embeddings = embedding_functions.SentenceTransformerEmbeddingFunction(model_name="all-MiniLM-L6-v2")
+
+ #     # embeddings = HuggingFaceBgeEmbeddings(
+ #     #     model_name = "BAAI/bge-large-en"
+ #     # )
+
+ #     # model_name = "BAAI/bge-large-en"
+ #     # model_kwargs = {'device': 'cpu'}
+ #     # encode_kwargs = {'normalize_embeddings': False}
+ #     # embeddings = HuggingFaceBgeEmbeddings(
+ #     #     model_name=model_name,
+ #     #     model_kwargs=model_kwargs,
+ #     #     encode_kwargs=encode_kwargs
+ #     # )
+
+ #     persist_directory = 'db'
+ #     # Create and persist a Chroma vector database from the chunked documents
+ #     db = Chroma.from_documents(
+ #         documents=chunked_documents,
+ #         embedding=embeddings,
+ #         persist_directory=persist_directory
+ #         # persist_directory=DB_DIR,
+ #     )
+ #     db.persist()
+ #     # db = Chroma(persist_directory=persist_directory,
+ #     #             embedding_function=embedding)
+ #     return db
+
+
+ # def set_custom_prompt():
+ #     """
+ #     Prompt template for retrieval for each vectorstore
+ #     """
+ #     prompt_template = """Use the following pieces of information to answer the user's question.
+ # If you don't know the answer, just say that you don't know, don't try to make up an answer.
+ # Context: {context}
+ # Question: {question}
+ # Only return the helpful answer below and nothing else.
+ # Helpful answer:
+ # """
+
+ #     prompt = PromptTemplate(template=prompt_template, input_variables=["context", "question"])
+ #     return prompt
+
+ # def create_chain(llm, prompt, db):
+ #     """
+ #     Creates a Retrieval Question-Answering (QA) chain using a given language model, prompt, and database.
+ #     This function initializes a ConversationalRetrievalChain object with a specific chain type and configurations,
+ #     and returns this chain. The retriever is set up to return the top 3 results (k=3).
+ #     Args:
+ #         llm (any): The language model to be used in the RetrievalQA.
+ #         prompt (str): The prompt to be used in the chain type.
+ #         db (any): The database to be used as the retriever.
+ #     Returns:
+ #         ConversationalRetrievalChain: The initialized conversational chain.
+ #     """
+ #     memory = ConversationTokenBufferMemory(llm=llm, memory_key="chat_history", return_messages=True, input_key='question', output_key='answer')
+ #     # chain = ConversationalRetrievalChain.from_llm(
+ #     #     llm=llm,
+ #     #     chain_type="stuff",
+ #     #     retriever=db.as_retriever(search_kwargs={"k": 3}),
+ #     #     return_source_documents=True,
+ #     #     max_tokens_limit=256,
+ #     #     combine_docs_chain_kwargs={"prompt": prompt},
+ #     #     condense_question_prompt=CONDENSE_QUESTION_PROMPT,
+ #     #     memory=memory,
+ #     # )
+ #     # chain = RetrievalQA.from_chain_type(llm=llm,
+ #     #                                     chain_type='stuff',
+ #     #                                     retriever=db.as_retriever(search_kwargs={'k': 3}),
+ #     #                                     return_source_documents=True,
+ #     #                                     chain_type_kwargs={'prompt': prompt}
+ #     #                                     )
+ #     chain = RetrievalQA.from_chain_type(llm=llm,
+ #                                         chain_type='stuff',
+ #                                         retriever=db.as_retriever(search_kwargs={'k': 3}),
+ #                                         return_source_documents=True
+ #                                         )
+ #     return chain
+
+ # def create_retrieval_qa_bot(loaded_documents):
+ #     # if not os.path.exists(persist_dir):
+ #     #     raise FileNotFoundError(f"No directory found at {persist_dir}")
+
+ #     try:
+ #         llm = load_model()  # Assuming this function exists and works as expected
+ #     except Exception as e:
+ #         raise Exception(f"Failed to load model: {str(e)}")
+
+ #     try:
+ #         prompt = set_custom_prompt()  # Assuming this function exists and works as expected
+ #     except Exception as e:
+ #         raise Exception(f"Failed to get prompt: {str(e)}")
+
+ #     # try:
+ #     #     CONDENSE_QUESTION_PROMPT = set_custom_prompt_condense()  # Assuming this function exists and works as expected
+ #     # except Exception as e:
+ #     #     raise Exception(f"Failed to get condense prompt: {str(e)}")
+
+ #     try:
+ #         db = create_vector_database(loaded_documents)  # Assuming this function exists and works as expected
+ #     except Exception as e:
+ #         raise Exception(f"Failed to get database: {str(e)}")
+
+ #     try:
+ #         # qa = create_chain(
+ #         #     llm=llm, prompt=prompt, CONDENSE_QUESTION_PROMPT=CONDENSE_QUESTION_PROMPT, db=db
+ #         # )  # Assuming this function exists and works as expected
+ #         qa = create_chain(
+ #             llm=llm, prompt=prompt, db=db
+ #         )  # Assuming this function exists and works as expected
+ #     except Exception as e:
+ #         raise Exception(f"Failed to create retrieval QA chain: {str(e)}")
+
+ #     return qa
+
+ # def wrap_text_preserve_newlines(text, width=110):
+ #     # Split the input text into lines based on newline characters
+ #     lines = text.split('\n')
+
+ #     # Wrap each line individually
+ #     wrapped_lines = [textwrap.fill(line, width=width) for line in lines]
+
+ #     # Join the wrapped lines back together using newline characters
+ #     wrapped_text = '\n'.join(wrapped_lines)
+
+ #     return wrapped_text
+
+ # def retrieve_bot_answer(query, loaded_documents):
+ #     """
+ #     Retrieves the answer to a given query using a QA bot.
+ #     This function creates an instance of a QA bot, passes the query to it,
+ #     and returns the bot's response.
+ #     Args:
+ #         query (str): The question to be answered by the QA bot.
+ #     Returns:
+ #         dict: The QA bot's response, typically a dictionary with response details.
+ #     """
+ #     qa_bot_instance = create_retrieval_qa_bot(loaded_documents)
+ #     # bot_response = qa_bot_instance({"question": query})
+ #     bot_response = qa_bot_instance({"query": query})
+ #     # Check if the 'answer' key exists in the bot_response dictionary
+ #     # if 'answer' in bot_response:
+ #     #     # answer = bot_response['answer']
+ #     #     return bot_response
+ #     # else:
+ #     #     raise KeyError("Expected 'answer' key in bot_response, but it was not found.")
+ #     # result = bot_response['answer']
+
+ #     # result = bot_response['result']
+ #     # sources = []
+ #     # for source in bot_response["source_documents"]:
+ #     #     sources.append(source.metadata['source'])
+ #     # return result, sources
+
+ #     result = wrap_text_preserve_newlines(bot_response['result'])
+ #     for source in bot_response["source_documents"]:
+ #         sources.append(source.metadata['source'])
+ #     return result, sources

  def main():
+
+
      st.title("Docuverse")

  ...
      # Load model, set prompts, create vector database, and retrieve answer
      try:
          start = timeit.default_timer()
+         config = {
+             'max_new_tokens': 1024,
+             'repetition_penalty': 1.1,
+             'temperature': 0.1,
+             'top_k': 50,
+             'top_p': 0.9,
+             'stream': True,
+             'threads': int(os.cpu_count() / 2)
+         }
+
+         llm = CTransformers(
+             model = "TheBloke/zephyr-7B-beta-GGUF",
+             model_file = "zephyr-7b-beta.Q4_0.gguf",
+             model_type="mistral",
+             lib="avx2",  # for CPU use
+             **config
+         )
+         st.write("LLM Initialized:")
+
+         model_name = "BAAI/bge-large-en"
+         model_kwargs = {'device': 'cpu'}
+         encode_kwargs = {'normalize_embeddings': False}
+         embeddings = HuggingFaceBgeEmbeddings(
+             model_name=model_name,
+             model_kwargs=model_kwargs,
+             encode_kwargs=encode_kwargs
+         )
+
+         # llm = load_model()
+         # prompt = set_custom_prompt()
          # CONDENSE_QUESTION_PROMPT = set_custom_prompt_condense()
+         # db = create_vector_database(loaded_documents)
+         persist_directory = 'db'
+         # Create and persist a Chroma vector database from the chunked documents
+         db = Chroma.from_documents(documents=chunked_documents, embedding=embeddings, persist_directory=persist_directory)
+         db.persist()
+
+         retriever = db.as_retriever(search_kwargs={"k": 1})
+
+         qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever, return_source_documents=True, verbose=True)
+         bot_response = qa(query)
+         lines = bot_response['result'].split('\n')
+         wrapped_lines = [textwrap.fill(line, width=110) for line in lines]  # width was undefined here; 110 matches the old wrap_text_preserve_newlines default
+         wrapped_text = '\n'.join(wrapped_lines)
+
+         # Collect all source paths; the committed loop reassigned `sources` each
+         # iteration and so kept only the last one.
+         sources = [source.metadata['source'] for source in bot_response["source_documents"]]
+
          end = timeit.default_timer()
          st.write("Elapsed time:")
          st.write(end - start)
          # st.write(f"response: {response}")
          # Display bot response
          st.write("Bot Response:")
+         st.write(wrapped_text)
+
          st.write(sources)
      except Exception as e:
          st.error(f"An error occurred: {str(e)}")
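For orientation, the inlined steps above form one retrieval pipeline: chunk, embed with BGE, persist to Chroma, retrieve the top-1 chunk (down from k=3 in the old chain), and answer with the Zephyr GGUF model. A minimal consolidated sketch, assuming loaded_documents comes from the file loaders in the collapsed part of main() and that the chunked_documents name used (but not defined) in this hunk is produced by the splitter as in the removed create_vector_database:

    def answer_query(query, loaded_documents):
        # Chunking (assumed: mirrors the removed create_vector_database defaults).
        splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=30)
        chunked_documents = splitter.split_documents(loaded_documents)

        # Embed and persist the chunks into Chroma.
        embeddings = HuggingFaceBgeEmbeddings(
            model_name="BAAI/bge-large-en",
            model_kwargs={'device': 'cpu'},
            encode_kwargs={'normalize_embeddings': False},
        )
        db = Chroma.from_documents(documents=chunked_documents,
                                   embedding=embeddings,
                                   persist_directory='db')

        # Answer from the single nearest chunk using the quantized Zephyr model.
        llm = CTransformers(model="TheBloke/zephyr-7B-beta-GGUF",
                            model_file="zephyr-7b-beta.Q4_0.gguf",
                            model_type="mistral")
        qa = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff",
                                         retriever=db.as_retriever(search_kwargs={"k": 1}),
                                         return_source_documents=True)
        response = qa(query)
        sources = [d.metadata['source'] for d in response['source_documents']]
        return response['result'], sources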