Changes to app.py and src/document_retrieval.py: accept an optional user-supplied SambaNova API key (falling back to the SAMBANOVA_API_KEY environment variable)
Browse files
- app.py +9 -5
- src/document_retrieval.py +4 -4
app.py
CHANGED
@@ -27,16 +27,20 @@ def handle_userinput(user_question, conversation_chain, history):
|
|
27 |
else:
|
28 |
return history, ""
|
29 |
|
30 |
-
def process_documents(files, collection_name, document_retrieval, vectorstore, conversation_chain,
|
31 |
try:
|
32 |
-
|
|
|
|
|
|
|
|
|
33 |
_, _, text_chunks = parse_doc_universal(doc=files)
|
34 |
print(len(text_chunks))
|
35 |
print(text_chunks[0])
|
36 |
embeddings = document_retrieval.load_embedding_model()
|
37 |
collection_id = str(uuid.uuid4())
|
38 |
collection_name = f"collection_{collection_id}"
|
39 |
-
vectorstore = document_retrieval.create_vector_store(text_chunks, embeddings, output_db=
|
40 |
document_retrieval.init_retriever(vectorstore)
|
41 |
conversation_chain = document_retrieval.get_qa_retrieval_chain()
|
42 |
return conversation_chain, vectorstore, document_retrieval, collection_name, "Complete! You can now ask questions."
|
@@ -57,7 +61,7 @@ with gr.Blocks() as demo:
|
|
57 |
|
58 |
gr.Markdown("Powered by LLama3.1-8B-Instruct on SambaNova Cloud. Get your API key [here](https://cloud.sambanova.ai/apis).")
|
59 |
|
60 |
-
|
61 |
|
62 |
# Step 1: Add PDF file
|
63 |
gr.Markdown("## 1️⃣ Upload PDF")
|
@@ -71,7 +75,7 @@ with gr.Blocks() as demo:
|
|
71 |
gr.Markdown(caution_text)
|
72 |
|
73 |
# Preprocessing events
|
74 |
-
process_btn.click(process_documents, inputs=[docs, collection_name, document_retrieval, vectorstore, conversation_chain], outputs=[conversation_chain, vectorstore, document_retrieval, collection_name, setup_output], concurrency_limit=20)
|
75 |
|
76 |
# Step 3: Chat with your data
|
77 |
gr.Markdown("## 3️⃣ Chat with your document")
|
|
|
27 |
else:
|
28 |
return history, ""
|
29 |
|
30 |
+
def process_documents(files, collection_name, document_retrieval, vectorstore, conversation_chain, api_key=None):
|
31 |
try:
|
32 |
+
if api_key:
|
33 |
+
sambanova_api_key = api_key
|
34 |
+
else:
|
35 |
+
sambanova_api_key = os.environ.get('SAMBANOVA_API_KEY')
|
36 |
+
document_retrieval = DocumentRetrieval(sambanova_api_key)
|
37 |
_, _, text_chunks = parse_doc_universal(doc=files)
|
38 |
print(len(text_chunks))
|
39 |
print(text_chunks[0])
|
40 |
embeddings = document_retrieval.load_embedding_model()
|
41 |
collection_id = str(uuid.uuid4())
|
42 |
collection_name = f"collection_{collection_id}"
|
43 |
+
vectorstore = document_retrieval.create_vector_store(text_chunks, embeddings, output_db=None, collection_name=collection_name)
|
44 |
document_retrieval.init_retriever(vectorstore)
|
45 |
conversation_chain = document_retrieval.get_qa_retrieval_chain()
|
46 |
return conversation_chain, vectorstore, document_retrieval, collection_name, "Complete! You can now ask questions."
|
|
|
61 |
|
62 |
gr.Markdown("Powered by LLama3.1-8B-Instruct on SambaNova Cloud. Get your API key [here](https://cloud.sambanova.ai/apis).")
|
63 |
|
64 |
+
api_key = gr.Textbox(label="API Key", type="password", placeholder="(Optional) Enter your API key here for more availability")
|
65 |
|
66 |
# Step 1: Add PDF file
|
67 |
gr.Markdown("## 1️⃣ Upload PDF")
|
|
|
75 |
gr.Markdown(caution_text)
|
76 |
|
77 |
# Preprocessing events
|
78 |
+
process_btn.click(process_documents, inputs=[docs, collection_name, document_retrieval, vectorstore, conversation_chain, api_key], outputs=[conversation_chain, vectorstore, document_retrieval, collection_name, setup_output], concurrency_limit=20)
|
79 |
|
80 |
# Step 3: Chat with your data
|
81 |
gr.Markdown("## 3️⃣ Chat with your document")
|
src/document_retrieval.py
CHANGED
@@ -124,7 +124,7 @@ class RetrievalQAChain(Chain):
|
|
124 |
|
125 |
|
126 |
class DocumentRetrieval:
|
127 |
-
def __init__(self):
|
128 |
self.vectordb = VectorDb()
|
129 |
config_info = self.get_config_info()
|
130 |
self.api_info = config_info[0]
|
@@ -134,7 +134,7 @@ class DocumentRetrieval:
|
|
134 |
self.prompts = config_info[4]
|
135 |
self.prod_mode = config_info[5]
|
136 |
self.retriever = None
|
137 |
-
self.llm = self.set_llm()
|
138 |
|
139 |
def get_config_info(self):
|
140 |
"""
|
@@ -152,7 +152,7 @@ class DocumentRetrieval:
|
|
152 |
|
153 |
return api_info, llm_info, embedding_model_info, retrieval_info, prompts, prod_mode
|
154 |
|
155 |
-
def set_llm(self):
|
156 |
#if self.prod_mode:
|
157 |
# sambanova_api_key = st.session_state.SAMBANOVA_API_KEY
|
158 |
#else:
|
@@ -161,7 +161,7 @@ class DocumentRetrieval:
|
|
161 |
# else:
|
162 |
# sambanova_api_key = os.environ.get('SAMBANOVA_API_KEY')
|
163 |
|
164 |
-
sambanova_api_key = os.environ.get('SAMBANOVA_API_KEY')
|
165 |
|
166 |
llm = APIGateway.load_llm(
|
167 |
type=self.api_info,
|
|
|
124 |
|
125 |
|
126 |
class DocumentRetrieval:
|
127 |
+
def __init__(self, sambanova_api_key):
|
128 |
self.vectordb = VectorDb()
|
129 |
config_info = self.get_config_info()
|
130 |
self.api_info = config_info[0]
|
|
|
134 |
self.prompts = config_info[4]
|
135 |
self.prod_mode = config_info[5]
|
136 |
self.retriever = None
|
137 |
+
self.llm = self.set_llm(sambanova_api_key)
|
138 |
|
139 |
def get_config_info(self):
|
140 |
"""
|
|
|
152 |
|
153 |
return api_info, llm_info, embedding_model_info, retrieval_info, prompts, prod_mode
|
154 |
|
155 |
+
def set_llm(self, sambanova_api_key):
|
156 |
#if self.prod_mode:
|
157 |
# sambanova_api_key = st.session_state.SAMBANOVA_API_KEY
|
158 |
#else:
|
|
|
161 |
# else:
|
162 |
# sambanova_api_key = os.environ.get('SAMBANOVA_API_KEY')
|
163 |
|
164 |
+
#sambanova_api_key = os.environ.get('SAMBANOVA_API_KEY')
|
165 |
|
166 |
llm = APIGateway.load_llm(
|
167 |
type=self.api_info,
|