Zwea Htet committed • Commit b83dc9c • 1 Parent(s): 3455cec

updated llama index demo
Files changed:
- app.py +21 -18
- models/llamaCustom.py +7 -3
- models/vector_database.py +34 -0
- pages/llama_custom_demo.py +32 -3
- requirements.txt +3 -1
app.py
CHANGED
@@ -1,13 +1,9 @@
-# https://docs.streamlit.io/knowledge-base/tutorials/build-conversational-apps
-
 import os
-
 import openai
 import requests
 import streamlit as st
 
 from utils.util import *
-
 from langchain.memory import ConversationBufferMemory
 
 SAVE_DIR = "uploaded_files"
@@ -17,30 +13,24 @@ os.makedirs(SAVE_DIR, exist_ok=True)
 def init_session_state():
     if "openai_api_key" not in st.session_state:
         st.session_state.openai_api_key = ""
-
     if "uploaded_files" not in st.session_state:
         st.session_state.uploaded_files = os.listdir(SAVE_DIR)
+    if "huggingface_token" not in st.session_state:
+        st.session_state.huggingface_token = ""
 
 
 init_session_state()
 
 st.set_page_config(page_title="RegBotBeta", page_icon="📜🤖")
-
 st.title("Welcome to RegBotBeta2.0")
-st.header("Powered by `LlamaIndex🦙`, `Langchain
+st.header("Powered by `LlamaIndex🦙`, `Langchain🦜🔗` and `OpenAI API`")
 
 
-def init_session_state():
-    if "huggingface_token" not in st.session_state:
-        st.session_state.huggingface_token = ""
-
-
-init_session_state()
-
 uploaded_files = st.file_uploader(
     "Upload Files",
     accept_multiple_files=True,
     type=["pdf", "docx", "txt", "csv"],
+    label_visibility="hidden",
 )
 
 if uploaded_files:
@@ -48,14 +38,27 @@ if uploaded_files:
         if file not in st.session_state.uploaded_files:
             # add the file to session state
             st.session_state.uploaded_files.append(file.name)
-
             # save the file to the sample_data directory
             with open(os.path.join(SAVE_DIR, file.name), "wb") as f:
                 f.write(file.getbuffer())
-
     st.success("File(s) uploaded successfully!")
 
+
+def delete_file(filename):
+    """Delete file from session state and local filesystem."""
+    if filename in st.session_state.uploaded_files and os.path.exists(
+        os.path.join(SAVE_DIR, filename)
+    ):
+        st.session_state.uploaded_files.remove(filename)
+        os.remove(os.path.join(SAVE_DIR, filename))
+        st.success(f"Deleted {filename}!")
+        st.rerun()
+
+
 if st.session_state.uploaded_files:
     st.write("Uploaded Files:")
-    for
-    st.
+    for index, filename in enumerate(st.session_state.uploaded_files):
+        col1, col2 = st.columns([4, 1])
+        col1.write(filename)
+        if col2.button("Delete", key=f"delete_{index}"):
+            delete_file(filename)
models/llamaCustom.py
CHANGED
@@ -54,7 +54,7 @@ Use the following example format for your answer:
 Answer:
 The answer to the user question.
 Reference:
-The list of references to the specific sections of the documents that support your answer.
+The list of references (such as page number, title, chapter, section) to the specific sections of the documents that support your answer.
 [END_FORMAT]
 """
 
@@ -184,9 +184,13 @@ class LlamaCustom:
 
     def get_response(self, query_str: str, chat_history: List[ChatMessage]):
         # https://docs.llamaindex.ai/en/stable/module_guides/deploying/chat_engines/
+        # https://docs.llamaindex.ai/en/stable/examples/query_engine/citation_query_engine/
+        # https://docs.llamaindex.ai/en/stable/examples/query_engine/knowledge_graph_rag_query_engine/
         query_engine = self.index.as_query_engine(
-            text_qa_template=PromptTemplate(QUERY_ENGINE_QA_TEMPLATE),
-            refine_template=PromptTemplate(
+            text_qa_template=PromptTemplate(QUERY_ENGINE_QA_TEMPLATE + ANSWER_FORMAT),
+            refine_template=PromptTemplate(
+                QUERY_ENGINE_REFINE_TEMPLATE
+            ),  # passing ANSWER_FORMAT here will not give the desired output, need to use the output parser from llama index?
             verbose=self.verbose,
         )
         # chat_engine = self.index.as_chat_engine(
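Note: the comment on the refine template leaves the structured-output question open, and the newly linked docs point at CitationQueryEngine as one alternative. A minimal sketch, assuming `index` is the VectorStoreIndex held by LlamaCustom and `query_str` is the user question; the top-k and chunk-size values are illustrative, not from this commit:

# Sketch only: CitationQueryEngine numbers its retrieved chunks so the model
# can cite them, which could replace the hand-rolled Reference: format above.
from llama_index.core.query_engine import CitationQueryEngine

citation_engine = CitationQueryEngine.from_args(
    index,                    # the same VectorStoreIndex used by as_query_engine
    similarity_top_k=3,       # illustrative value
    citation_chunk_size=512,  # granularity of the numbered citation chunks (illustrative)
)
response = citation_engine.query(query_str)
# response.source_nodes holds the chunks backing each [n] citation in the text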
models/vector_database.py
ADDED
@@ -0,0 +1,34 @@
+from pinecone import Pinecone, ServerlessSpec
+from llama_index.vector_stores.pinecone import PineconeVectorStore
+from dotenv import load_dotenv
+
+import os
+
+load_dotenv()
+
+# Pinecone Vector Database
+pc = Pinecone(api_key=os.environ.get("PINECONE_API_KEY"))
+pc_index_name = "llama-integration-pinecone"
+# pc_index_name = "openai-embeddings"
+pc_indexes = pc.list_indexes()
+
+# Check if the index already exists
+def index_exists(index_name):
+    for index in pc_indexes:
+        if index["name"] == index_name:
+            return True
+    return False
+
+# Create the index if it doesn't exist
+if not index_exists(pc_index_name):
+    pc.create_index(
+        name=pc_index_name,
+        dimension=1536,
+        spec=ServerlessSpec(cloud="aws", region="us-east-1"),
+    )
+
+# Initialize your index
+pinecone_index = pc.Index(pc_index_name)
+
+# Define the vector store
+pinecone_vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
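Note: the exported `pinecone_vector_store` plugs into LlamaIndex through a StorageContext, as the commented-out `get_pinecone_index` in pages/llama_custom_demo.py below also does. A minimal sketch; the input file path is a placeholder:

# Minimal usage sketch for pinecone_vector_store; "example.pdf" is a placeholder.
from llama_index.core import SimpleDirectoryReader, StorageContext, VectorStoreIndex

from models.vector_database import pinecone_vector_store

docs = SimpleDirectoryReader(input_files=["uploaded_files/example.pdf"]).load_data()
storage_context = StorageContext.from_defaults(vector_store=pinecone_vector_store)
index = VectorStoreIndex.from_documents(docs, storage_context=storage_context)

The `dimension=1536` in create_index matches OpenAI's text-embedding-ada-002, consistent with this commit switching `Settings.embed_model` to `openai_embed_model` below.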
pages/llama_custom_demo.py
CHANGED
@@ -7,6 +7,8 @@ from typing import List
 from models.llms import load_llm, integrated_llms
 from models.embeddings import hf_embed_model, openai_embed_model
 from models.llamaCustom import LlamaCustom
+
+# from models.vector_database import pinecone_vector_store
 from utils.chatbox import show_previous_messages, show_chat_input
 from utils.util import validate_openai_api_key
 
@@ -30,7 +32,8 @@ VECTOR_STORE_DIR = "vectorStores"
 HF_REPO_ID = "zhtet/RegBotBeta"
 
 # global
-Settings.embed_model = hf_embed_model
+# Settings.embed_model = hf_embed_model
+Settings.embed_model = openai_embed_model
 
 # huggingface api
 hf_api = HfApi()
@@ -62,9 +65,10 @@ def init_session_state():
 
 
 # @st.cache_resource
-def index_docs(
+def get_index(
     filename: str,
 ) -> VectorStoreIndex:
+    """This function loads the index from storage if it exists, otherwise it creates a new index from the document."""
     try:
         index_path = pathlib.Path(f"{VECTOR_STORE_DIR}/{filename.replace('.', '_')}")
         if pathlib.Path.exists(index_path):
@@ -89,6 +93,23 @@ def index_docs(
     return index
 
 
+# def get_pinecone_index(filename: str) -> VectorStoreIndex:
+#     """This function loads the index from Pinecone if it exists, otherwise it creates a new index from the document."""
+#     reader = SimpleDirectoryReader(input_files=[f"{SAVE_DIR}/{filename}"])
+#     docs = reader.load_data(show_progress=True)
+#     storage_context = StorageContext.from_defaults(vector_store=pinecone_vector_store)
+#     index = VectorStoreIndex.from_documents(
+#         documents=docs, show_progress=True, storage_context=storage_context
+#     )
+
+#     return index
+
+
+def get_chroma_index(filename: str) -> VectorStoreIndex:
+    """This function loads the index from Chroma if it exists, otherwise it creates a new index from the document."""
+    pass
+
+
 def check_api_key(model_name: str, source: str):
     if source.startswith("openai"):
         if not st.session_state.openai_api_key:
@@ -164,6 +185,13 @@ with tab1:
         label="Choose a file to chat with: ", options=os.listdir(SAVE_DIR)
     )
 
+    if st.button("Clear all api keys"):
+        st.session_state.openai_api_key = ""
+        st.session_state.replicate_api_token = ""
+        st.session_state.hf_token = ""
+        st.success("All API keys cleared!")
+        st.rerun()
+
     if st.button("Submit", key="submit", help="Submit the form"):
         with st.status("Loading ...", expanded=True) as status:
             try:
@@ -176,7 +204,8 @@ with tab1:
                 Settings.llm = llama_llm
 
                 st.write("Processing Data ...")
-                index =
+                index = get_index(selected_file)
+                # index = get_pinecone_index(selected_file)
 
                 st.write("Finishing Up ...")
                 llama_custom = LlamaCustom(model_name=selected_llm_name, index=index)
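Note: `get_chroma_index` is added here only as a stub. A hedged sketch of one way it could be filled in with LlamaIndex's Chroma integration; the persist path and collection name are assumptions, and it would need `chromadb` plus `llama-index-vector-stores-chroma` added to requirements.txt:

# Hypothetical implementation of the get_chroma_index stub; paths/names are assumptions.
import chromadb
from llama_index.core import SimpleDirectoryReader, StorageContext, VectorStoreIndex
from llama_index.vector_stores.chroma import ChromaVectorStore


def get_chroma_index(filename: str) -> VectorStoreIndex:
    """Build an index for `filename` backed by a local Chroma collection (sketch)."""
    client = chromadb.PersistentClient(path="./chroma_db")  # assumed persist path
    collection = client.get_or_create_collection("regbot")  # assumed collection name
    vector_store = ChromaVectorStore(chroma_collection=collection)
    storage_context = StorageContext.from_defaults(vector_store=vector_store)
    docs = SimpleDirectoryReader(input_files=[f"{SAVE_DIR}/{filename}"]).load_data()
    return VectorStoreIndex.from_documents(docs, storage_context=storage_context)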
requirements.txt
CHANGED
@@ -7,11 +7,13 @@ langchain_pinecone
 openai
 faiss-cpu
 python-dotenv
-streamlit
+streamlit>=1.24.0
 huggingface_hub<0.21.0
 pypdf
 llama-index-llms-huggingface>=0.1.4
 llama-index-embeddings-langchain>=0.1.2
+llama-index-vector-stores-pinecone
+pinecone-client>=3.0.0
 replicate>=0.25.1
 llama-index-llms-replicate
 sentence-transformers>=2.6.1