Zwea Htet committed
Commit 5c0b510
2 parents: e262fe5, 66bfc6b

Merge branch 'pr/2'

app.py CHANGED
@@ -26,6 +26,11 @@ st.title("Welcome to RegBotBeta2.0")
 st.header("Powered by `LlamaIndex🦙`, `Langchain🦜🔗` and `OpenAI API`")
 
 
+st.set_page_config(page_title="RegBotBeta", page_icon="📜🤖")
+st.title("Welcome to California Drinking Water Regulation Chatbot Demo!")
+st.header("Built with `LlamaIndex🦙` and `Langchain🦜🔗`")
+
+
 uploaded_files = st.file_uploader(
     "Upload Files",
     accept_multiple_files=True,
models/llamaCustom.py CHANGED
@@ -18,11 +18,7 @@ from assets.prompts import custom_prompts
 
 # llama index
 from llama_index.core import (
-    StorageContext,
-    SimpleDirectoryReader,
     VectorStoreIndex,
-    load_index_from_storage,
-    PromptHelper,
     PromptTemplate,
 )
 from llama_index.core.llms import (
@@ -47,12 +43,10 @@ NUM_OUTPUT = 525
 # set maximum chunk overlap
 CHUNK_OVERLAP_RATION = 0.2
 
-# TODO: use the following prompt to format the answer at the end of the context prompt
 ANSWER_FORMAT = """
-Use the following example format for your answer:
+Provide the answer to the user question in the following format:
 [FORMAT]
-Answer:
-    The answer to the user question.
+Your answer to the user question above.
 Reference:
     The list of references (such as page number, title, chapter, section) to the specific sections of the documents that support your answer.
 [END_FORMAT]
@@ -200,6 +194,7 @@ class LlamaCustom:
         #     condense_prompt=CHAT_ENGINE_CONDENSE_PROMPT_TEMPLATE,
         #     # verbose=True,
         # )
+
         response = query_engine.query(query_str)
         # response = chat_engine.chat(message=query_str, chat_history=chat_history)
 
models/llamaCustomV2.py ADDED
@@ -0,0 +1,229 @@
+import os
+import time
+from llama_index.core import VectorStoreIndex
+from llama_index.core.query_pipeline import (
+    QueryPipeline,
+    InputComponent,
+    ArgPackComponent,
+)
+from llama_index.core.prompts import PromptTemplate
+from llama_index.llms.openai import OpenAI
+from llama_index.postprocessor.colbert_rerank import ColbertRerank
+from typing import Any, Dict, List, Optional
+from llama_index.core.bridge.pydantic import Field
+from llama_index.core.llms import ChatMessage
+from llama_index.core.query_pipeline import CustomQueryComponent
+from llama_index.core.schema import NodeWithScore
+from llama_index.core.memory import ChatMemoryBuffer
+
+
+llm = OpenAI(
+    model="gpt-3.5-turbo-0125",
+    api_key=os.getenv("OPENAI_API_KEY"),
+)
+
+# First, we create an input component to capture the user query
+input_component = InputComponent()
+
+# Next, we use the LLM to rewrite a user query
+rewrite = (
+    "Please write a query to a semantic search engine using the current conversation.\n"
+    "\n"
+    "\n"
+    "{chat_history_str}"
+    "\n"
+    "\n"
+    "Latest message: {query_str}\n"
+    'Query:"""\n'
+)
+rewrite_template = PromptTemplate(rewrite)
+
+# we will retrieve two times, so we need to pack the retrieved nodes into a single list
+argpack_component = ArgPackComponent()
+
+# then postprocess/rerank with Colbert
+reranker = ColbertRerank(top_n=3)
+
+DEFAULT_CONTEXT_PROMPT = (
+    "Here is some context that may be relevant:\n"
+    "-----\n"
+    "{node_context}\n"
+    "-----\n"
+    "Please write a response to the following question, using the above context:\n"
+    "{query_str}\n"
+    "Please format your response in the following way:\n"
+    "Your answer here.\n"
+    "Reference:\n"
+    "    Your references here (e.g. page numbers, titles, etc.).\n"
+)
+
+
+class ResponseWithChatHistory(CustomQueryComponent):
+    llm: OpenAI = Field(..., description="OpenAI LLM")
+    system_prompt: Optional[str] = Field(
+        default=None, description="System prompt to use for the LLM"
+    )
+    context_prompt: str = Field(
+        default=DEFAULT_CONTEXT_PROMPT,
+        description="Context prompt to use for the LLM",
+    )
+
+    def _validate_component_inputs(self, input: Dict[str, Any]) -> Dict[str, Any]:
+        """Validate component inputs during run_component."""
+        # NOTE: this is OPTIONAL but we show you where to do validation as an example
+        return input
+
+    @property
+    def _input_keys(self) -> set:
+        """Input keys dict."""
+        # NOTE: These are required inputs. If you have optional inputs please override
+        # `optional_input_keys_dict`
+        return {"chat_history", "nodes", "query_str"}
+
+    @property
+    def _output_keys(self) -> set:
+        return {"response"}
+
+    def _prepare_context(
+        self,
+        chat_history: List[ChatMessage],
+        nodes: List[NodeWithScore],
+        query_str: str,
+    ) -> List[ChatMessage]:
+        node_context = ""
+        for idx, node in enumerate(nodes):
+            node_text = node.get_content(metadata_mode="llm")
+            node_context += f"Context Chunk {idx}:\n{node_text}\n\n"
+
+        formatted_context = self.context_prompt.format(
+            node_context=node_context, query_str=query_str
+        )
+        user_message = ChatMessage(role="user", content=formatted_context)
+
+        chat_history.append(user_message)
+
+        if self.system_prompt is not None:
+            chat_history = [
+                ChatMessage(role="system", content=self.system_prompt)
+            ] + chat_history
+
+        return chat_history
+
+    def _run_component(self, **kwargs) -> Dict[str, Any]:
+        """Run the component."""
+        chat_history = kwargs["chat_history"]
+        nodes = kwargs["nodes"]
+        query_str = kwargs["query_str"]
+
+        prepared_context = self._prepare_context(chat_history, nodes, query_str)
+
+        response = llm.chat(prepared_context)
+
+        return {"response": response}
+
+    async def _arun_component(self, **kwargs: Any) -> Dict[str, Any]:
+        """Run the component asynchronously."""
+        # NOTE: Optional, but async LLM calls are easy to implement
+        chat_history = kwargs["chat_history"]
+        nodes = kwargs["nodes"]
+        query_str = kwargs["query_str"]
+
+        prepared_context = self._prepare_context(chat_history, nodes, query_str)
+
+        response = await llm.achat(prepared_context)
+
+        return {"response": response}
+
+
+class LlamaCustomV2:
+    response_component = ResponseWithChatHistory(
+        llm=llm,
+        system_prompt=(
+            "You are a Q&A system. You will be provided with the previous chat history, "
+            "as well as possibly relevant context, to assist in answering a user message."
+        ),
+    )
+
+    def __init__(self, model_name: str, index: VectorStoreIndex):
+        self.model_name = model_name
+        self.index = index
+        self.retriever = index.as_retriever()
+        self.chat_mode = "condense_plus_context"
+        self.memory = ChatMemoryBuffer.from_defaults()
+        self.verbose = True
+        self._build_pipeline()
+
+    def _build_pipeline(self):
+        self.pipeline = QueryPipeline(
+            modules={
+                "input": input_component,
+                "rewrite_template": rewrite_template,
+                "llm": llm,
+                "rewrite_retriever": self.retriever,
+                "query_retriever": self.retriever,
+                "join": argpack_component,
+                "reranker": reranker,
+                "response_component": self.response_component,
+            },
+            verbose=self.verbose,
+        )
+        # run both retrievers -- once with the hallucinated query, once with the real query
+        self.pipeline.add_link(
+            "input", "rewrite_template", src_key="query_str", dest_key="query_str"
+        )
+        self.pipeline.add_link(
+            "input",
+            "rewrite_template",
+            src_key="chat_history_str",
+            dest_key="chat_history_str",
+        )
+        self.pipeline.add_link("rewrite_template", "llm")
+        self.pipeline.add_link("llm", "rewrite_retriever")
+        self.pipeline.add_link("input", "query_retriever", src_key="query_str")
+
+        # each input to the argpack component needs a dest key -- it can be anything
+        # then, the argpack component will pack all the inputs into a single list
+        self.pipeline.add_link("rewrite_retriever", "join", dest_key="rewrite_nodes")
+        self.pipeline.add_link("query_retriever", "join", dest_key="query_nodes")
+
+        # reranker needs the packed nodes and the query string
+        self.pipeline.add_link("join", "reranker", dest_key="nodes")
+        self.pipeline.add_link(
+            "input", "reranker", src_key="query_str", dest_key="query_str"
+        )
+
+        # synthesizer needs the reranked nodes and query str
+        self.pipeline.add_link("reranker", "response_component", dest_key="nodes")
+        self.pipeline.add_link(
+            "input", "response_component", src_key="query_str", dest_key="query_str"
+        )
+        self.pipeline.add_link(
+            "input",
+            "response_component",
+            src_key="chat_history",
+            dest_key="chat_history",
+        )
+
+    def get_response(self, query_str: str, chat_history: List[ChatMessage]):
+        chat_history = self.memory.get()
+        chat_history_str = "\n".join([str(x) for x in chat_history])
+
+        response = self.pipeline.run(
+            query_str=query_str,
+            chat_history=chat_history,
+            chat_history_str=chat_history_str,
+        )
+
+        user_msg = ChatMessage(role="user", content=query_str)
+        print("user_msg: ", str(user_msg))
+        print("response: ", str(response.message))
+        self.memory.put(user_msg)
+        self.memory.put(response.message)
+
+        return str(response.message)
+
+    def get_stream_response(self, query_str: str, chat_history: List[ChatMessage]):
+        response = self.get_response(query_str=query_str, chat_history=chat_history)
+        for word in response.split():
+            yield word + " "
+            time.sleep(0.05)
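For orientation, a minimal usage sketch of the new LlamaCustomV2 class (not part of the commit itself): it assumes OPENAI_API_KEY is set, that a VectorStoreIndex is available (for example from get_pinecone_index in models/vector_database.py below), and the example questions are purely illustrative.

from models.llamaCustomV2 import LlamaCustomV2
from models.vector_database import get_pinecone_index

# Load (or build) the Pinecone-backed index for an uploaded file, then wrap it
index = get_pinecone_index("calregs.pdf")
bot = LlamaCustomV2(model_name="gpt-3.5-turbo-0125", index=index)

# Blocking call: the pipeline rewrites the query, retrieves twice, reranks with ColBERT, then answers
print(bot.get_response("What are the monitoring requirements for lead?", chat_history=[]))

# Streaming variant: yields whitespace-separated chunks for a chat UI
for chunk in bot.get_stream_response("Summarize the previous answer.", chat_history=[]):
    print(chunk, end="", flush=True)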
models/llms.py CHANGED
@@ -35,6 +35,7 @@ def load_llm(model_name: str, source: str = "huggingface"):
         llm_gpt_3_5_turbo_0125 = OpenAI(
             model=model_name,
             api_key=st.session_state.openai_api_key,
+            temperature=0.0,
         )
 
         return llm_gpt_3_5_turbo_0125
@@ -45,6 +46,7 @@ def load_llm(model_name: str, source: str = "huggingface"):
             is_chat_model=True,
             additional_kwargs={"max_new_tokens": 250},
             prompt_key=st.session_state.replicate_api_token,
+            temperature=0.0,
         )
 
         return llm_llama_13b_v2_replicate
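Both constructors now pin temperature=0.0 so repeated questions over the regulation text produce stable, reproducible answers. A standalone sketch of the equivalent OpenAI setup (reading the key from the environment rather than st.session_state; the prompt is illustrative):

import os
from llama_index.llms.openai import OpenAI

# temperature=0.0 keeps sampling (near-)deterministic across reruns
llm = OpenAI(
    model="gpt-3.5-turbo-0125",
    api_key=os.getenv("OPENAI_API_KEY"),
    temperature=0.0,
)
print(llm.complete("Reply with the single word: ready"))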
models/vector_database.py CHANGED
@@ -1,6 +1,14 @@
+from typing import List
 from pinecone import Pinecone, ServerlessSpec
 from llama_index.vector_stores.pinecone import PineconeVectorStore
 from dotenv import load_dotenv
+from llama_index.core import (
+    SimpleDirectoryReader,
+    Document,
+    VectorStoreIndex,
+    StorageContext,
+)
+from huggingface_hub import HfFileSystem, HfApi
 
 import os
 
@@ -30,5 +38,46 @@ if not index_exists(pc_index_name):
 # Initialize your index
 pinecone_index = pc.Index(pc_index_name)
 
-# Define the vector store
-pinecone_vector_store = PineconeVectorStore(pinecone_index=pinecone_index)
+# print("Deleting all vectors in the pinecone index: ", pinecone_index.delete(delete_all=True))
+# print("Deleting all vectors with the namespace 'calregs_pdf': ", pinecone_index.delete(namespace="calregs_pdf"))
+
+SAVE_DIR = "uploaded_files"
+
+
+def _namespace_exists(namespace: str):
+    namespaces = pinecone_index.describe_index_stats()["namespaces"]
+    return namespace in namespaces
+
+
+def get_pinecone_index(filename: str) -> VectorStoreIndex:
+    """This function loads the index from Pinecone if it exists, otherwise it creates a new index from the document."""
+    namespace = filename.replace(".", "_").replace(" ", "_")
+    pinecone_vector_store = PineconeVectorStore(
+        pinecone_index=pinecone_index,
+        namespace=namespace,
+    )
+    index = None
+    if _namespace_exists(namespace=namespace):
+        print(f"Namespace {namespace} exists.")
+        index = VectorStoreIndex.from_vector_store(vector_store=pinecone_vector_store)
+    else:
+        reader = SimpleDirectoryReader(input_files=[f"{SAVE_DIR}/{filename}"])
+        docs = reader.load_data(show_progress=True)
+        storage_context = StorageContext.from_defaults(
+            vector_store=pinecone_vector_store
+        )
+        index = VectorStoreIndex.from_documents(
+            documents=docs, show_progress=True, storage_context=storage_context
+        )
+
+    return index
+
+api = HfApi(
+    token=os.environ.get("HF_TOKEN")
+)
+
+api.upload_file(
+    repo_id="hbui/RegBot4.0",
+    path_or_fileobj=f"{SAVE_DIR}/calregs.pdf",
+    path_in_repo=f"{SAVE_DIR}/calregs.pdf",
+)
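The new get_pinecone_index helper keys each uploaded file to its own Pinecone namespace, so re-selecting a previously indexed file reuses the stored vectors instead of re-embedding the PDF. A minimal sketch of the intended call path (the filename and question are illustrative; it assumes the file was already saved under uploaded_files/ and that embedding/LLM credentials are configured):

from models.vector_database import get_pinecone_index

# "calregs.pdf" -> namespace "calregs_pdf": dots and spaces become underscores
index = get_pinecone_index("calregs.pdf")

# Existing namespace: the index is loaded straight from the vector store.
# Missing namespace: the PDF is read from uploaded_files/, embedded, and upserted first.
query_engine = index.as_query_engine()
print(query_engine.query("Which sections cover disinfection byproducts?"))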
pages/langchain_demo.py CHANGED
@@ -8,7 +8,7 @@ from utils.chatbox1 import chatbox
 
 st.set_page_config(page_title="Langchain", page_icon="")
 
-st.subheader("Langchain with OpenAI Demo")
+st.subheader("California Drinking Water Regulation Chatbot - Langchain Demo")
 
 if "messages" not in st.session_state:
     st.session_state.messages = []
@@ -17,7 +17,7 @@ if "openai_api_key" not in st.session_state:
     st.info("Enter your openai key to access the chatbot.")
 else:
     option = st.selectbox(
-        label="Select your model:", options=("gpt-3.5-turbo", "gpt-4")
+        label="Select your model:", options=("gpt-3.5-turbo", "gpt-4", "gpt-4o")
     )
 
     with st.spinner(f"Initializing {option} ..."):
pages/llama_custom_demo.py CHANGED
@@ -5,10 +5,11 @@ from typing import List
 
 # local imports
 from models.llms import load_llm, integrated_llms
-from models.embeddings import hf_embed_model, openai_embed_model
+from models.embeddings import openai_embed_model
 from models.llamaCustom import LlamaCustom
+# from models.llamaCustomV2 import LlamaCustomV2
 
-# from models.vector_database import pinecone_vector_store
+from models.vector_database import get_pinecone_index
 from utils.chatbox import show_previous_messages, show_chat_input
 from utils.util import validate_openai_api_key
 
@@ -21,6 +22,7 @@ from llama_index.core import (
     Settings,
     load_index_from_storage,
 )
+from llama_index.vector_stores.pinecone import PineconeVectorStore
 from llama_index.core.memory import ChatMemoryBuffer
 from llama_index.core.base.llms.types import ChatMessage
 
@@ -92,24 +94,6 @@ def get_index(
         raise e
     return index
 
-
-# def get_pinecone_index(filename: str) -> VectorStoreIndex:
-#     """Thie function loads the index from Pinecone if it exists, otherwise it creates a new index from the document."""
-#     reader = SimpleDirectoryReader(input_files=[f"{SAVE_DIR}/{filename}"])
-#     docs = reader.load_data(show_progress=True)
-#     storage_context = StorageContext.from_defaults(vector_store=pinecone_vector_store)
-#     index = VectorStoreIndex.from_documents(
-#         documents=docs, show_progress=True, storage_context=storage_context
-#     )
-
-#     return index
-
-
-def get_chroma_index(filename: str) -> VectorStoreIndex:
-    """This function loads the index from Chroma if it exists, otherwise it creates a new index from the document."""
-    pass
-
-
 def check_api_key(model_name: str, source: str):
     if source.startswith("openai"):
         if not st.session_state.openai_api_key:
@@ -168,7 +152,7 @@ init_session_state()
 
 st.set_page_config(page_title="Llama", page_icon="🦙")
 
-st.header("Llama Index with Custom LLM Demo")
+st.header("California Drinking Water Regulation Chatbot - LlamaIndex Demo")
 
 tab1, tab2 = st.tabs(["Config", "Chat"])
 
@@ -204,11 +188,12 @@ with tab1:
             Settings.llm = llama_llm
 
             st.write("Processing Data ...")
-            index = get_index(selected_file)
-            # index = get_pinecone_index(selected_file)
+            # index = get_index(selected_file)
+            index = get_pinecone_index(selected_file)
 
             st.write("Finishing Up ...")
             llama_custom = LlamaCustom(model_name=selected_llm_name, index=index)
+            # llama_custom = LlamaCustomV2(model_name=selected_llm_name, index=index)
             st.session_state.llama_custom = llama_custom
 
             status.update(label="Ready to query!", state="complete", expanded=False)
requirements.txt CHANGED
@@ -16,4 +16,5 @@ llama-index-vector-stores-pinecone
 pinecone-client>=3.0.0
 replicate>=0.25.1
 llama-index-llms-replicate
-sentence-transformers>=2.6.1
+sentence-transformers>=2.6.1
+llama-index-postprocessor-colbert-rerank
utils/chatbox1.py CHANGED
@@ -8,7 +8,7 @@ def display_chat_history(model_name: str):
             st.markdown(message["content"])
 
 def chat_input(model_name: str):
-    if prompt := st.chat_input("Say something"):
+    if prompt := st.chat_input("Ask a question about California drinking water regulations"):
         # Display user message in chat message container
         st.chat_message("user").markdown(prompt)
 
@@ -47,7 +47,7 @@ def chatbox(model_name: str, model: None):
         with st.chat_message(message["role"]):
            st.markdown(message["content"])
 
-    if prompt := st.chat_input("Say something"):
+    if prompt := st.chat_input("Ask a question about California drinking water regulations"):
         # Display user message in chat message container
         st.chat_message("user").markdown(prompt)
 