Zwea Htet committed
Commit f5254ad
1 Parent(s): b1a958d

integrated open source llms

Files changed (5)
  1. app.py +7 -16
  2. models/llamaCustom.py +52 -13
  3. models/llms.py +47 -31
  4. pages/llama_custom_demo.py +96 -47
  5. utils/util.py +7 -3
app.py CHANGED
@@ -29,22 +29,13 @@ st.set_page_config(page_title="RegBotBeta", page_icon="📜🤖")
 st.title("Welcome to RegBotBeta2.0")
 st.header("Powered by `LlamaIndex🦙`, `Langchain🦜🔗 ` and `OpenAI API`")
 
-# openai_api_key = st.text_input(
-#     "OpenAI API Key",
-#     type="password",
-#     help="Get your API key from https://platform.openai.com/account/api-keys",
-#     value=st.session_state.openai_api_key,
-# )
-
-# isKeyValid = False
-# if openai_api_key:
-#     resp = validate(openai_api_key)
-#     if "error" in resp.json():
-#         st.info("Invalid Token! Try again.")
-#     else:
-#         st.info("Success")
-#         st.session_state.openai_api_key = openai_api_key
-#         isKeyValid = True
+
+def init_session_state():
+    if "huggingface_token" not in st.session_state:
+        st.session_state.huggingface_token = ""
+
+
+init_session_state()
 
 uploaded_files = st.file_uploader(
     "Upload Files",
models/llamaCustom.py CHANGED
@@ -58,7 +58,43 @@ Reference:
 [END_FORMAT]
 """
 
-CONTEXT_PROMPT_TEMPLATE = """
+# query engine templates
+QUERY_ENGINE_QA_TEMPLATE = """
+We have provided context information below:
+[CONTEXT]
+{context_str}
+[END_CONTEXT]
+Given this information, please answer the following question:
+[QUESTION]
+{query_str}
+[END_QUESTION]
+"""
+
+QUERY_ENGINE_REFINE_TEMPLATE = """
+The original query is as follows:
+[QUESTION]
+{query_str}
+[END_QUESTION]
+
+We have provided an existing answer:
+[ANSWER]
+{existing_answer}
+[END_ANSWER]
+
+We have the opportunity to refine the existing answer (only if needed) with some more
+context below.
+[CONTEXT]
+{context_msg}
+[END_CONTEXT]
+
+Given the new context, refine the original answer to include more details like references \
+to the specific sections of the documents that support your answer.
+
+Refined Answer:
+"""
+
+
+CHAT_ENGINE_CONTEXT_PROMPT_TEMPLATE = """
 The following is a friendly conversation between a user and an AI assistant.
 The assistant is talkative and provides lots of specific details from its context.
 If the assistant does not know the answer to a question, it truthfully says it
@@ -73,7 +109,7 @@ Include references to the specific sections of the documents that support your answer.
 Answer "don't know" if not present in the document.
 """
 
-CONDENSE_PROMPT_TEMPLATE = """
+CHAT_ENGINE_CONDENSE_PROMPT_TEMPLATE = """
 Given the following conversation between a user and an AI assistant and a follow up question from user,
 rephrase the follow up question to be a standalone question.
 
@@ -144,21 +180,24 @@ class LlamaCustom:
         self.index = index
         self.chat_mode = "condense_plus_context"
         self.memory = ChatMemoryBuffer.from_defaults()
+        self.verbose = True
 
     def get_response(self, query_str: str, chat_history: List[ChatMessage]):
         # https://docs.llamaindex.ai/en/stable/module_guides/deploying/chat_engines/
-        # query_engine = self.index.as_query_engine(
-        #     text_qa_template=text_qa_template, refine_template=refine_template
-        # )
-        chat_engine = self.index.as_chat_engine(
-            chat_mode=self.chat_mode,
-            memory=self.memory,
-            context_prompt=CONTEXT_PROMPT_TEMPLATE,
-            condense_prompt=CONDENSE_PROMPT_TEMPLATE,
-            # verbose=True,
+        query_engine = self.index.as_query_engine(
+            text_qa_template=PromptTemplate(QUERY_ENGINE_QA_TEMPLATE),
+            refine_template=PromptTemplate(QUERY_ENGINE_REFINE_TEMPLATE),
+            verbose=self.verbose,
         )
-        # response = query_engine.query(query_str)
-        response = chat_engine.chat(message=query_str, chat_history=chat_history)
+        # chat_engine = self.index.as_chat_engine(
+        #     chat_mode=self.chat_mode,
+        #     memory=self.memory,
+        #     context_prompt=CHAT_ENGINE_CONTEXT_PROMPT_TEMPLATE,
+        #     condense_prompt=CHAT_ENGINE_CONDENSE_PROMPT_TEMPLATE,
+        #     # verbose=True,
+        # )
+        response = query_engine.query(query_str)
+        # response = chat_engine.chat(message=query_str, chat_history=chat_history)
 
         return str(response)
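For context on the switch above: `as_query_engine` expects `PromptTemplate` objects rather than raw strings, which is why the new template constants are wrapped before use (the diff assumes `PromptTemplate` is already imported in llamaCustom.py, e.g. from `llama_index.core`). A minimal standalone sketch of the same pattern, assuming an embedding model is configured (LlamaIndex defaults to OpenAI embeddings) and the directory name is a placeholder:

```python
from llama_index.core import PromptTemplate, SimpleDirectoryReader, VectorStoreIndex
from models.llamaCustom import QUERY_ENGINE_QA_TEMPLATE, QUERY_ENGINE_REFINE_TEMPLATE

# Build a small index over local files (placeholder directory).
docs = SimpleDirectoryReader("uploaded_files").load_data()
index = VectorStoreIndex.from_documents(docs)

# Same pattern as get_response: wrap the raw template strings.
query_engine = index.as_query_engine(
    text_qa_template=PromptTemplate(QUERY_ENGINE_QA_TEMPLATE),
    refine_template=PromptTemplate(QUERY_ENGINE_REFINE_TEMPLATE),
)
print(str(query_engine.query("Which section covers reporting requirements?")))
```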
models/llms.py CHANGED
@@ -1,16 +1,13 @@
-from llama_index.llms.huggingface import HuggingFaceLLM
+from llama_index.llms.huggingface import HuggingFaceLLM, HuggingFaceInferenceAPI
 from llama_index.llms.openai import OpenAI
-# from llama_index.llms.replicate import Replicate
+from llama_index.llms.replicate import Replicate
+
 from dotenv import load_dotenv
 import os
+import streamlit as st
 
 load_dotenv()
 
-# llm_mixtral_8x7b = HuggingFaceInferenceAPI(
-#     model_name="mistralai/Mixtral-8x7B-Instruct-v0.1",
-#     token=os.getenv("HUGGINGFACE_API_TOKEN"),
-# )
-
 # download the model from the Hugging Face Hub and run it locally
 # llm_mixtral_8x7b = HuggingFaceLLM(model_name="mistralai/Mixtral-8x7B-Instruct-v0.1")
 
@@ -19,27 +16,46 @@ load_dotenv()
 #     token=os.getenv("HUGGINGFACE_API_TOKEN"),
 # )
 
-# llm_bloomz_560m = HuggingFaceInferenceAPI(
-#     model_name="bigscience/bloomz-560m",
-#     token=os.getenv("HUGGINGFACE_API_TOKEN"),
-# )
-llm_bloomz_560m = HuggingFaceLLM(model_name="bigscience/bloomz-560m")
-
-# llm_gpt_3_5_turbo = OpenAI(
-#     api_key=os.getenv("OPENAI_API_KEY"),
-# )
-
-llm_gpt_3_5_turbo_0125 = OpenAI(
-    model="gpt-3.5-turbo-0125",
-    api_key=os.getenv("OPENAI_API_KEY"),
-)
-
-# llm_gpt_4_0125 = OpenAI(
-#     model="gpt-4-0125-preview",
-#     api_key=os.getenv("OPENAI_API_KEY"),
-# )
-
-# llm_llama_13b_v2_replicate = Replicate(
-#     model="meta/llama-2-13b-chat",
-#     prompt_key=os.getenv("REPLICATE_API_KEY"),
-# )
+# maps model name -> source
+integrated_llms = {
+    "gpt-3.5-turbo-0125": "openai",
+    "meta/llama-2-13b-chat": "replicate",
+    "mistralai/Mistral-7B-Instruct-v0.2": "huggingface",
+    # "mistralai/Mixtral-8x7B-v0.1": "huggingface",  # 93 GB model
+    # "meta-llama/Meta-Llama-3-8B": "huggingface",  # too large (>10 GB) for the LlamaIndex HF inference API to load
+}
+
+
+def load_llm(model_name: str, source: str = "huggingface"):
+    print("model_name: ", model_name, "source: ", source)
+    if integrated_llms.get(model_name) is None:
+        return None
+    try:
+        if source.startswith("openai"):
+            llm_gpt_3_5_turbo_0125 = OpenAI(
+                model=model_name,
+                api_key=st.session_state.openai_api_key,
+            )
+            return llm_gpt_3_5_turbo_0125
+
+        elif source.startswith("replicate"):
+            llm_llama_13b_v2_replicate = Replicate(
+                model=model_name,
+                is_chat_model=True,
+                additional_kwargs={"max_new_tokens": 250},
+                prompt_key=st.session_state.replicate_api_token,
+            )
+            return llm_llama_13b_v2_replicate
+
+        elif source.startswith("huggingface"):
+            llm_mixtral_8x7b = HuggingFaceInferenceAPI(
+                model_name=model_name,
+                token=st.session_state.hf_token,
+            )
+            return llm_mixtral_8x7b
+
+    except Exception as e:
+        print(e)
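A short sketch of how the new `load_llm` factory is driven from a Streamlit page (the token value is a placeholder; in this commit the tokens are collected by `check_api_key` in pages/llama_custom_demo.py):

```python
import streamlit as st
from models.llms import integrated_llms, load_llm

# Placeholder token; in the app this is set via a text input.
st.session_state.hf_token = "hf_..."

model_name = "mistralai/Mistral-7B-Instruct-v0.2"
source = integrated_llms[model_name]  # -> "huggingface"

llm = load_llm(model_name=model_name, source=source)
if llm is None:  # unknown model, or load_llm swallowed an exception
    raise ValueError(f"{model_name} could not be loaded")
```

Note that `load_llm` also returns `None` when the underlying constructor raises, since the `except` branch only prints the error; callers should treat `None` as failure, as the demo page does.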
pages/llama_custom_demo.py CHANGED
@@ -1,18 +1,16 @@
-import random
-import time
 import streamlit as st
 import os
 import pathlib
 from typing import List
-from models.llms import (
-    llm_bloomz_560m,
-    llm_gpt_3_5_turbo_0125,
-)
+
+# local imports
+from models.llms import load_llm, integrated_llms
 from models.embeddings import hf_embed_model, openai_embed_model
 from models.llamaCustom import LlamaCustom
-
-# from models.llamaCustom import LlamaCustom
 from utils.chatbox import show_previous_messages, show_chat_input
+from utils.util import validate_openai_api_key
+
+# llama_index
 from llama_index.core import (
     SimpleDirectoryReader,
     Document,
@@ -24,21 +22,18 @@ from llama_index.core import (
 from llama_index.core.memory import ChatMemoryBuffer
 from llama_index.core.base.llms.types import ChatMessage
 
+# huggingface
+from huggingface_hub import HfApi
+
 SAVE_DIR = "uploaded_files"
 VECTOR_STORE_DIR = "vectorStores"
+HF_REPO_ID = "zhtet/RegBotBeta"
 
 # global
 Settings.embed_model = hf_embed_model
 
-llama_llms = {
-    "bigscience/bloomz-560m": llm_bloomz_560m,
-    # "mistral/mixtral": llm_mixtral_8x7b,
-    # "meta-llama/Llama-2-7b-chat-hf": llm_llama_2_7b_chat,
-    # "openai/gpt-3.5-turbo": llm_gpt_3_5_turbo,
-    "openai/gpt-3.5-turbo-0125": llm_gpt_3_5_turbo_0125,
-    # "openai/gpt-4-0125-preview": llm_gpt_4_0125,
-    # "meta/llama-2-13b-chat": llm_llama_13b_v2_replicate,
-}
+# huggingface api
+hf_api = HfApi()
 
 
 def init_session_state():
@@ -56,6 +51,15 @@ def init_session_state():
     if "llama_custom" not in st.session_state:
         st.session_state.llama_custom = None
 
+    if "openai_api_key" not in st.session_state:
+        st.session_state.openai_api_key = ""
+
+    if "replicate_api_token" not in st.session_state:
+        st.session_state.replicate_api_token = ""
+
+    if "hf_token" not in st.session_state:
+        st.session_state.hf_token = ""
+
 
 # @st.cache_resource
 def index_docs(
@@ -68,9 +72,6 @@ def index_docs(
         storage_context = StorageContext.from_defaults(persist_dir=index_path)
         index = load_index_from_storage(storage_context=storage_context)
 
-        # test the index
-        index.as_query_engine().query("What is the capital of France?")
-
     else:
         reader = SimpleDirectoryReader(input_files=[f"{SAVE_DIR}/{filename}"])
         docs = reader.load_data(show_progress=True)
@@ -84,12 +85,62 @@
 
     except Exception as e:
         print(f"Error: {e}")
-        index = None
+        raise e
     return index
 
 
-def load_llm(model_name: str):
-    return llama_llms[model_name]
+def check_api_key(model_name: str, source: str):
+    if source.startswith("openai"):
+        if not st.session_state.openai_api_key:
+            with st.expander("OpenAI API Key", expanded=True):
+                openai_api_key = st.text_input(
+                    label="Enter your OpenAI API Key:",
+                    type="password",
+                    help="Get your key from https://platform.openai.com/account/api-keys",
+                    value=st.session_state.openai_api_key,
+                )
+
+                if openai_api_key and st.spinner("Validating OpenAI API Key ..."):
+                    result = validate_openai_api_key(openai_api_key)
+                    if result["status"] == "success":
+                        st.session_state.openai_api_key = openai_api_key
+                        st.success(result["message"])
+                    else:
+                        st.error(result["message"])
+                        st.info("You can still select a different model to proceed.")
+                        st.stop()
+
+    elif source.startswith("replicate"):
+        if not st.session_state.replicate_api_token:
+            with st.expander("Replicate API Token", expanded=True):
+                replicate_api_token = st.text_input(
+                    label="Enter your Replicate API Token:",
+                    type="password",
+                    help="Get your key from https://replicate.ai/account",
+                    value=st.session_state.replicate_api_token,
+                )
+
+                # TODO: need to validate the token
+
+                if replicate_api_token:
+                    st.session_state.replicate_api_token = replicate_api_token
+                    # set the environment variable
+                    os.environ["REPLICATE_API_TOKEN"] = replicate_api_token
+
+    elif source.startswith("huggingface"):
+        if not st.session_state.hf_token:
+            with st.expander("Hugging Face Token", expanded=True):
+                hf_token = st.text_input(
+                    label="Enter your Hugging Face Token:",
+                    type="password",
+                    help="Get your key from https://huggingface.co/settings/token",
+                    value=st.session_state.hf_token,
+                )
+
+                if hf_token:
+                    st.session_state.hf_token = hf_token
+                    # set the environment variable
+                    os.environ["HF_TOKEN"] = hf_token
 
 
 init_session_state()
@@ -101,43 +152,41 @@ st.header("Llama Index with Custom LLM Demo")
 tab1, tab2 = st.tabs(["Config", "Chat"])
 
 with tab1:
-    with st.form(key="llama_form"):
-        selected_llm_name = st.selectbox(
-            label="Select a model:", options=llama_llms.keys()
-        )
-
-        if selected_llm_name.startswith("openai"):
-            # ask for the api key
-            if st.secrets.get("OPENAI_API_KEY") is None:
-                # st.stop()
-                st.info("OpenAI API Key not found in secrets. Please enter it below.")
-                st.secrets["OPENAI_API_KEY"] = st.text_input(
-                    "OpenAI API Key",
-                    type="password",
-                    help="Get your API key from https://platform.openai.com/account/api-keys",
-                )
-
-        selected_file = st.selectbox(
-            label="Choose a file to chat with: ", options=os.listdir(SAVE_DIR)
-        )
-
-        if st.form_submit_button(label="Submit"):
-            with st.status("Loading ...", expanded=True) as status:
+    selected_llm_name = st.selectbox(
+        label="Select a model:",
+        options=[f"{key} | {value}" for key, value in integrated_llms.items()],
+    )
+    model_name, source = selected_llm_name.split("|")
+
+    check_api_key(model_name=model_name.strip(), source=source.strip())
+
+    selected_file = st.selectbox(
+        label="Choose a file to chat with: ", options=os.listdir(SAVE_DIR)
+    )
+
+    if st.button("Submit", key="submit", help="Submit the form"):
+        with st.status("Loading ...", expanded=True) as status:
+            try:
                 st.write("Loading Model ...")
-                llama_llm = load_llm(selected_llm_name)
+                llama_llm = load_llm(
+                    model_name=model_name.strip(), source=source.strip()
+                )
+                if llama_llm is None:
+                    raise ValueError("Model not found!")
                 Settings.llm = llama_llm
 
                 st.write("Processing Data ...")
                 index = index_docs(selected_file)
-                if index is None:
-                    st.error("Failed to index the documents.")
-                    st.stop()
 
                 st.write("Finishing Up ...")
                 llama_custom = LlamaCustom(model_name=selected_llm_name, index=index)
                 st.session_state.llama_custom = llama_custom
 
                 status.update(label="Ready to query!", state="complete", expanded=False)
+            except Exception as e:
+                status.update(label="Error!", state="error", expanded=False)
+                st.error(f"Error: {e}")
+                st.stop()
 
 with tab2:
     messages_container = st.container(height=300)
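A detail worth noting in the new Config tab: the selectbox options are composed as `f"{key} | {value}"` strings and split back apart on `"|"`, so both halves carry padding spaces and must be `.strip()`-ed before use, as the code does. A tiny round-trip sketch:

```python
integrated_llms = {"meta/llama-2-13b-chat": "replicate"}  # excerpt of the real dict

options = [f"{key} | {value}" for key, value in integrated_llms.items()]
model_name, source = options[0].split("|")

assert model_name.strip() == "meta/llama-2-13b-chat"
assert source.strip() == "replicate"
```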
utils/util.py CHANGED
@@ -1,7 +1,7 @@
 import requests
+from typing import Dict
 
-
-def validate(token: str):
+def validate_openai_api_key(token: str) -> Dict[str, str]:
     api_endpoint = "https://api.openai.com/v1/chat/completions"
     api_key = token
 
@@ -12,4 +12,8 @@
     data = {"model": "gpt-3.5-turbo", "messages": messages}
 
     response = requests.post(api_endpoint, json=data, headers=headers)
-    return response
+
+    if response.status_code == 200:
+        return {"status": "success", "message": "API key is valid"}
+    else:
+        return {"status": "error", "message": response.json()["error"]["message"]}