Completed Gradio app for QA
- app.py +23 -54
- app_modules/init.py +78 -0
- app_modules/llm_inference.py +2 -2
- app_modules/llm_qa_chain.py +1 -1
- app_modules/presets.py +0 -91
- app_modules/utils.py +0 -8
- test.py +43 -28
app.py
CHANGED
@@ -6,67 +6,36 @@ from timeit import default_timer as timer
 
 import gradio as gr
 from anyio.from_thread import start_blocking_portal
-from langchain.embeddings import HuggingFaceInstructEmbeddings
-from langchain.vectorstores.chroma import Chroma
-from langchain.vectorstores.faiss import FAISS
+from app_modules.init import app_init
+from app_modules.utils import print_llm_response
 
-
-from app_modules.qa_chain import QAChain
-from app_modules.utils import *
+qa_chain = app_init()
 
-# Constants
-init_settings()
-
-# https://github.com/huggingface/transformers/issues/17611
-os.environ["CURL_CA_BUNDLE"] = ""
-
-hf_embeddings_device_type, hf_pipeline_device_type = get_device_types()
-print(f"hf_embeddings_device_type: {hf_embeddings_device_type}")
-print(f"hf_pipeline_device_type: {hf_pipeline_device_type}")
-
-hf_embeddings_model_name = (
-    os.environ.get("HF_EMBEDDINGS_MODEL_NAME") or "hkunlp/instructor-xl"
-)
-n_threds = int(os.environ.get("NUMBER_OF_CPU_CORES") or "4")
-index_path = os.environ.get("FAISS_INDEX_PATH") or os.environ.get("CHROMADB_INDEX_PATH")
-using_faiss = os.environ.get("FAISS_INDEX_PATH") is not None
-llm_model_type = os.environ.get("LLM_MODEL_TYPE")
 chat_history_enabled = os.environ.get("CHAT_HISTORY_ENABLED") == "true"
 show_param_settings = os.environ.get("SHOW_PARAM_SETTINGS") == "true"
 share_gradio_app = os.environ.get("SHARE_GRADIO_APP") == "true"
 
-
-
-
-start = timer()
-embeddings = HuggingFaceInstructEmbeddings(
-    model_name=hf_embeddings_model_name,
-    model_kwargs={"device": hf_embeddings_device_type},
-)
-end = timer()
-
-print(f"Completed in {end - start:.3f}s")
-
-start = timer()
-
-print(f"Load index from {index_path} with {'FAISS' if using_faiss else 'Chroma'}")
+using_openai = os.environ.get("LLM_MODEL_TYPE") == "openai"
+model = (
+    "OpenAI GPT-4" if using_openai else os.environ.get("HUGGINGFACE_MODEL_NAME_OR_PATH")
+)
+href = "https://openai.com/gpt-4" if using_openai else f"https://huggingface.co/{model}"
 
-if not os.path.isdir(index_path):
-    raise ValueError(f"{index_path} does not exist!")
-elif using_faiss:
-    vectorstore = FAISS.load_local(index_path, embeddings)
-else:
-    vectorstore = Chroma(embedding_function=embeddings, persist_directory=index_path)
+title = """<h1 align="left" style="min-width:200px; margin-top:0;"> Chat with AI Books </h1>"""
 
-end = timer()
+description_top = f"""\
+<div align="left">
+<p> Currently Running: <a href="{href}">{model}</a></p>
+</div>
+"""
 
-print(f"Completed in {end - start:.3f}s")
+description = """\
+<div align="center" style="margin:16px 0">
+The demo is built on <a href="https://github.com/hwchase17/langchain">LangChain</a>.
+</div>
+"""
 
-
-qa_chain = QAChain(vectorstore, llm_model_type)
-qa_chain.init(n_threds=n_threds, hf_pipeline_device_type=hf_pipeline_device_type)
-end = timer()
-print(f"Completed in {end - start:.3f}s")
+CONCURRENT_COUNT = 100
 
 
 def qa(chatbot):
@@ -77,7 +46,7 @@ def qa(chatbot):
 
     def task(question, chat_history):
         start = timer()
-        ret = qa_chain.
+        ret = qa_chain.call_chain(
            {"question": question, "chat_history": chat_history}, None, q
         )
         end = timer()
@@ -135,7 +104,7 @@ def qa(chatbot):
 with open("assets/custom.css", "r", encoding="utf-8") as f:
     customCSS = f.read()
 
-with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
+with gr.Blocks(css=customCSS) as demo:
    user_question = gr.State("")
    with gr.Row():
        gr.HTML(title)
@@ -219,5 +188,5 @@ with gr.Blocks(css=customCSS, theme=small_and_beautiful_theme) as demo:
        api_name="reset",
    )
 
-demo.title = "Chat with
-demo.queue(concurrency_count=
+demo.title = "Chat with AI Books"
+demo.queue(concurrency_count=CONCURRENT_COUNT).launch(share=share_gradio_app)
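Note on the `task` helper above: `call_chain` receives the queue `q` so generated tokens can be streamed from a worker back to the Gradio UI. A minimal sketch of that producer/consumer pattern, using a plain thread and `queue.Queue` instead of the app's `start_blocking_portal`; every name except `qa_chain.call_chain` is illustrative, and `call_chain` is assumed to push tokens into the queue as they arrive:

import queue
import threading

def stream_answer(qa_chain, question, chat_history):
    q = queue.Queue()

    def task():
        # Mirrors the call site in app.py's qa() handler: the chain is
        # assumed to stream tokens into q while it runs.
        qa_chain.call_chain(
            {"question": question, "chat_history": chat_history}, None, q
        )
        q.put(None)  # sentinel: generation finished

    threading.Thread(target=task, daemon=True).start()

    partial = ""
    while (token := q.get()) is not None:
        partial += token
        yield partial  # each partial answer re-renders the chatbot message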
app_modules/init.py
ADDED
@@ -0,0 +1,78 @@
+"""Main entrypoint for the app."""
+import os
+from timeit import default_timer as timer
+from typing import List, Optional
+
+from dotenv import find_dotenv, load_dotenv
+from langchain.embeddings import HuggingFaceInstructEmbeddings
+from langchain.vectorstores.chroma import Chroma
+from langchain.vectorstores.faiss import FAISS
+
+from app_modules.llm_loader import LLMLoader
+from app_modules.llm_qa_chain import QAChain
+from app_modules.utils import get_device_types, init_settings
+
+found_dotenv = find_dotenv(".env")
+
+if len(found_dotenv) == 0:
+    found_dotenv = find_dotenv(".env.example")
+print(f"loading env vars from: {found_dotenv}")
+load_dotenv(found_dotenv, override=False)
+
+# Constants
+init_settings()
+
+
+def app_init():
+    # https://github.com/huggingface/transformers/issues/17611
+    os.environ["CURL_CA_BUNDLE"] = ""
+
+    hf_embeddings_device_type, hf_pipeline_device_type = get_device_types()
+    print(f"hf_embeddings_device_type: {hf_embeddings_device_type}")
+    print(f"hf_pipeline_device_type: {hf_pipeline_device_type}")
+
+    hf_embeddings_model_name = (
+        os.environ.get("HF_EMBEDDINGS_MODEL_NAME") or "hkunlp/instructor-xl"
+    )
+
+    n_threds = int(os.environ.get("NUMBER_OF_CPU_CORES") or "4")
+    index_path = os.environ.get("FAISS_INDEX_PATH") or os.environ.get(
+        "CHROMADB_INDEX_PATH"
+    )
+    using_faiss = os.environ.get("FAISS_INDEX_PATH") is not None
+    llm_model_type = os.environ.get("LLM_MODEL_TYPE")
+
+    start = timer()
+    embeddings = HuggingFaceInstructEmbeddings(
+        model_name=hf_embeddings_model_name,
+        model_kwargs={"device": hf_embeddings_device_type},
+    )
+    end = timer()
+
+    print(f"Completed in {end - start:.3f}s")
+
+    start = timer()
+
+    print(f"Load index from {index_path} with {'FAISS' if using_faiss else 'Chroma'}")
+
+    if not os.path.isdir(index_path):
+        raise ValueError(f"{index_path} does not exist!")
+    elif using_faiss:
+        vectorstore = FAISS.load_local(index_path, embeddings)
+    else:
+        vectorstore = Chroma(
+            embedding_function=embeddings, persist_directory=index_path
+        )
+
+    end = timer()
+
+    print(f"Completed in {end - start:.3f}s")
+
+    start = timer()
+    llm_loader = LLMLoader(llm_model_type)
+    llm_loader.init(n_threds=n_threds, hf_pipeline_device_type=hf_pipeline_device_type)
+    qa_chain = QAChain(vectorstore, llm_loader)
+    end = timer()
+    print(f"Completed in {end - start:.3f}s")
+
+    return qa_chain
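Since `app_init()` now owns all start-up side effects (dotenv loading, settings, embeddings, vector store, LLM), it can be reused outside Gradio. A minimal sketch, assuming a `.env` (or `.env.example`) supplies `LLM_MODEL_TYPE` and `FAISS_INDEX_PATH`/`CHROMADB_INDEX_PATH`, and following the `call_chain(inputs, None)` signature used in test.py below:

from app_modules.init import app_init
from app_modules.utils import print_llm_response

qa_chain = app_init()  # one-time load of embeddings, index, and LLM

result = qa_chain.call_chain(
    {"question": "What's deep learning?", "chat_history": []}, None
)
print_llm_response(result)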
app_modules/llm_inference.py
CHANGED
@@ -8,8 +8,8 @@ from threading import Thread
 from langchain.callbacks.tracers import LangChainTracer
 from langchain.chains.base import Chain
 
-from app_modules.llm_loader import
-from app_modules.utils import
+from app_modules.llm_loader import LLMLoader, TextIteratorStreamer
+from app_modules.utils import remove_extra_spaces
 
 
 class LLMInference(metaclass=abc.ABCMeta):
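For context, `LLMInference` is the abstract base that chains build on: a subclass supplies `create_chain()` and reuses the shared `LLMLoader`. A hypothetical subclass sketch; `QAChain` in llm_qa_chain.py is the real example, and the `llm_loader.llm` attribute plus the `LLMChain` wiring here are assumptions, not the repo's API:

from langchain.chains import LLMChain
from langchain.chains.base import Chain
from langchain.prompts import PromptTemplate

from app_modules.llm_inference import LLMInference

class SummarizeChain(LLMInference):
    # Hypothetical subclass: the contract is just to return a Chain.
    def create_chain(self) -> Chain:
        prompt = PromptTemplate.from_template("Summarize this text:\n{text}")
        # assumes LLMLoader exposes the loaded model as `llm`
        return LLMChain(llm=self.llm_loader.llm, prompt=prompt)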
app_modules/llm_qa_chain.py
CHANGED
@@ -9,7 +9,7 @@ class QAChain(LLMInference):
     vectorstore: VectorStore
 
     def __init__(self, vectorstore, llm_loader: int = 2048):
-        super.__init__(llm_loader)
+        super().__init__(llm_loader)
         self.vectorstore = vectorstore
 
     def create_chain(self) -> Chain:
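The `super().__init__` fix is worth a note: bare `super` is the built-in type itself, not a proxy bound to `self`, so the old spelling raises a `TypeError` as soon as a `QAChain` is constructed. A self-contained repro:

class Base:
    def __init__(self, value):
        self.value = value

class Broken(Base):
    def __init__(self, value):
        # TypeError: descriptor '__init__' requires a 'super' object
        super.__init__(value)

class Fixed(Base):
    def __init__(self, value):
        super().__init__(value)  # bound proxy -> calls Base.__init__

Fixed(42)     # ok: .value == 42
# Broken(42)  # raises TypeError at construction time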
app_modules/presets.py
DELETED
@@ -1,91 +0,0 @@
-# -*- coding:utf-8 -*-
-import os
-
-import gradio as gr
-
-from app_modules.utils import *
-
-using_openai = os.environ.get("LLM_MODEL_TYPE") == "openai"
-model = (
-    "OpenAI GPT-4" if using_openai else os.environ.get("HUGGINGFACE_MODEL_NAME_OR_PATH")
-)
-href = "https://openai.com/gpt-4" if using_openai else f"https://huggingface.co/{model}"
-
-title = """<h1 align="left" style="min-width:200px; margin-top:0;"> Chat with AI Books </h1>"""
-
-description_top = f"""\
-<div align="left">
-<p> Currently Running: <a href="{href}">{model}</a></p>
-</div>
-"""
-
-description = """\
-<div align="center" style="margin:16px 0">
-The demo is built on <a href="https://github.com/hwchase17/langchain">LangChain</a>.
-</div>
-"""
-CONCURRENT_COUNT = 100
-
-
-ALREADY_CONVERTED_MARK = "<!-- ALREADY CONVERTED BY PARSER. -->"
-
-small_and_beautiful_theme = gr.themes.Soft(
-    primary_hue=gr.themes.Color(
-        c50="#02C160",
-        c100="rgba(2, 193, 96, 0.2)",
-        c200="#02C160",
-        c300="rgba(2, 193, 96, 0.32)",
-        c400="rgba(2, 193, 96, 0.32)",
-        c500="rgba(2, 193, 96, 1.0)",
-        c600="rgba(2, 193, 96, 1.0)",
-        c700="rgba(2, 193, 96, 0.32)",
-        c800="rgba(2, 193, 96, 0.32)",
-        c900="#02C160",
-        c950="#02C160",
-    ),
-    secondary_hue=gr.themes.Color(
-        c50="#576b95",
-        c100="#576b95",
-        c200="#576b95",
-        c300="#576b95",
-        c400="#576b95",
-        c500="#576b95",
-        c600="#576b95",
-        c700="#576b95",
-        c800="#576b95",
-        c900="#576b95",
-        c950="#576b95",
-    ),
-    neutral_hue=gr.themes.Color(
-        name="gray",
-        c50="#f9fafb",
-        c100="#f3f4f6",
-        c200="#e5e7eb",
-        c300="#d1d5db",
-        c400="#B2B2B2",
-        c500="#808080",
-        c600="#636363",
-        c700="#515151",
-        c800="#393939",
-        c900="#272727",
-        c950="#171717",
-    ),
-    radius_size=gr.themes.sizes.radius_sm,
-).set(
-    button_primary_background_fill="#06AE56",
-    button_primary_background_fill_dark="#06AE56",
-    button_primary_background_fill_hover="#07C863",
-    button_primary_border_color="#06AE56",
-    button_primary_border_color_dark="#06AE56",
-    button_primary_text_color="#FFFFFF",
-    button_primary_text_color_dark="#FFFFFF",
-    button_secondary_background_fill="#F2F2F2",
-    button_secondary_background_fill_dark="#2B2B2B",
-    button_secondary_text_color="#393939",
-    button_secondary_text_color_dark="#FFFFFF",
-    # background_fill_primary="#F7F7F7",
-    # background_fill_primary_dark="#1F1F1F",
-    block_title_text_color="*primary_500",
-    block_title_background_fill="*primary_100",
-    input_background_fill="#F6F6F6",
-)
app_modules/utils.py
CHANGED
@@ -9,16 +9,8 @@ from pathlib import Path
 
 import requests
 import torch
-from dotenv import find_dotenv, load_dotenv
 from tqdm import tqdm
 
-found_dotenv = find_dotenv(".env")
-if len(found_dotenv) == 0:
-    found_dotenv = find_dotenv(".env.example")
-print(f"loading env vars from: {found_dotenv}")
-load_dotenv(found_dotenv, override=False)
-# print(f"loaded env vars: {os.environ}")
-
 
 class LogRecord(logging.LogRecord):
     def getMessage(self):
test.py
CHANGED
@@ -7,36 +7,21 @@ from timeit import default_timer as timer
 from langchain.callbacks.base import BaseCallbackHandler
 from langchain.schema import HumanMessage
 
+from app_modules.init import app_init
 from app_modules.llm_loader import LLMLoader
-from app_modules.utils import
+from app_modules.utils import get_device_types, print_llm_response
 
-user_question = "What's the capital city of Malaysia?"
-n_threds = int(os.environ.get("NUMBER_OF_CPU_CORES") or "4")
 
+class TestLLMLoader: # (unittest.TestCase):
+    question = "What's the capital city of Malaysia?"
 
-hf_embeddings_device_type, hf_pipeline_device_type = get_device_types()
-print(f"hf_embeddings_device_type: {hf_embeddings_device_type}")
-print(f"hf_pipeline_device_type: {hf_pipeline_device_type}")
+    def run_test_case(self, llm_model_type, query):
+        n_threds = int(os.environ.get("NUMBER_OF_CPU_CORES") or "4")
 
-
-
-
-
-    def reset(self):
-        self.texts = []
-
-    def get_standalone_question(self) -> str:
-        return self.texts[0].strip() if len(self.texts) > 0 else None
-
-    def on_llm_end(self, response, **kwargs) -> None:
-        """Run when chain ends running."""
-        print("\non_llm_end - response:")
-        print(response)
-        self.texts.append(response.generations[0][0].text)
-
-class TestLLMLoader(unittest.TestCase):
-    def run_test_case(self, llm_model_type, query):
+        hf_embeddings_device_type, hf_pipeline_device_type = get_device_types()
+        print(f"hf_embeddings_device_type: {hf_embeddings_device_type}")
+        print(f"hf_pipeline_device_type: {hf_pipeline_device_type}")
+
         llm_loader = LLMLoader(llm_model_type)
         start = timer()
         llm_loader.init(
@@ -53,16 +38,46 @@ class TestLLMLoader(unittest.TestCase):
         print(result)
 
     def test_openai(self):
-        self.run_test_case("openai", user_question)
+        self.run_test_case("openai", self.question)
+
+    def test_llamacpp(self):
+        self.run_test_case("llamacpp", self.question)
+
+    def test_gpt4all_j(self):
+        self.run_test_case("gpt4all-j", self.question)
+
+    def test_huggingface(self):
+        self.run_test_case("huggingface", self.question)
+
+
+class TestQAChain(unittest.TestCase):
+    qa_chain: any
+    question = "What's deep learning?"
+
+    def run_test_case(self, llm_model_type, query):
+        start = timer()
+        os.environ["LLM_MODEL_TYPE"] = llm_model_type
+        qa_chain = app_init()
+        end = timer()
+        print(f"App initialized in {end - start:.3f}s")
+
+        inputs = {"question": query, "chat_history": []}
+        result = qa_chain.call_chain(inputs, None)
+        end2 = timer()
+        print(f"Inference completed in {end2 - end:.3f}s")
+        print_llm_response(result)
+
+    def test_openai(self):
+        self.run_test_case("openai", self.question)
 
     def test_llamacpp(self):
-        self.run_test_case("llamacpp", user_question)
+        self.run_test_case("llamacpp", self.question)
 
     def test_gpt4all_j(self):
-        self.run_test_case("gpt4all-j", user_question)
+        self.run_test_case("gpt4all-j", self.question)
 
     def test_huggingface(self):
-        self.run_test_case("huggingface", user_question)
+        self.run_test_case("huggingface", self.question)
 
 
 if __name__ == "__main__":
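With `TestLLMLoader` detached from `unittest.TestCase`, running `python test.py` now exercises only the `TestQAChain` cases, each of which boots the full app via `app_init()` for its backend. A sketch of driving a single backend programmatically; the loader/runner calls are standard `unittest`, and the module name `test` refers to this file:

import unittest

# Load and run just the OpenAI-backed QA test instead of the whole suite.
suite = unittest.TestLoader().loadTestsFromName("test.TestQAChain.test_openai")
unittest.TextTestRunner(verbosity=2).run(suite)

The equivalent from the command line is `python test.py TestQAChain.test_openai`, since the file ends in `unittest.main()`-style dispatch.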