# llm-qa-bench
#
# Sleeping
#
# File size: 3,520 Bytes

"""Main entrypoint for the app."""

import os
from timeit import default_timer as timer
from typing import List, Optional

from dotenv import find_dotenv, load_dotenv
from langchain_community.embeddings import HuggingFaceInstructEmbeddings
from langchain.vectorstores.chroma import Chroma
from langchain.vectorstores.faiss import FAISS

from app_modules.llm_loader import LLMLoader
from app_modules.utils import get_device_types, init_settings

# Locate the environment file: a real ".env" is preferred; when none is found
# (empty result) fall back to the ".env.example" template.
found_dotenv = find_dotenv(".env") or find_dotenv(".env.example")
print(f"loading env vars from: {found_dotenv}")
# override=False: variables already present in the process environment win
# over the values in the file.
load_dotenv(found_dotenv, override=False)

# Constants
init_settings()

# Opt-in LangChain debug output; only the exact value "true" enables it.
if os.getenv("LANGCHAIN_DEBUG") == "true":
    from langchain.globals import set_debug

    set_debug(True)

# NOTE(review): these imports sit below the env/settings setup - presumably so
# the modules see the loaded configuration at import time; confirm before
# moving them to the top of the file.
from app_modules.llm_qa_chain import QAChain
from app_modules.llm_chat_chain import ChatChain
import nltk


def app_init():
    """Initialise the LLM loader and the chain used to answer questions.

    Configuration is read from environment variables:

    * ``HF_EMBEDDINGS_MODEL_NAME`` - embeddings model name
      (default ``"hkunlp/instructor-xl"``).
    * ``NUMBER_OF_CPU_CORES`` - thread count handed to the LLM loader
      (default ``4``).
    * ``FAISS_INDEX_PATH`` / ``CHROMADB_INDEX_PATH`` - directory holding the
      persisted index. A non-empty ``FAISS_INDEX_PATH`` takes precedence and
      selects FAISS; otherwise ``CHROMADB_INDEX_PATH`` is used with Chroma.
    * ``LLM_MODEL_TYPE`` - passed straight to ``LLMLoader``.
    * ``QA_WITH_RAG`` - ``"true"`` (default, case-insensitive) builds a
      ``QAChain``; any other value builds a ``ChatChain``.
    * ``RETRIEVER_TYPE`` - when equal to ``"questions_file"`` the embeddings
      and the vector store are not loaded and ``QAChain`` receives ``None``
      as its vector store.

    Returns:
        A ``(llm_loader, qa_chain)`` tuple, where ``qa_chain`` is either a
        ``QAChain`` or a ``ChatChain`` depending on ``QA_WITH_RAG``.

    Raises:
        ValueError: If a vector store is required but no index path is
            configured, or the configured path is not an existing directory.
    """
    # https://github.com/huggingface/transformers/issues/17611
    # NOTE(review): blanking the CA bundle is the workaround from the issue
    # above; it presumably affects TLS certificate verification for outgoing
    # requests - confirm it is still required.
    os.environ["CURL_CA_BUNDLE"] = ""

    nltk.download("punkt")

    hf_embeddings_device_type, hf_pipeline_device_type = get_device_types()
    print(f"hf_embeddings_device_type: {hf_embeddings_device_type}")
    print(f"hf_pipeline_device_type: {hf_pipeline_device_type}")

    hf_embeddings_model_name = (
        os.environ.get("HF_EMBEDDINGS_MODEL_NAME") or "hkunlp/instructor-xl"
    )

    n_threads = int(os.environ.get("NUMBER_OF_CPU_CORES") or "4")

    # Derive the backend from the same truthiness test that picks the path:
    # an empty FAISS_INDEX_PATH must not select FAISS while the path itself
    # falls back to CHROMADB_INDEX_PATH.
    faiss_index_path = os.environ.get("FAISS_INDEX_PATH")
    index_path = faiss_index_path or os.environ.get("CHROMADB_INDEX_PATH")
    using_faiss = bool(faiss_index_path)
    llm_model_type = os.environ.get("LLM_MODEL_TYPE")

    qa_with_rag = os.getenv("QA_WITH_RAG", "true").lower() == "true"
    print(f"qa_with_rag: {qa_with_rag}")

    retrieve_from_questions_file = os.getenv("RETRIEVER_TYPE") == "questions_file"
    print(f"retrieve_from_questions_file: {retrieve_from_questions_file}", flush=True)

    vectorstore = None
    if qa_with_rag and not retrieve_from_questions_file:
        # Validate the index location before the (timed) embeddings load so a
        # misconfiguration is reported immediately and with a clear message.
        if not index_path:
            raise ValueError(
                "Neither FAISS_INDEX_PATH nor CHROMADB_INDEX_PATH is set; "
                "an index path is required when QA_WITH_RAG is enabled."
            )
        if not os.path.isdir(index_path):
            raise ValueError(f"{index_path} does not exist!")

        print(f"hf_embeddings_model_name: {hf_embeddings_model_name}")
        start = timer()
        embeddings = HuggingFaceInstructEmbeddings(
            model_name=hf_embeddings_model_name,
            model_kwargs={"device": hf_embeddings_device_type},
        )
        end = timer()

        print(f"Completed in {end - start:.3f}s")

        start = timer()

        print(
            f"Load index from {index_path} with {'FAISS' if using_faiss else 'Chroma'}"
        )

        if using_faiss:
            # NOTE: allow_dangerous_deserialization=True lets FAISS unpickle
            # the stored index; only point this at index files you trust.
            vectorstore = FAISS.load_local(
                index_path, embeddings, allow_dangerous_deserialization=True
            )
        else:
            vectorstore = Chroma(
                embedding_function=embeddings, persist_directory=index_path
            )

        end = timer()

        print(f"Completed in {end - start:.3f}s")

    start = timer()
    llm_loader = LLMLoader(llm_model_type)
    # "n_threds" (sic) is the keyword name this call site has always used for
    # LLMLoader.init, so it is kept as-is.
    llm_loader.init(n_threds=n_threads, hf_pipeline_device_type=hf_pipeline_device_type)
    if qa_with_rag:
        qa_chain = QAChain(vectorstore, llm_loader)
    else:
        qa_chain = ChatChain(llm_loader)
    end = timer()
    print(f"Completed in {end - start:.3f}s")

    return llm_loader, qa_chain