"""Interactive RAG chatbot over an insurance knowledge-base PDF.

Loads ./raw_documents/HI_Knowledge_Base.pdf, merges it into one Document,
indexes it with the BAAI/bge-small-en-v1.5 embedding model, and runs a
streaming context-chat REPL against gpt-3.5-turbo-1106. Type "END" at the
prompt to quit.
"""

import os
import time

import openai
from llama_index import (
    Document,
    ServiceContext,
    SimpleDirectoryReader,
    VectorStoreIndex,
)
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.llms import OpenAI
from llama_index.memory import ChatMemoryBuffer
from trulens_eval import Tru

import utils
from utils import get_prebuilt_trulens_recorder

openai.api_key = utils.get_openai_api_key()

# Load the PDF and join all pages into a single Document so the index
# treats the knowledge base as one continuous text.
documents = SimpleDirectoryReader(
    input_files=["./raw_documents/HI_Knowledge_Base.pdf"]
).load_data()
document = Document(text="\n\n".join([doc.text for doc in documents]))

# Candidate chat models: gpt-4-1106-preview, gpt-3.5-turbo-1106, gpt-3.5-turbo.
print("Initializing GPT 3.5 ..")
llm = OpenAI(model="gpt-3.5-turbo-1106", temperature=0.1)

print("Initializing bge-small-en-v1.5 embedding model ..")
embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

print("Creating vector store ..")
service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
index = VectorStoreIndex.from_documents([document], service_context=service_context)

# "context" chat mode retrieves relevant chunks each turn and keeps a
# rolling conversation memory capped at 15k tokens.
memory = ChatMemoryBuffer.from_defaults(token_limit=15000)
chat_engine = index.as_chat_engine(chat_mode="context", memory=memory)

# Simple REPL: stream response tokens as they arrive; "END" exits.
while True:
    input_str = input("[User]: ")
    if input_str == "END":
        break
    res = chat_engine.stream_chat(input_str)
    print("[Bot]: ", end="")
    for s in res.response_gen:
        print(s, end="")
    print("")