from llama_index import (
    LLMPredictor,
    PromptHelper,
    ServiceContext,
    StorageContext,
    OpenAIEmbedding,
    set_global_service_context,
    load_index_from_storage,
)
from llama_index.langchain_helpers.text_splitter import TokenTextSplitter
from llama_index.node_parser import SimpleNodeParser
from llama_index.callbacks import CallbackManager, LlamaDebugHandler, TokenCountingHandler
from llama_index.vector_stores import PineconeVectorStore
from llama_index.storage.docstore import MongoDocumentStore
from llama_index.storage.index_store.mongo_index_store import MongoIndexStore
from langchain.chat_models import ChatOpenAI
import tiktoken
import openai
import pinecone
import sys
import os
import logging

_pinecone_index_name = os.environ['PINECONE_INDEX_NAME']
_pinecone_api_key = os.environ['PINECONE_API_KEY']
_pinecone_environment = os.environ['PINECONE_ENVIRONMENT']
_openai_api_key = os.environ['OPENAI_API_KEY']
_mongo_uri = os.environ['MONGO_URI']
_model = 'gpt-3.5-turbo-16k'

# configure logging (basicConfig already attaches a stdout handler,
# so no extra StreamHandler is needed)
logging.basicConfig(
    stream=sys.stdout,
    level=logging.DEBUG,
    format="%(asctime)s [%(levelname)s] [%(filename)s:%(lineno)d] %(message)s",
)

# initialize pinecone and openai
openai.api_key = _openai_api_key
pinecone.init(api_key=_pinecone_api_key, environment=_pinecone_environment)

# create the Pinecone index if it does not already exist
if _pinecone_index_name not in pinecone.list_indexes():
    pinecone.create_index(
        _pinecone_index_name,
        dimension=1536,  # dimensionality of OpenAI embeddings
        metric="euclidean",
        pod_type="p1",
    )
pinecone_index = pinecone.Index(_pinecone_index_name)

# construct vector store
vector_store = PineconeVectorStore(pinecone_index=pinecone_index)

# construct document store
docstore = MongoDocumentStore.from_uri(uri=_mongo_uri)

# construct index store
index_store = MongoIndexStore.from_uri(uri=_mongo_uri)

# construct callback manager
token_counter = TokenCountingHandler(
    tokenizer=tiktoken.encoding_for_model(_model).encode,
    verbose=False,  # set to True to see usage printed to the console
)
llama_debug = LlamaDebugHandler(print_trace_on_end=True)
callback_manager = CallbackManager([
    llama_debug,
    token_counter,
])

# construct embed model
embed_model = OpenAIEmbedding()

# construct prompt helper
context_window = 4096
num_outputs = 512
chunk_overlap_ratio = 0.1
chunk_size_limit = 600
prompt_helper = PromptHelper(
    context_window, num_outputs, chunk_overlap_ratio, chunk_size_limit)

# construct llm predictor
llm = ChatOpenAI(temperature=0.7, model=_model, max_tokens=num_outputs)
llm_predictor = LLMPredictor(llm=llm)

# construct node parser
node_parser = SimpleNodeParser(
    text_splitter=TokenTextSplitter(chunk_size=1024, chunk_overlap=20))

# create service context
logging.info("Constructing service context")
service_context = ServiceContext.from_defaults(
    llm_predictor=llm_predictor,
    embed_model=embed_model,
    node_parser=node_parser,
    prompt_helper=prompt_helper,
    callback_manager=callback_manager,
)

# create storage context
logging.info("Constructing storage context")
storage_context = StorageContext.from_defaults(
    vector_store=vector_store,
    docstore=docstore,
    index_store=index_store,
)

logging.info("Setting global service context")
set_global_service_context(service_context)


def get_index():
    logging.info("Loading index")
    index = load_index_from_storage(storage_context)
    logging.info("Loaded index")
    return index
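

# Illustrative usage only (not part of the original module): a minimal sketch of how
# get_index() might be consumed, assuming an index has already been built and persisted
# to the Pinecone/Mongo-backed storage context configured above. The query string is
# a placeholder.
if __name__ == "__main__":
    index = get_index()
    # as_query_engine() is the standard llama_index entry point for querying an index
    query_engine = index.as_query_engine()
    response = query_engine.query("What does this document say about vector stores?")
    print(response)
    # the TokenCountingHandler registered above accumulates token usage for the run
    print(f"Embedding tokens: {token_counter.total_embedding_token_count}")
    print(f"LLM tokens: {token_counter.total_llm_token_count}")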