import logging
import os
import sys
import time

import gradio as gr
from llama_index import GPTVectorStoreIndex, SimpleDirectoryReader
from llama_index.memory import ChatMemoryBuffer

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

# Fail fast if the API key is missing instead of pointlessly re-assigning it.
if not os.getenv("OPENAI_API_KEY"):
    raise EnvironmentError("Set the OPENAI_API_KEY environment variable before running.")


def load_data():
    # Load every document found in the local data directory.
    return SimpleDirectoryReader("./subfolder_0/").load_data()


def build_index(documents):
    # Build a single vector index over all documents, inserting them one at a
    # time with a short delay between embedding calls to avoid rate limits.
    # (The original rebuilt the index inside the loop, keeping only the last document.)
    index = GPTVectorStoreIndex.from_documents([])
    for document in documents:
        time.sleep(5)
        index.insert(document)
    return index


def build_chat_engine(index):
    # Wrap the index in a context chat engine with a bounded conversation memory.
    memory = ChatMemoryBuffer.from_defaults(token_limit=4000)
    return index.as_chat_engine(
        chat_mode="context",
        memory=memory,
        system_prompt="Generate detailed answers, but don't be lengthy.",
    )


data = load_data()
index = build_index(data)
chat_engine = build_chat_engine(index)


def get_response(text, history=None):
    # Route the user's message through the chat engine and return its reply.
    return str(chat_engine.chat(text))


demo = gr.ChatInterface(get_response, analytics_enabled=True)
demo.launch(debug=True, share=True)