from llama_index import GPTPineconeIndex, LLMPredictor, ServiceContext
import pinecone
from langchain import OpenAI
import os
from llama_index.langchain_helpers.agents import IndexToolConfig, LlamaIndexTool, LlamaToolkit, create_llama_chat_agent
from langchain.chains.conversation.memory import ConversationBufferMemory
from llama_index import QuestionAnswerPrompt

# logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
# logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

pinecone_key = os.environ['PINECONE_KEY']


def askQuestion(brain, question, prompt, temperature, maxTokens):
    """Query the Pinecone-backed index ("brain") and return the response plus a fresh conversation memory."""
    temperature = float(temperature)
    finalQuestion = prompt + question
    print(finalQuestion)
    print(temperature, maxTokens)
    # print(type(temperature))
    # print(type(maxTokens))
    Brain_Name = brain.lower()
    print(Brain_Name)
    pinecone.init(api_key=pinecone_key, environment="us-west4-gcp")
    pineconeindex = pinecone.Index(Brain_Name)
    pineconeindex.describe_index_stats()
    index = GPTPineconeIndex([], pinecone_index=pineconeindex)
    # index = GPTSimpleVectorIndex.load_from_disk('index.json')

    # For Q&A set this value to 4; for content generation set it between 7 and 10.
    data_chunks = 5

    QA_PROMPT_TMPL = (
        "We have provided context information below. \n"
        "---------------------\n"
        "{context_str}"
        "\n---------------------\n"
        "Given this information, please answer the question at the end of this main prompt: " + prompt + " {query_str}\n"
    )
    QA_PROMPT = QuestionAnswerPrompt(QA_PROMPT_TMPL)

    query = question
    # relevant info from brain goes here
    info = ["pdf"]

    llm_predictor = LLMPredictor(llm=OpenAI(
        temperature=temperature, model_name="text-davinci-003", max_tokens=maxTokens))
    service_context_gpt4 = ServiceContext.from_defaults(llm_predictor=llm_predictor)

    response = index.query(query, service_context=service_context_gpt4,
                           similarity_top_k=data_chunks, response_mode="compact",
                           text_qa_template=QA_PROMPT)
    print(question)
    print(response)

    # No answer from the index: signal the caller that no chat memory was created.
    if response.response is None:
        return response, False

    # Seed a conversation memory with the first exchange so runAgent can continue the chat.
    memory = ConversationBufferMemory(memory_key="chat_history")
    memory.chat_memory.add_user_message(question)
    memory.chat_memory.add_ai_message(response.response)
    return response, memory


def getBrains(name):
    """Return True if a Pinecone index (brain) with the given name exists."""
    pinecone.init(api_key=pinecone_key, environment="us-west4-gcp")
    active_indexes = pinecone.list_indexes()
    print(active_indexes)
    name = name.lower()
    return name in active_indexes


def runAgent(brainName, memory, question, temperature, maxTokens):
    """Continue a chat against an existing brain using a LlamaIndex-backed LangChain chat agent."""
    if memory is False:
        return "Please initiate the chat first.."
    temperature = float(temperature)
    pinecone.init(api_key=pinecone_key, environment="us-west4-gcp")
    pineconeindex = pinecone.Index(brainName)
    index = GPTPineconeIndex([], pinecone_index=pineconeindex)
    # memory = ConversationBufferMemory(memory_key="chat_history")
    print(memory.chat_memory)

    llm = OpenAI(temperature=temperature, model_name="text-davinci-003", max_tokens=maxTokens)

    # Expose the Pinecone index to the agent as a tool it can fall back on
    # when the answer is not already in the conversation history.
    tool_config = IndexToolConfig(
        index=index,
        name="Vector Index",
        description="Use this tool if you can't find the required information in the previous message history",
        index_query_kwargs={"similarity_top_k": 4, "response_mode": "compact"},
        tool_kwargs={"return_direct": True},
    )
    toolkit = LlamaToolkit(index_configs=[tool_config])
    agent_chain = create_llama_chat_agent(toolkit, llm, memory=memory, verbose=True)

    response = agent_chain.run(question)
    print(memory.chat_memory)
    return response, memory
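

# --- Illustrative usage sketch (not part of the original module) ---
# A minimal example, under assumed placeholder values, of how these helpers
# chain together: verify the brain exists, start a chat with askQuestion,
# then continue it with runAgent. The brain name, prompt text, and
# temperature/token settings below are hypothetical and only for illustration.
if __name__ == "__main__":
    brain = "example-brain"  # assumed Pinecone index name; replace with a real one
    if getBrains(brain):
        response, memory = askQuestion(
            brain, "What is this document about?",
            "Answer concisely. ", temperature=0.2, maxTokens=256)
        if memory is not False:
            followup, memory = runAgent(
                brain, memory, "Can you elaborate on that?",
                temperature=0.2, maxTokens=256)
            print(followup)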