AMGPT3

Sleeping

App Files Files Community

AMGPT3 / app.py

achuthc1298

Update app.py

ce4e6ee verified 4 months ago

raw

history blame

3.68 kB

	import streamlit as st
	import os
	from pathlib import Path
	from llama_index.core.query_engine.router_query_engine import RouterQueryEngine
	from llama_index.core.selectors import LLMSingleSelector
	from llama_index.core.tools import QueryEngineTool
	from llama_index.core import SummaryIndex, VectorStoreIndex
	from llama_index.core import VectorStoreIndex, Settings
	from llama_index.core import SimpleDirectoryReader
	from llama_index.llms.groq import Groq
	from llama_index.embeddings.huggingface import HuggingFaceEmbedding
	from typing import Tuple
	from llama_index.core import StorageContext, load_index_from_storage
	from llama_index.core.objects import ObjectIndex
	from llama_index.core.agent import ReActAgent

	# Function to load a document's persisted vector index and wrap it in a query-engine tool
	# Set once the shared LlamaIndex settings have been installed for this run.
	_SETTINGS_CONFIGURED = False


	def _configure_llama_settings() -> None:
	    """Install the global Groq LLM and HuggingFace embedding model once per run.

	    Instantiating the embedding model is costly, and the same two objects
	    serve every document, so repeat calls return without doing anything.
	    """
	    global _SETTINGS_CONFIGURED
	    if _SETTINGS_CONFIGURED:
	        return
	    Settings.llm = Groq(model="mixtral-8x7b-32768")
	    Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-large-en-v1.5")
	    _SETTINGS_CONFIGURED = True


	async def create_doc_tools(document_fp: str, doc_name: str, verbose: bool = True) -> QueryEngineTool:
	    """Build the vector query-engine tool for one document.

	    The vector index is loaded from the persisted storage directory
	    ``/home/user/app/agentic_index/<doc_name>``; it is not rebuilt from the
	    source document.

	    Args:
	        document_fp: Path of the source document. Kept for interface
	            compatibility; the file is not read, because the index comes
	            from persisted storage.
	        doc_name: Document identifier. Selects the persisted index
	            directory and is embedded in the tool name and description.
	        verbose: Accepted for interface compatibility; not used here.

	    Returns:
	        A single ``QueryEngineTool`` wrapping the document's vector query
	        engine (not a tuple).
	    """
	    _configure_llama_settings()

	    load_dir_path = f"/home/user/app/agentic_index/{doc_name}"
	    storage_context = StorageContext.from_defaults(persist_dir=load_dir_path)
	    vector_index = load_index_from_storage(storage_context)
	    vector_query_engine = vector_index.as_query_engine()

	    vector_tool = QueryEngineTool.from_defaults(
	        name=f"{doc_name}_vector_query_engine_tool",
	        query_engine=vector_query_engine,
	        description=f"Useful for retrieving specific context from the {doc_name}.",
	    )

	    return vector_tool

	# Function to find and sort .tex and .txt files
	def find_tex_files(directory: str):
	    """Return the sorted absolute paths of every .tex/.txt file under *directory*.

	    The directory tree is walked recursively with ``os.walk``; files whose
	    names end in ``.tex`` or ``.txt`` are collected, and the resulting list
	    of absolute paths is returned in ascending order.
	    """
	    wanted_suffixes = ('.tex', '.txt')
	    matches = [
	        os.path.abspath(os.path.join(folder, filename))
	        for folder, _subdirs, filenames in os.walk(directory)
	        for filename in filenames
	        if filename.endswith(wanted_suffixes)
	    ]
	    return sorted(matches)

	# Main app function
	def main():
	    """Render the Streamlit question-answering page.

	    Flow: ask for a Groq API key; once one is entered, build one vector
	    query tool per .tex/.txt document found under the document directory,
	    index those tools in an ``ObjectIndex``, create a ReAct agent that
	    retrieves tools from that index, and answer the question the user
	    types in.
	    """
	    # Local import so this function does not depend on a file-level import.
	    import asyncio

	    st.title("PDF Question Answering with LangChain")

	    # API Key input
	    api_key = st.text_input("Enter your Groq API Key", type="password")
	    if not api_key:
	        return

	    # Export the entered key so Groq clients constructed without an explicit
	    # key (as in create_doc_tools) can find it; previously the key was
	    # collected but never handed to any client.
	    os.environ["GROQ_API_KEY"] = api_key

	    directory = '/home/user/app/rag_docs_final_review_tex_merged'
	    tex_files = find_tex_files(directory)
	    if not tex_files:
	        st.error(f"No .tex or .txt documents were found under {directory}.")
	        return

	    async def _build_tools():
	        # create_doc_tools is a coroutine function; awaiting it directly in
	        # this synchronous function would be a syntax error, so the awaits
	        # live in this nested coroutine, driven by asyncio.run below.
	        return [
	            await create_doc_tools(doc_name=Path(paper).stem, document_fp=paper)
	            for paper in tex_files
	        ]

	    initial_tools = asyncio.run(_build_tools())

	    obj_index = ObjectIndex.from_objects(
	        initial_tools,
	        index_cls=VectorStoreIndex,
	    )

	    obj_retriever = obj_index.as_retriever(similarity_top_k=6)

	    llm = Groq(model="mixtral-8x7b-32768", api_key=api_key)

	    context = """You are an agent designed to answer scientific queries over a set of given documents.
	Please always use the tools provided to answer a question. Do not rely on prior knowledge.
	"""

	    agent = ReActAgent.from_tools(
	        tool_retriever=obj_retriever,
	        llm=llm,
	        verbose=True,
	        context=context
	    )

	    user_prompt = st.text_input("Enter your question")
	    if not user_prompt:
	        return

	    with st.spinner("Processing..."):
	        response = agent.query(user_prompt)

	    markdown_response = f"""
	### Query Response:

	{response}
	"""
	    st.write(markdown_response)

	# Script entry point: build the page only when this file is executed as the
	# main module, not when it is imported.
	if __name__ == "__main__":
	    main()