# Spaces: Sleeping  (hosting-page status text captured along with the file; not part of the program)
import streamlit as st | |
import os | |
from pathlib import Path | |
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine | |
from llama_index.core.selectors import LLMSingleSelector | |
from llama_index.core.tools import QueryEngineTool | |
from llama_index.core import SummaryIndex, VectorStoreIndex | |
from llama_index.core import VectorStoreIndex, Settings | |
from llama_index.core import SimpleDirectoryReader | |
from llama_index.llms.groq import Groq | |
from llama_index.embeddings.huggingface import HuggingFaceEmbedding | |
from typing import Tuple | |
from llama_index.core import StorageContext, load_index_from_storage | |
from llama_index.core.objects import ObjectIndex | |
from llama_index.core.agent import ReActAgent | |
# Embedding models are expensive to instantiate, so keep one instance per
# model name and share it between every document tool built in this process.
_EMBED_MODEL_CACHE = {}


def _get_embed_model(model_name: str = "BAAI/bge-large-en-v1.5"):
    """Return a shared HuggingFaceEmbedding for ``model_name``, creating it on first use."""
    if model_name not in _EMBED_MODEL_CACHE:
        _EMBED_MODEL_CACHE[model_name] = HuggingFaceEmbedding(model_name=model_name)
    return _EMBED_MODEL_CACHE[model_name]


async def create_doc_tools(document_fp: str, doc_name: str, verbose: bool = True) -> QueryEngineTool:
    """Build a vector query-engine tool for one document from its persisted index.

    The index is loaded from ``/home/user/app/agentic_index/<doc_name>``; the
    document itself is not re-read or re-embedded here.

    Args:
        document_fp: Path of the source document. Kept for interface
            compatibility; the persisted index is used instead of the file.
        doc_name: Name of the document. Selects the persisted index directory
            and is embedded in the tool's name and description.
        verbose: Currently unused; kept for interface compatibility.

    Returns:
        A single ``QueryEngineTool`` wrapping the document's vector query engine.

    Side effects:
        Sets the global ``Settings.llm`` and ``Settings.embed_model``.
    """
    # Read the key at call time so a key exported after import is honoured.
    # When the variable is unset, fall back to the client's own default.
    groq_kwargs = {}
    groq_api_key = os.environ.get("GROQ_API_KEY")
    if groq_api_key:
        groq_kwargs["api_key"] = groq_api_key
    Settings.llm = Groq(model="mixtral-8x7b-32768", **groq_kwargs)
    Settings.embed_model = _get_embed_model()

    load_dir_path = f"/home/user/app/agentic_index/{doc_name}"
    storage_context = StorageContext.from_defaults(persist_dir=load_dir_path)
    vector_index = load_index_from_storage(storage_context)

    vector_query_engine = vector_index.as_query_engine()
    vector_tool = QueryEngineTool.from_defaults(
        name=f"{doc_name}_vector_query_engine_tool",
        query_engine=vector_query_engine,
        description=f"Useful for retrieving specific context from the {doc_name}.",
    )
    return vector_tool
def find_tex_files(directory: str):
    """Collect every ``.tex`` and ``.txt`` file under ``directory``, recursively.

    Returns the absolute paths as a list in ascending string order. A
    directory that does not exist or has no matching files gives an empty list.
    """
    wanted_suffixes = ('.tex', '.txt')
    matches = [
        os.path.abspath(os.path.join(folder, filename))
        for folder, _subdirs, filenames in os.walk(directory)
        for filename in filenames
        if filename.endswith(wanted_suffixes)
    ]
    return sorted(matches)
def main():
    """Streamlit entry point: ask for a Groq key, build the agent, answer a question.

    Flow:
      1. Prompt for the Groq API key; nothing else runs until one is entered.
      2. Find the documents under the fixed ``rag_docs_final_review_tex_merged``
         directory and build one query-engine tool per document.
      3. Index the tools so the agent can retrieve the most relevant ones.
      4. Prompt for a question, run it through the ReAct agent, show the answer.
    """
    # Local import: only needed here to drive the async tool builder.
    import asyncio

    st.title("PDF Question Answering with LangChain")

    # API Key input
    api_key = st.text_input("Enter your Groq API Key", type="password")
    if not api_key:
        return

    # Export the key so Groq clients created elsewhere can read it from the
    # environment; the client created below also receives it explicitly.
    os.environ["GROQ_API_KEY"] = api_key

    directory = '/home/user/app/rag_docs_final_review_tex_merged'
    tex_files = find_tex_files(directory)
    if not tex_files:
        st.warning(f"No .tex or .txt documents found in {directory}.")
        return

    async def _build_tools():
        # create_doc_tools is a coroutine function, so it must be awaited from
        # async code. The tools are built one at a time, in file order.
        return [
            await create_doc_tools(doc_name=Path(paper).stem, document_fp=paper)
            for paper in tex_files
        ]

    # main() is a plain function, so the coroutine is run to completion here.
    initial_tools = asyncio.run(_build_tools())

    obj_index = ObjectIndex.from_objects(
        initial_tools,
        index_cls=VectorStoreIndex,
    )
    obj_retriever = obj_index.as_retriever(similarity_top_k=6)

    llm = Groq(model="mixtral-8x7b-32768", api_key=api_key)
    context = (
        "You are an agent designed to answer scientific queries over a set of given documents.\n"
        "Please always use the tools provided to answer a question. Do not rely on prior knowledge.\n"
    )
    agent = ReActAgent.from_tools(
        tool_retriever=obj_retriever,
        llm=llm,
        verbose=True,
        context=context,
    )

    user_prompt = st.text_input("Enter your question")
    if user_prompt:
        with st.spinner("Processing..."):
            response = agent.query(user_prompt)
        markdown_response = f"\n### Query Response:\n{response}\n"
        st.write(markdown_response)
# Start the app only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()