Spaces:
Runtime error
Runtime error
from llama_index.core import SimpleDirectoryReader | |
from llama_index.core.node_parser import SentenceSplitter | |
from llama_index.core import Settings | |
from llama_index.llms.openai import OpenAI | |
from llama_index.embeddings.openai import OpenAIEmbedding | |
from llama_index.core import SummaryIndex, VectorStoreIndex | |
from llama_index.core.tools import QueryEngineTool | |
from llama_index.core.query_engine.router_query_engine import RouterQueryEngine | |
from llama_index.core.selectors import LLMSingleSelector | |
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, SummaryIndex | |
from llama_index.core.node_parser import SentenceSplitter | |
from llama_index.core.tools import FunctionTool, QueryEngineTool | |
from llama_index.core.vector_stores import MetadataFilters, FilterCondition | |
from typing import List, Optional | |
def get_doc_tools(
    file_path: str,
    name: str,
) -> "tuple[FunctionTool, QueryEngineTool]":
    """Get vector query and summary query tools from a document.

    The document at ``file_path`` is loaded, split into sentence-based chunks
    (``chunk_size=1024``) and indexed twice: once in a ``VectorStoreIndex``
    (for targeted questions, optionally restricted to given pages) and once
    in a ``SummaryIndex`` (for summarization questions).

    Args:
        file_path: Path of the single input file to load.
        name: Suffix used to build the tool names ``vector_tool_{name}`` and
            ``summary_tool_{name}``; it is also interpolated into the summary
            tool's description.

    Returns:
        A ``(vector_query_tool, summary_tool)`` pair.
    """
    # NOTE: the return annotation is quoted so it is not evaluated at
    # definition time and needs no extra ``typing`` import.

    # Load the document and split it into nodes shared by both indexes.
    documents = SimpleDirectoryReader(input_files=[file_path]).load_data()
    splitter = SentenceSplitter(chunk_size=1024)
    nodes = splitter.get_nodes_from_documents(documents)

    vector_index = VectorStoreIndex(nodes)

    # The signature and docstring of ``vector_query`` are handed to
    # ``FunctionTool.from_defaults`` below, so they are kept as written.
    # NOTE(review): the function returns the query engine's response object
    # rather than a plain ``str`` despite its annotation.
    def vector_query(
        query: str,
        page_numbers: Optional[List[str]] = None
    ) -> str:
        """Use to answer questions over a given paper.
        Useful if you have specific questions over the paper.
        Always leave page_numbers as None UNLESS there is a specific page you want to search for.
        Args:
            query (str): the string query to be embedded.
            page_numbers (Optional[List[str]]): Filter by set of pages. Leave as NONE
                if we want to perform a vector search
                over all pages. Otherwise, filter by the set of specified pages.
        """
        # Only build a metadata filter when pages were actually requested;
        # otherwise pass ``None`` instead of an empty OR-filter.
        filters = None
        if page_numbers:
            metadata_dicts = [
                {"key": "page_label", "value": p} for p in page_numbers
            ]
            # OR: a node matches when it comes from any of the listed pages.
            filters = MetadataFilters.from_dicts(
                metadata_dicts,
                condition=FilterCondition.OR,
            )

        query_engine = vector_index.as_query_engine(
            similarity_top_k=2,
            filters=filters,
        )
        response = query_engine.query(query)
        return response

    vector_query_tool = FunctionTool.from_defaults(
        name=f"vector_tool_{name}",
        fn=vector_query
    )

    # Summary index over the same nodes, queried with tree summarization.
    summary_index = SummaryIndex(nodes)
    summary_query_engine = summary_index.as_query_engine(
        response_mode="tree_summarize",
        use_async=True,
    )
    summary_tool = QueryEngineTool.from_defaults(
        name=f"summary_tool_{name}",
        query_engine=summary_query_engine,
        description=(
            f"Useful for summarization questions related to {name}"
        ),
    )

    return vector_query_tool, summary_tool