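"""Query engine for answering natural-language questions over indexed CSV files.

Assumes an index manager (interface inferred from usage below) that exposes:
  - find_relevant_csvs(query_text): returns the ids of CSVs relevant to the query
  - indexes: dict mapping csv_id -> {"index": vector index, "metadata": {"filename": ...}}
Written against the legacy (ServiceContext-based) llama_index API.
"""
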
from typing import Any, Dict, List

from llama_index import ServiceContext
from llama_index.prompts import PromptTemplate
from llama_index.query_engine import RetrieverQueryEngine
from llama_index.retrievers import VectorIndexRetriever


class CSVQueryEngine:
    """Query engine for CSV data with multi-file support."""

    def __init__(self, index_manager, llm, response_mode="compact"):
        """Initialize with index manager and language model."""
        self.index_manager = index_manager
        self.llm = llm
        self.service_context = ServiceContext.from_defaults(llm=llm)
        self.response_mode = response_mode
        # Set up custom prompts
        self._setup_prompts()
    def _setup_prompts(self):
        """Set up custom prompts for CSV querying."""
        self.csv_query_prompt = PromptTemplate(
            """You are an AI assistant specialized in analyzing CSV data.
Answer the following query using the provided CSV information.
If calculations are needed, explain your process.
CSV Context: {context_str}
Query: {query_str}
Answer:"""
        )
    def query(self, query_text: str) -> Dict[str, Any]:
        """Process a natural language query across CSV files."""
        # Find relevant CSV files
        relevant_csvs = self.index_manager.find_relevant_csvs(query_text)
        if not relevant_csvs:
            return {
                "answer": "No relevant CSV files found for your query.",
                "sources": [],
            }

        # Prepare response
        responses = []
        sources = []

        # Query each relevant CSV
        for csv_id in relevant_csvs:
            index_info = self.index_manager.indexes.get(csv_id)
            if not index_info:
                continue

            index = index_info["index"]
            metadata = index_info["metadata"]

            # Create retriever for this index
            retriever = VectorIndexRetriever(
                index=index,
                similarity_top_k=5,
            )

            # Create query engine
            query_engine = RetrieverQueryEngine.from_args(
                retriever=retriever,
                service_context=self.service_context,
                text_qa_template=self.csv_query_prompt,
                response_mode=self.response_mode,
            )

            # Execute query
            response = query_engine.query(query_text)
            responses.append({
                "csv_id": csv_id,
                "filename": metadata["filename"],
                "response": response,
            })

            # Collect source information
            if hasattr(response, "source_nodes"):
                for node in response.source_nodes:
                    sources.append({
                        "csv": metadata["filename"],
                        "content": node.node.get_content()[:100] + "...",
                    })
        # Combine responses if multiple CSVs were queried
        if len(responses) > 1:
            combined_response = self._combine_responses(query_text, responses)
            return {
                "answer": combined_response,
                "sources": sources,
            }
        elif len(responses) == 1:
            # Normalize the single Response object to a string for a consistent return type
            return {
                "answer": str(responses[0]["response"]),
                "sources": sources,
            }
        else:
            return {
                "answer": "Failed to process query with the available CSV data.",
                "sources": [],
            }
    def _combine_responses(self, query_text: str, responses: List[Dict]) -> str:
        """Combine responses from multiple CSV files."""
        # Summarize each per-file result on its own line
        findings = "\n".join(
            f"From {r['filename']}: {r['response']}" for r in responses
        )
        # Create a prompt for combining multiple CSV responses
        combine_prompt = f"""
I need to answer this question: {query_text}
I've analyzed multiple CSV files and found these results:
{findings}
Please provide a unified answer that combines these insights.
"""
        # Use the LLM to generate a combined response
        combined_response = self.llm.complete(combine_prompt)
        return combined_response.text
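

# Minimal usage sketch. `CSVIndexManager`, the `index_manager` module, the
# `add_csv` helper, and the model choice are assumptions for illustration;
# any llama_index LLM and an index manager exposing `find_relevant_csvs()`
# and an `indexes` dict will work here.
if __name__ == "__main__":
    from llama_index.llms import HuggingFaceLLM

    from index_manager import CSVIndexManager  # hypothetical companion module

    llm = HuggingFaceLLM(model_name="HuggingFaceH4/zephyr-7b-beta")  # example model
    manager = CSVIndexManager()
    manager.add_csv("sales_2023.csv")  # hypothetical: index a single CSV file

    engine = CSVQueryEngine(manager, llm)
    result = engine.query("What was the total revenue in Q3?")
    print(result["answer"])
    for source in result["sources"]:
        print(f"- {source['csv']}: {source['content']}")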