# Source: hackrx-llm-document-processor / query_handler.py
# Uploaded by PalakMeena in commit aaa7449 ("Upload 9 files", verified); file size 7.34 kB.
import json
import re
from typing import Dict, List, Optional

from transformers import pipeline

from models import StructuredQuery
class QueryHandler:
    """Parse insurance-claim queries with regex rules and decide on coverage.

    ``parse_query`` turns free text such as
    ``"46M, knee surgery, Pune, 3-month policy"`` into a ``StructuredQuery``.
    ``make_decision`` then applies fixed age, policy-duration, procedure and
    location rules against the text of the retrieved policy chunks.
    """

    # Age patterns, tried in this order; the first pattern that matches wins.
    # Word boundaries keep "3month" or "12mg" from being read as "<age>M",
    # and the lookahead keeps "2-year-old policy" from being read as an age.
    _AGE_PATTERNS = (
        r'\b(\d+)(?:male|female|m|f)\b',
        r'\b(\d+)[-\s]*(?:years?|yrs?)[-\s]*old\b(?![-\s]*policy)',
        r'\baged?\s*:?\s*(\d+)',
    )

    # (pattern, label) pairs, tried in this order. The "<digits>M"/"<digits>F"
    # shorthand takes priority over the plain words "male"/"female".
    _GENDER_PATTERNS = (
        (r'\b\d+(?:male|m)\b', "male"),
        (r'\b\d+(?:female|f)\b', "female"),
        (r'\bmale\b', "male"),
        (r'\bfemale\b', "female"),
    )

    # Procedure keywords, most specific first so that "knee surgery" is
    # preferred over the generic "surgery".
    _PROCEDURES = (
        "knee surgery", "cardiac", "heart surgery", "hip replacement",
        "cataract", "dental", "surgery", "operation", "procedure",
    )

    # Common Indian cities recognised in queries.
    _CITIES = (
        "mumbai", "delhi", "bangalore", "hyderabad", "pune", "chennai",
        "kolkata", "ahmedabad", "jaipur", "lucknow", "kanpur", "nagpur",
    )

    # Policy-duration patterns; month-based durations are tried before
    # year-based ones. An optional "old" allows "3-month-old policy".
    _DURATION_PATTERNS = (
        r'(\d+)[-\s]*months?[-\s]*(?:old[-\s]*)?policy',
        r'(\d+)[-\s]*years?[-\s]*(?:old[-\s]*)?policy',
    )

    # Fixed business rules applied by make_decision.
    _MIN_AGE = 18
    _MAX_AGE = 65
    _MIN_POLICY_MONTHS = 6
    _COVERED_AREAS = ("mumbai", "delhi", "pune", "bangalore")

    def __init__(self):
        """Try to load a small local text-generation model.

        On any loading error ``self.llm`` is set to ``None``. No method of
        this class reads ``self.llm``; parsing and decisions are rule-based.
        """
        try:
            self.llm = pipeline(
                "text-generation",
                model="microsoft/DialoGPT-medium",
                max_length=512,
                device=-1,  # Use CPU
            )
        except Exception:
            # Deliberate best-effort fallback. Not a bare ``except`` so that
            # KeyboardInterrupt / SystemExit still propagate.
            self.llm = None
            print("Using rule-based approach for query processing")

    def parse_query(self, query: str) -> "StructuredQuery":
        """Parse a natural-language query into a ``StructuredQuery``.

        Every field other than ``raw_query`` is ``None`` when the
        corresponding rule finds nothing in the text.
        """
        # Rule-based parsing (works without internet)
        parsed = {
            "age": self._extract_age(query),
            "gender": self._extract_gender(query),
            "procedure": self._extract_procedure(query),
            "location": self._extract_location(query),
            "policy_duration": self._extract_policy_duration(query),
            "raw_query": query,
        }
        return StructuredQuery(**parsed)

    @staticmethod
    def _first_keyword(text: str, keywords: tuple,
                       allow_plural: bool = False) -> Optional[str]:
        """Return the first keyword that occurs in ``text`` as a whole word.

        Matching is case-insensitive, follows the order of ``keywords`` (not
        the order of appearance in ``text``) and tolerates any whitespace
        between the words of a multi-word keyword.
        """
        suffix = r's?' if allow_plural else ''
        for keyword in keywords:
            body = r'\s+'.join(re.escape(part) for part in keyword.split())
            if re.search(rf'\b{body}{suffix}\b', text, re.IGNORECASE):
                return keyword
        return None

    def _extract_age(self, query: str) -> Optional[int]:
        """Extract an age from forms like "46M", "46-year-old" or "age 46".

        Returns ``None`` when no age pattern matches.
        """
        for pattern in self._AGE_PATTERNS:
            match = re.search(pattern, query, re.IGNORECASE)
            if match:
                return int(match.group(1))
        return None

    def _extract_gender(self, query: str) -> Optional[str]:
        """Return "male", "female", or ``None`` when no gender is found.

        Matching is case-insensitive, consistent with ``_extract_age``, so
        "46m" yields both an age and a gender.
        """
        for pattern, label in self._GENDER_PATTERNS:
            if re.search(pattern, query, re.IGNORECASE):
                return label
        return None

    def _extract_procedure(self, query: str) -> Optional[str]:
        """Return the first known procedure keyword found in ``query``.

        Whole-word matching avoids false hits such as "dental" inside
        "accidental"; a trailing plural "s" is accepted.
        """
        return self._first_keyword(query, self._PROCEDURES, allow_plural=True)

    def _extract_location(self, query: str) -> Optional[str]:
        """Return the first known city found in ``query``, title-cased."""
        city = self._first_keyword(query, self._CITIES)
        return city.title() if city is not None else None

    def _extract_policy_duration(self, query: str) -> Optional[str]:
        """Return the matched duration phrase, e.g. "3-month policy".

        The phrase is returned exactly as written in ``query``; ``None`` is
        returned when no duration is mentioned.
        """
        for pattern in self._DURATION_PATTERNS:
            match = re.search(pattern, query, re.IGNORECASE)
            if match:
                return match.group(0)
        return None

    def make_decision(self, original_query: str, structured_query: "StructuredQuery",
                      relevant_chunks: List[Dict]) -> Dict:
        """Decide on a claim from the parsed query and retrieved policy text.

        Args:
            original_query: The raw query text (not used by the rules).
            structured_query: Parsed query; ``age``, ``policy_duration``,
                ``procedure`` and ``location`` are read from it.
            relevant_chunks: Retrieved chunks; each chunk's ``"text"`` value
                is searched for the coverage amounts.

        Returns:
            A dict with keys ``decision`` ("approved" or "rejected"),
            ``amount``, ``justification`` and ``clauses_used``.
        """
        decision_result = {
            "decision": "rejected",
            "amount": 0.0,
            "justification": "",
            "clauses_used": [],
        }

        if not relevant_chunks:
            decision_result["justification"] = "No relevant policy information found for this query."
            return decision_result

        # Chunks without a "text" entry contribute nothing instead of raising.
        relevant_text = " ".join(str(chunk.get("text") or "") for chunk in relevant_chunks)

        # Age validation. ``is not None`` so that an age of 0 is still checked.
        age = structured_query.age
        if age is not None and not (self._MIN_AGE <= age <= self._MAX_AGE):
            decision_result["justification"] = (
                f"Age {age} is outside policy coverage ({self._MIN_AGE}-{self._MAX_AGE} years)."
            )
            decision_result["clauses_used"] = [f"Age limit: {self._MIN_AGE}-{self._MAX_AGE} years"]
            return decision_result

        # Policy duration validation; only month-based durations are checked.
        duration = structured_query.policy_duration
        if duration and "month" in duration.lower():
            digits = re.search(r'\d+', duration)
            # A duration without digits (e.g. "six months") cannot be checked.
            if digits is not None:
                months = int(digits.group())
                if months < self._MIN_POLICY_MONTHS:
                    decision_result["justification"] = (
                        f"Policy duration {months} months is below minimum requirement "
                        f"of {self._MIN_POLICY_MONTHS} months."
                    )
                    decision_result["clauses_used"] = [
                        f"Minimum policy duration: {self._MIN_POLICY_MONTHS} months for coverage"
                    ]
                    return decision_result

        # Procedure validation and amount calculation. Approval requires the
        # coverage limit to appear literally in the retrieved policy text.
        procedure = structured_query.procedure
        if procedure:
            procedure_lower = procedure.lower()
            if "knee" in procedure_lower and "surgery" in procedure_lower:
                if "1,00,000" in relevant_text or "100000" in relevant_text:
                    decision_result["decision"] = "approved"
                    decision_result["amount"] = 100000.0
                    decision_result["justification"] = "Knee surgery is covered under the policy up to Rs. 1,00,000."
                    decision_result["clauses_used"] = ["Knee surgeries are covered up to Rs. 1,00,000"]
                else:
                    decision_result["justification"] = (
                        "Knee surgery coverage limit (Rs. 1,00,000) was not found "
                        "in the retrieved policy text."
                    )
            elif "cardiac" in procedure_lower:
                if "2,00,000" in relevant_text or "200000" in relevant_text:
                    decision_result["decision"] = "approved"
                    decision_result["amount"] = 200000.0
                    decision_result["justification"] = "Cardiac procedure is covered under the policy up to Rs. 2,00,000."
                    decision_result["clauses_used"] = ["Cardiac procedures covered up to Rs. 2,00,000"]
                else:
                    decision_result["justification"] = (
                        "Cardiac procedure coverage limit (Rs. 2,00,000) was not found "
                        "in the retrieved policy text."
                    )
            else:
                decision_result["justification"] = f"Procedure '{procedure}' is not explicitly covered in the policy."
        else:
            decision_result["justification"] = "No recognizable medical procedure was found in the query."

        # Location validation: an approval is revoked outside the coverage area.
        # NOTE(review): a claim already rejected above keeps its own
        # justification here; confirm this matches the intended policy.
        location = structured_query.location
        if (location
                and location.lower() not in self._COVERED_AREAS
                and decision_result["decision"] == "approved"):
            decision_result["decision"] = "rejected"
            decision_result["amount"] = 0.0
            decision_result["justification"] = f"Location '{location}' is not in coverage area."
            decision_result["clauses_used"] = ["Coverage areas: Mumbai, Delhi, Pune, Bangalore"]

        return decision_result