# File: jansahayak/agents/profiling_agent.py
# Repository page residue (not part of the program):
#   "Anmol4521's picture" / "Upload 95 files" / "388aa42 verified"
"""
User Profiling Agent
Extracts structured user information for eligibility matching
"""
import json
import re
from typing import Optional

from langchain_core.messages import HumanMessage, SystemMessage
from langchain_groq import ChatGroq

from config import GROQ_API_KEY
from prompts.profiling_prompt import PROFILING_PROMPT
def get_llm():
    """
    Build the Groq chat model used for profile extraction.

    Returns:
        A ChatGroq client configured with the module's GROQ_API_KEY,
        the "llama-3.3-70b-versatile" model and temperature 0.1.

    Raises:
        ValueError: If GROQ_API_KEY is empty or otherwise falsy.
    """
    if GROQ_API_KEY:
        # A low temperature is used for structured extraction.
        client = ChatGroq(
            api_key=GROQ_API_KEY,
            model="llama-3.3-70b-versatile",
            temperature=0.1,
        )
        return client

    raise ValueError("GROQ_API_KEY not found in environment variables")
def _parse_json_object(candidate: str) -> Optional[dict]:
    """Return *candidate* decoded as a JSON object, or None if it is not one."""
    try:
        decoded = json.loads(candidate)
    except json.JSONDecodeError:
        return None
    # Valid JSON that is a list, string, number, etc. is not a usable profile.
    return decoded if isinstance(decoded, dict) else None


def extract_json_from_text(text: str) -> Optional[dict]:
    """
    Extract a JSON object from text that may contain markdown or extra content.

    Strategies are tried in order and the first one yielding a JSON object
    (a ``dict``) wins:

    1. Parse the whole (stripped) text.
    2. Parse the contents of each markdown code fence (with or without a
       ``json`` tag).
    3. Parse the span from the first ``{`` to the last ``}``.
    4. Parse each brace-delimited fragment (at most one level of nesting)
       found anywhere in the text.

    Args:
        text: Raw text, typically an LLM response.

    Returns:
        The decoded JSON object, or None when no JSON object can be found.
        JSON values that are not objects (lists, strings, numbers, ...) are
        never returned, so callers can safely treat a non-None result as a
        dict. Non-string input yields None.
    """
    if not isinstance(text, str):
        return None

    # 1. The whole response is the JSON document.
    direct = _parse_json_object(text.strip())
    if direct is not None:
        return direct

    # 2. JSON wrapped in a markdown code fence; try every fenced block, not
    #    just the first, so one malformed block does not hide a valid one.
    fenced_pattern = r'```(?:json)?\s*(\{.*?\})\s*```'
    for block in re.findall(fenced_pattern, text, re.DOTALL):
        parsed = _parse_json_object(block)
        if parsed is not None:
            return parsed

    # 3. Everything between the first '{' and the last '}'.
    start_idx = text.find('{')
    end_idx = text.rfind('}')
    if start_idx != -1 and end_idx > start_idx:
        parsed = _parse_json_object(text[start_idx:end_idx + 1])
        if parsed is not None:
            return parsed

    # 4. Fallback: any brace-delimited fragment with up to one nested level.
    fragment_pattern = r'\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}'
    for fragment in re.findall(fragment_pattern, text, re.DOTALL):
        parsed = _parse_json_object(fragment)
        if parsed is not None:
            return parsed

    return None
def run_profiling_agent(user_input: str) -> dict:
    """
    Turn free-form user text into a structured profile via the LLM.

    Args:
        user_input: Raw user input text.

    Returns:
        One of three dictionary shapes:
        - the JSON object parsed from the LLM reply, with every key
          lower-cased and spaces/hyphens replaced by underscores;
        - ``{"user_input", "raw_profile", "note"}`` when no JSON could be
          parsed from the reply (or the parsed value is empty);
        - ``{"error", "user_input"}`` when any exception is raised.
    """
    try:
        chat_model = get_llm()
        filled_prompt = PROFILING_PROMPT.format(user_input=user_input)
        reply = chat_model.invoke([
            SystemMessage(content="You are an expert user profiling agent. Return ONLY a valid JSON object, nothing else."),
            HumanMessage(content=filled_prompt),
        ])
        reply_text = reply.content
        print(f"\n🤖 LLM Response (first 200 chars): {reply_text[:200]}...")

        # Pull the JSON payload out of the reply.
        parsed = extract_json_from_text(reply_text)
        if not parsed:
            # Nothing usable was parsed: hand back the raw material instead.
            print("⚠️ Could not parse JSON, creating basic profile")
            return {
                "user_input": user_input,
                "raw_profile": reply_text,
                "note": "Profile extraction incomplete. Using raw input."
            }

        # Canonicalise keys: lowercase, with underscores for spaces/hyphens.
        cleaned = {
            name.lower().replace(' ', '_').replace('-', '_'): val
            for name, val in parsed.items()
        }
        print(f"✅ Profile extracted: {list(cleaned.keys())}")
        return cleaned
    except Exception as exc:
        reason = str(exc)
        print(f"❌ Profiling error: {reason}")
        return {"error": reason, "user_input": user_input}
def validate_profile(profile_data: dict) -> bool:
    """
    Check that a profile carries the minimum required information.

    A profile passes when 'age', 'state' and 'education' are all present
    and none of them holds the placeholder value "Not Provided".

    Args:
        profile_data: Profile dictionary

    Returns:
        True if every required field is present and provided, False otherwise
    """
    mandatory = ('age', 'state', 'education')
    return all(
        name in profile_data and profile_data[name] != "Not Provided"
        for name in mandatory
    )
if __name__ == "__main__":
    # Manual smoke test: profile one sample description and pretty-print
    # whatever dictionary the agent returns.
    sample_text = """
I am a 25-year-old male from Maharashtra. I completed my Bachelor's in Engineering.
My family income is around 3 lakh per year. I belong to the OBC category.
I am currently unemployed and looking for government job opportunities.
"""
    print(json.dumps(run_profiling_agent(sample_text), indent=2))