Spaces:

Anmol4521
/

jansahayak

Running

App Files Files Community

jansahayak / agents /profiling_agent.py

Anmol4521

Upload 95 files

388aa42 verified 10 days ago

raw

history blame contribute delete

4.7 kB

	"""
	User Profiling Agent
	Extracts structured user information for eligibility matching
	"""

	import json
	from langchain_groq import ChatGroq
	from langchain_core.messages import HumanMessage, SystemMessage
	from prompts.profiling_prompt import PROFILING_PROMPT
	from config import GROQ_API_KEY


	def get_llm():
	    """Create the Groq chat model used by the profiling agent.

	    Returns:
	        A ChatGroq client configured for the "llama-3.3-70b-versatile" model.

	    Raises:
	        ValueError: If GROQ_API_KEY is empty or unset.
	    """
	    if GROQ_API_KEY:
	        llm_settings = {
	            "api_key": GROQ_API_KEY,
	            "model": "llama-3.3-70b-versatile",
	            # Low temperature for structured extraction
	            "temperature": 0.1,
	        }
	        return ChatGroq(**llm_settings)

	    raise ValueError("GROQ_API_KEY not found in environment variables")


	def extract_json_from_text(text: str) -> "dict | None":
	    """Extract a JSON object from text that might contain markdown or extra content.

	    Strategies are tried in order; the first one that yields a JSON object
	    (a Python dict) wins:

	    1. Parse the whole (stripped) text as JSON.
	    2. Parse the payload of each ``` / ```json fenced code block.
	    3. Scan every "{" in the text and decode the JSON object starting there.

	    JSON values that are not objects (lists, numbers, strings, null) are
	    ignored, because callers iterate the result with ``.items()``.

	    Args:
	        text: Raw text, e.g. an LLM reply.

	    Returns:
	        The decoded dict, or None when no JSON object can be found
	        (including when ``text`` is not a string or is blank).
	    """
	    import re

	    if not isinstance(text, str) or not text.strip():
	        return None

	    # 1. Try direct JSON parse first.
	    try:
	        parsed = json.loads(text.strip())
	    except json.JSONDecodeError:
	        parsed = None
	    if isinstance(parsed, dict):
	        return parsed

	    # 2. Try markdown code blocks. The non-greedy body is forced to grow until
	    #    a "}" that is directly followed by the closing fence, so nested
	    #    objects inside the block are captured whole.
	    fence_pattern = r'```(?:json)?\s*(\{.*?\})\s*```'
	    for payload in re.findall(fence_pattern, text, re.DOTALL):
	        try:
	            parsed = json.loads(payload)
	        except json.JSONDecodeError:
	            continue
	        if isinstance(parsed, dict):
	            return parsed

	    # 3. Scan for an embedded object. raw_decode parses exactly one JSON value
	    #    starting at the given index and ignores whatever follows it, so this
	    #    handles arbitrary nesting and trailing prose without brace-counting.
	    decoder = json.JSONDecoder()
	    brace_idx = text.find('{')
	    while brace_idx != -1:
	        try:
	            parsed, _ = decoder.raw_decode(text, brace_idx)
	        except json.JSONDecodeError:
	            parsed = None
	        if isinstance(parsed, dict):
	            return parsed
	        brace_idx = text.find('{', brace_idx + 1)

	    return None


	def run_profiling_agent(user_input: str) -> dict:
	    """
	    Extracts structured profile information from user input

	    Formats PROFILING_PROMPT with the user's text, sends it to the Groq LLM
	    and tries to parse the reply as a JSON object.

	    Args:
	        user_input: Raw user input text

	    Returns:
	        One of three dictionaries:
	        - the parsed profile, with keys lower-cased and spaces/hyphens
	          replaced by underscores;
	        - a fallback with "user_input", "raw_profile" and "note" keys when
	          no non-empty JSON object could be parsed from the reply;
	        - an error record with "error" and "user_input" keys when any
	          exception is raised along the way (the exception is not re-raised).
	    """
	    try:
	        llm = get_llm()

	        prompt = PROFILING_PROMPT.format(user_input=user_input)

	        messages = [
	            SystemMessage(content="You are an expert user profiling agent. Return ONLY a valid JSON object, nothing else."),
	            HumanMessage(content=prompt)
	        ]

	        response = llm.invoke(messages)

	        # NOTE(review): assumes response.content is a string - confirm for this model.
	        print(f"\n🤖 LLM Response (first 200 chars): {response.content[:200]}...")

	        # Extract JSON from response
	        profile_data = extract_json_from_text(response.content)

	        # Only a non-empty JSON object is a usable profile. A list or scalar
	        # would fail on .items() and be misreported as a profiling error
	        # instead of taking the raw-profile fallback below.
	        if isinstance(profile_data, dict) and profile_data:
	            # Normalize keys to lowercase with underscores
	            normalized_profile = {
	                key.lower().replace(' ', '_').replace('-', '_'): value
	                for key, value in profile_data.items()
	            }

	            print(f"✅ Profile extracted: {list(normalized_profile.keys())}")
	            return normalized_profile

	        # Fallback: Create basic profile from user input
	        print("⚠️ Could not parse JSON, creating basic profile")
	        return {
	            "user_input": user_input,
	            "raw_profile": response.content,
	            "note": "Profile extraction incomplete. Using raw input."
	        }

	    except Exception as e:
	        # Deliberate best-effort boundary: report the failure as data so the
	        # caller always receives a dictionary.
	        print(f"❌ Profiling error: {e}")
	        return {
	            "error": str(e),
	            "user_input": user_input
	        }


	def validate_profile(profile_data: dict) -> bool:
	    """
	    Checks that a profile carries the minimum information needed

	    The fields 'age', 'state' and 'education' must all be present, and none
	    of them may hold the placeholder value "Not Provided".

	    Args:
	        profile_data: Profile dictionary

	    Returns:
	        True if every required field is present and filled in, False otherwise
	    """
	    placeholder = "Not Provided"
	    mandatory = ('age', 'state', 'education')

	    # A single missing or placeholder field makes the whole profile invalid.
	    has_gap = any(
	        name not in profile_data or profile_data[name] == placeholder
	        for name in mandatory
	    )
	    return not has_gap


	if __name__ == "__main__":
	    # Manual smoke test: profile one sample description and pretty-print it.
	    sample_text = """
	I am a 25-year-old male from Maharashtra. I completed my Bachelor's in Engineering.
	My family income is around 3 lakh per year. I belong to the OBC category.
	I am currently unemployed and looking for government job opportunities.
	"""

	    print(json.dumps(run_profiling_agent(sample_text), indent=2))