Spaces:

vedang4u
/

aivre

Running

Vedang Barhate

chore: copied from assist repo

cfc8e23 4 months ago

11.4 kB

	import logging
	import os
	import time

	from openai import OpenAI

	from app.core.config import Settings
	from app.models.document import Document

	logger = logging.getLogger(__name__)


	class AnswerGenerator:
	    """Generate answers using an LLM with retrieved context.

	    The generator wraps an OpenAI chat-completions client. Retrieved document
	    contents are concatenated and embedded into a system prompt; the user
	    query is sent as the user message.
	    """

	    # Returned to the caller whenever the API call fails or yields no text.
	    _FALLBACK_ANSWER = "I apologize, but I couldn't generate an answer at this time."
	    # Combined context size (characters) above which a warning is logged.
	    _LARGE_CONTEXT_CHARS = 15000

	    def __init__(self, settings: Settings):
	        """Create the OpenAI client and remember the configured model.

	        Args:
	            settings: Application settings; ``openai_api_key`` and
	                ``llm_model`` are read from it.

	        Raises:
	            ValueError: If no API key is available from either ``settings``
	                or the ``OPENAI_API_KEY`` environment variable.
	        """
	        self.settings = settings
	        # Validate the same key that is handed to the client: prefer the
	        # configured value, fall back to the environment variable.
	        api_key = settings.openai_api_key or os.getenv("OPENAI_API_KEY")
	        if not api_key:
	            raise ValueError("OPENAI_API_KEY environment variable is required")
	        self.client = OpenAI(api_key=api_key)
	        self.model = settings.llm_model
	        logger.info(f"Initialized AnswerGenerator with model: {self.model}")

	    def generate(
	        self,
	        query: str,
	        documents: list[Document],
	        temperature: float = 0.1,
	        max_tokens: int = 1000,
	        property_context: str | None = None,
	    ) -> str:
	        """Generate an answer for ``query`` grounded in ``documents``.

	        Args:
	            query: The user's question; sent as the user message.
	            documents: Retrieved documents whose ``content`` attributes are
	                joined with blank lines to form the prompt context.
	            temperature: Sampling temperature forwarded to the API.
	            max_tokens: Completion token limit forwarded to the API.
	            property_context: Optional property analysis text embedded in the
	                system prompt; omitted from the prompt when falsy.

	        Returns:
	            The stripped answer text, or a fixed apology sentence when the
	            API call raises or returns no usable content.
	        """
	        # perf_counter is monotonic, so durations are immune to clock changes.
	        start_time = time.perf_counter()
	        logger.info(
	            f"Generating answer for query: '{query[:100]}...' "
	            f"with {len(documents)} documents"
	        )
	        logger.debug(
	            f"Generation parameters - temperature: {temperature}, "
	            f"max_tokens: {max_tokens}"
	        )
	        if not documents:
	            logger.warning("No documents supplied; answering without retrieved context")

	        context = "\n\n".join(doc.content for doc in documents)
	        context_length = len(context)
	        logger.debug(f"Combined context length: {context_length} characters")
	        if context_length > self._LARGE_CONTEXT_CHARS:
	            logger.warning(
	                f"Large context size ({context_length} chars) may impact performance"
	            )
	        system_prompt = self._create_system_prompt(context, property_context)

	        try:
	            logger.info("Sending request to OpenAI API...")
	            api_start_time = time.perf_counter()
	            response = self.client.chat.completions.create(
	                model=self.model,
	                messages=[
	                    {"role": "system", "content": system_prompt},
	                    {"role": "user", "content": query},
	                ],
	                temperature=temperature,
	                max_tokens=max_tokens,
	                top_p=0.9,
	            )
	            api_duration = time.perf_counter() - api_start_time
	            logger.info(f"OpenAI API call completed in {api_duration:.2f} seconds")

	            if not response.choices:
	                logger.error("No choices returned from OpenAI API")
	                return self._FALLBACK_ANSWER

	            first_choice = response.choices[0]
	            message_content = first_choice.message.content
	            if message_content is None:
	                logger.error("Received None content from OpenAI API")
	                return self._FALLBACK_ANSWER
	            answer = message_content.strip()

	            usage = response.usage
	            logger.debug(
	                f"Response details - "
	                f"finish_reason: {first_choice.finish_reason}, "
	                f"tokens_used: {usage.total_tokens if usage else 'unknown'}, "
	                f"answer_length: {len(answer)} chars"
	            )
	            if usage:
	                logger.debug(
	                    f"Token usage - "
	                    f"prompt: {usage.prompt_tokens}, "
	                    f"completion: {usage.completion_tokens}, "
	                    f"total: {usage.total_tokens}"
	                )

	            total_duration = time.perf_counter() - start_time
	            logger.info(
	                f"Answer generation completed successfully in {total_duration:.2f} seconds"
	            )
	            return answer

	        except Exception as e:
	            # Deliberate catch-all: callers always receive a string. Use
	            # logger.exception so the traceback is kept for diagnosis.
	            logger.exception(f"Error generating answer: {e}")
	            return self._FALLBACK_ANSWER

	    def _create_system_prompt(
	        self, context: str, property_context: str | None = None
	    ) -> str:
	        """Build the system prompt around the retrieved context.

	        Args:
	            context: Retrieved guideline text placed in the
	                ``<relevant_knowledge>`` section.
	            property_context: Optional property analysis text. When truthy it
	                is wrapped in ``<available_property_data>``; otherwise a
	                ``<no_property_data>`` notice is emitted instead.

	        Returns:
	            The complete system prompt string.
	        """
	        source_type = "Real Estate Appraisal Guidelines"

	        # Resolve the property section up front so the main template stays a
	        # flat f-string with single-name placeholders only.
	        if property_context:
	            property_section = f"""
	<available_property_data>
	{property_context}
	</available_property_data>
	<usage_directive>
	This data represents the actual property being appraised. Reference specific details when answering property-related questions.
	</usage_directive>
	"""
	        else:
	            property_section = """
	<no_property_data>
	No specific property analysis is currently available.
	</no_property_data>
	"""

	        return f"""<system_role>
	You are an authoritative expert on {source_type} with extensive knowledge of real estate appraisal standards and practices.
	</system_role>

	<core_capabilities>
	- Certified real estate appraiser assistant helping appraisers with guideline interpretation and property-specific analysis
	- Access to embedded appraisal guidelines and detailed property analysis data
	- Provides practical, actionable, and properly cited responses
	</core_capabilities>

	<relevant_knowledge>
	The following relevant information has been retrieved from {source_type} based on the current query:

	{context}

	Use this information as your primary reference when answering questions. This content is specifically relevant to the user's query and should be prioritized over general knowledge.
	</relevant_knowledge>

	<property_context>
	{property_section}
	</property_context>

	<communication>
	- Direct property-specific questions → Use property analysis data with guideline citations
	- Guideline interpretation requests → Reference embedded standards first, tools if needed
	- Write concisely and naturally, matching response length to question complexity
	- Use inline citations: [USPAP Standard 1-2(e)] or [Property Analysis: Kitchen Q4]
	- Start with the answer, not background information
	- Avoid academic tone or unnecessary transitions
	- Comparative analysis → Integrate both property data and guidelines
	- General appraisal questions → Use embedded knowledge, avoid unnecessary tool calls
	</communication>

	<knowledge_integration>
	You have access to:
	Embedded Guidelines: {source_type} in the initial context
	Property Data: Specific analysis including condition ratings, materials, defects
	Tools: For additional guideline lookups when initial context is insufficient
	Prioritize using existing knowledge before tool calls. Integrate property data with guidelines for practical answers.
	</knowledge_integration>

	<formatting_guidelines>
	Use formatting strategically to enhance readability without overwhelming the text:

	Bold - Reserve for maximum impact:
	- Critical requirements that must not be missed
	- Key regulatory terms when first defined
	- Warnings that could affect appraisal validity
	- Action items the appraiser must complete

	Italics - Use sparingly for:
	- Subtle emphasis within sentences
	- Example scenarios: "if the kitchen was updated in 2023"
	- Technical terms on first use only
	- Integrated citations: per USPAP Standard 2-1(a)

	Lists - Choose the right type:
	- Bullets: For non-sequential items (features, options, requirements)
	- Numbers: For sequential steps or ranked priorities
	- Keep items concise - one line each when possible
	- Use sub-bullets sparingly

	> Block quotes - Limited use for:
	> Direct regulatory text that must be quoted verbatim
	> Critical form instructions that cannot be paraphrased

	Avoid over-formatting:
	- No underlines (poor readability in digital formats)
	- No more than 2-3 bold items per response
	- Never bold entire sentences
	- Don't mix multiple formats on the same text

	Natural integration:
	Write first, format second. The response should read naturally even without formatting. Use formatting as enhancement, not structure.

	Example of good formatting:
	"The subject property must be compared to at least three closed sales [FNMA B4-1.3]. Consider these adjustments:
	- Location: ±5% typical
	- Condition: $5,000 per rating level
	- GLA: $75-$85/sqft"

	Example of poor formatting:
	"The subject property must be *compared* to at least three __closed sales__ per [FNMA B4-1.3]."
	</formatting_guidelines>

	<tool_usage_rules>
	- Use tools sparingly (maximum 5 per conversation)
	- Only use when initial context lacks needed information
	- Tool priority order:
	- 'findDefinitionTool': For specific terminology not in context
	- 'fetchAdditionalContextTool': For broader regulatory topics
	- 'validateInformationTool': To confirm specific requirements
	- 'compareSourcesTool': For cross-source validation
	- Never mention tool names to the user. Simply state what you're looking up
	</tool_usage_rules>

	<response_guidelines>
	Answer immediately - No buildup or context setting
	Be specific - Use actual property data and exact guideline references
	Stay practical - Focus on what to do, not theory
	Natural citations - Weave references into sentences
	Match complexity - Simple questions get simple answers
	Examples:
	Simple: "Kitchen rates Q4/C2 with granite counters [Property Analysis: Kitchen]."
	Action: "Report as basement amenity, not GLA [URAR Section 3]. Consider $500-1,500 adjustment if comps lack wet bars."
	Complex: Brief paragraph with specific guidance and multiple citations
	</response_guidelines>

	<restrictions>
	- Don't explain basic appraisal concepts unless asked
	- Don't repeat property data without adding insight
	- Don't use phrases like "It's important to note" or "In summary"
	- Don't create rigid response structures
	- Don't exceed word limits unless specifically requested
	</restrictions>

	<citation_format>
	- Inline Citations: [USPAP Standard 2-2(b)(viii)], [URAR Section 15.3]
	- Property References: [Property Analysis: Basement C3], [Property Analysis: Kitchen Update 2023]
	- Multiple Sources: Layer citations for comprehensive support
	- Format Integration: [Source] when citation is part of sentence flow
	- Always cite specific sections, not just document names
	- Group related citations: [USPAP SR 1-2(e), 1-4(a)], [Fannie Mae B4-1.3-08, B4-1.3-09]
	</citation_format>

	<quality_checks>
	Before finalizing any response, ensure:
	- ✓ Direct answer appears first
	- ✓ Appropriate formatting enhances readability
	- ✓ All claims are properly cited
	- ✓ Property data is integrated where relevant
	- ✓ Response length matches question complexity
	- ✓ Actionable guidance is provided
	</quality_checks>"""