# Scraped file-viewer header (not part of the module), kept as comments:
# momenaca's picture
# feature/major backend update with agent
# 8ca00e0
# raw
# history blame
# 4.2 kB
from langchain_core.pydantic_v1 import BaseModel, Field
from langchain.prompts.prompt import PromptTemplate
from langchain.output_parsers import PydanticOutputParser
from typing import Literal
from operator import itemgetter
import json
from langchain_core.exceptions import OutputParserException
class ESRSAnalysis(BaseModel):
    """Analyzing the user query to get ESRS type, sources and intent"""

    # NOTE: the class docstring above is kept verbatim; pydantic exposes it as
    # the schema description, so its wording is part of the model's output.
    #
    # Only `esrs_type` is declared on the model. The value is restricted to
    # the standard labels below, with "no_intent" as the catch-all label.
    esrs_type: Literal[
        # Cross-cutting standards
        "ESRS 1",
        "ESRS 2",
        # Environmental standards
        "ESRS E1",
        "ESRS E2",
        "ESRS E3",
        "ESRS E4",
        "ESRS E5",
        # Social standards
        "ESRS S1",
        "ESRS S2",
        "ESRS S3",
        "ESRS S4",
        # Governance standard
        "ESRS G1",
        # Fallback label
        "no_intent",
    ] = Field(description="The ESRS type that the user query refers to.")
def make_esrs_intent_chain(llm):
    """Build a chain that classifies a question into a single ESRS type.

    The chain extracts ``"query"`` from its input mapping, renders the
    classification prompt, calls ``llm`` and parses the model output into an
    ``ESRSAnalysis`` instance.

    Args:
        llm: Language model (or any runnable) piped between the prompt and the
            output parser.

    Returns:
        A runnable expecting ``{"query": <question>}`` and producing an
        ``ESRSAnalysis`` object.
    """
    # The prompt text is sent to the model as-is; `{query}` is the only
    # template variable.
    prompt_template = """
Please analyze the question and indicate if it refers to a specific ESRS.
Follow these definitions in order to choose the appropriate ESRS :
- ESRS 1 is for questions about general principles for preparing and presenting sustainability information in accordance with CSRD
- ESRS 2 is for questions about general disclosures related to sustainability reporting, including governance, strategy, impact, risk, opportunity management, and metrics and targets
- ESRS E1 is for questions about climate change, global warming, GES and energy
- ESRS E2 is for questions about air, water, and soil pollution, and dangerous substances
- ESRS E3 is for questions about water and marine resources
- ESRS E4 is for questions about biodiversity, nature, wildlife and ecosystems
- ESRS E5 is for questions about resource use and circular economy
- ESRS S1 is for questions about workforce and labor issues, job security, fair pay, and health and safety
- ESRS S2 is for questions about workers in the value chain, workers' treatment
- ESRS S3 is for questions about affected communities, impact on local communities
- ESRS S4 is for questions about consumers and end users, customer privacy, safety, and inclusion
- ESRS G1 is for questions about governance, risk management, internal control, and business conduct
- no_intent is for questions that do not fit into any of the above categories
Keep in mind these guidelines :
- Some questions could be related to multiple ESRS. In such case, choose the most appropriate one.
The output needs to respect a JSON format with 'esrs_type' as the key and the appropriate ESRS as the value.
Question: '{query}'
Answer:
"""
    # `method="json_mode"` is an option of `with_structured_output`, not a
    # field of PydanticOutputParser, so it had no effect on parsing here and
    # is dropped to avoid suggesting that JSON mode is enabled.
    parser = PydanticOutputParser(pydantic_object=ESRSAnalysis)
    prompt = PromptTemplate(template=prompt_template, input_variables=["query"])

    # The leading dict is coerced into a parallel runnable that forwards only
    # the "query" key to the prompt.
    chain = {"query": itemgetter("query")} | prompt | llm | parser
    return chain
def make_esrs_intent_node(llm):
    """Create a node function that tags a query with its ESRS type.

    Args:
        llm: Language model forwarded to ``make_esrs_intent_chain``.

    Returns:
        A callable taking a state mapping with a ``"query"`` key and returning
        ``{"esrs_type": [<label>]}``, where ``<label>`` is the ``esrs_type``
        attribute of the chain's result.
    """
    # The chain depends only on `llm`, so build it once here instead of
    # rebuilding it on every invocation of the node.
    categorization_chain = make_esrs_intent_chain(llm)

    def intent_message(state):
        """Classify ``state["query"]`` and wrap the label in a one-item list."""
        analysis = categorization_chain.invoke({"query": state["query"]})
        return {"esrs_type": [analysis.esrs_type]}

    return intent_message
# intent: str = Field(
# enum=[
# "Specific topic",
# "Implementation reco",
# "KPI extraction",
# ],
# description="""
# Categorize the user query in one of the following categories,
# Examples:
# - Specific topic: "What are the specificities of ESRS E1 ?"
# - Implementation reco: "How should I compute my scope 1 reduction target ?"
# - KPI extraction: "When will the CSRD be mandatory for my small French company ?"
# """,
# )
# sources: str = Field(
# enum=["ESRS", "External"],
# description="""
# Given a user question choose which documents would be most relevant for answering their question,
# - ESRS is for questions about a specific environmental, social or governance topic, as well as CSRD's general principles and disclosures
# - External is for questions about how to implement the CSRD, or general questions about CSRD's context
# """,
# )