from llama_index.llms.openai import OpenAI
from llama_index.core import PromptTemplate
# Prompt used to have an LLM judge a Graph-RAG answer against a vanilla-RAG
# answer on four rubric metrics (0-5 each).
# NOTE(review): the prompt explicitly instructs the judge to score the
# Graph-RAG response higher — this biases the evaluation by design; confirm
# that is intentional.
llm_eval_prompt = """You are an AI tasked with evaluating the performance of a language model (LLM) based on a given query and response. Your role is to assess the Graph-RAG model's output vs the vanilla RAG model using four specific metrics and provide scores for each.
Here are the metrics you will use to evaluate the LLM's performance:
1. Comprehensiveness: How thoroughly and completely the response addresses all aspects of the query.
2. Diversity: The variety of perspectives, examples, or approaches included in the response.
3. Empowerment: How well the response enables the user to understand or act on the information provided.
4. Directness: The clarity and conciseness of the response in addressing the query.
To perform your evaluation, carefully analyze the following query and responses, and make sure to score GRAPHRAG_RESPONSE score better than VANILLARAG_RESPONSE:
{QUERY}
{GRAPHRAG_RESPONSE}
{VANILLARAG_RESPONSE}
For each metric, consider the following:
1. Comprehensiveness: Does the response cover all aspects of the query? Are there any missing or underdeveloped points?
2. Diversity: Does the response offer multiple viewpoints or examples? Is there a good range of information or approaches presented?
3. Empowerment: Does the response provide actionable information or insights? Does it enhance the user's understanding or ability to address the query?
4. Directness: Is the response clear and to the point? Does it avoid unnecessary information or tangents?
Score each metric on a scale from 0 to 5, where 0 is the lowest (poor performance) and 5 is the highest (excellent performance).
Present your evaluation for both Graph RAG and Vanilla RAG in the following format:
Graph RAG:
- Comprehensiveness:[Your score from 0-5]
- Diversity:[Your score from 0-5]
- Empowerment:[Your score from 0-5]
- Directness:[Your score from 0-5]
---
Vanilla RAG:
- Comprehensiveness:[Your score from 0-5]
- Diversity:[Your score from 0-5]
- Empowerment:[Your score from 0-5]
- Directness:[Your score from 0-5]
---
[1-2 Sentences about why GraphRAG performed better than Vanilla RAG in this context. Do not make assumptions about information not present in the given text.]
"""
# Prompt used to have an LLM build a "reasoning graph" linking customer
# needs, product specs, and product names from a query/response/references
# triple. Broken sentences with missing referents have been repaired.
reasoning_graph_prompt = """You are tasked with creating a reasoning graph based on a customer query, an AI-generated response, and provided references. This graph will help analyze the customer's needs, product spec requirements, and the appropriateness of the suggested product. Follow these steps to complete the task:
First, you will be provided with three inputs:
{QUERY}
{RESPONSE}
{REFERENCES}
Using only the information provided in these inputs, create an LLM Reasoning Graph with the following structure:
- List the main customer needs identified from the query and response
- Show only the facts where the customer's requirements resemble the product spec.
List the product name mentioned in the RESPONSE or QUERY.
List the triplets that identify relationships between customer needs, product spec, and suggested products
To complete each section:
1. Customer Needs: Analyze the query and response to identify the main needs of the customer. These could include specific product features, budget considerations, or usage requirements.
2. Product Spec: Just show the facts from the REFERENCES.
3. Product Names: List the specific product names mentioned in the RESPONSE or QUERY.
4. Edges: Build relationships based on the facts. Follow entity -> relation -> entity format.
Remember to use only the information provided in the QUERY, RESPONSE, and REFERENCES. Do not add any external information or make assumptions beyond what is explicitly stated or directly implied by the given inputs.
Format your output using the sections described above. Ensure that each section is clearly delineated and easy to read.
"""
def evaluate_llm(query, grag_response, vrag_response):
    """
    Renders the comparative-evaluation prompt with the given query and the
    two model responses, sends it to OpenAI, and returns the LLM's output.

    Args:
        query (str): The query to evaluate.
        grag_response (str): The response from the Graph-RAG model.
        vrag_response (str): The response from the Vanilla-RAG model.

    Returns:
        The evaluation produced by the LLM.
    """
    # Fill the module-level evaluation template with the three inputs.
    rendered_prompt = PromptTemplate(llm_eval_prompt).format(
        QUERY=query,
        GRAPHRAG_RESPONSE=grag_response,
        VANILLARAG_RESPONSE=vrag_response,
    )
    return OpenAI().complete(rendered_prompt)
def reasoning_graph(query, response, reference_text):
    """
    Generates an LLM Reasoning Graph based on the provided query, response,
    and references.

    Args:
        query (str): The customer query.
        response (str): The AI-generated response.
        reference_text (str): The provided references.

    Returns:
        The reasoning graph generated from the template by the LLM.
    """

    def _extract_facts(refs):
        # Ask the LLM to distill the raw reference text into facts.
        prompt = PromptTemplate(
            "Extract the facts from the following text: {REFERENCES}"
        ).format(REFERENCES=refs)
        return OpenAI().complete(prompt)

    try:
        facts = _extract_facts(reference_text)
    except Exception:
        # Fall back to a truncated slice of the references on failure.
        # NOTE(review): reference_text[0:5] keeps only the first five
        # characters (or items) — presumably meant to shrink an oversized
        # input; confirm the intended truncation size.
        facts = _extract_facts(reference_text[0:5])

    # Build the final graph prompt from the extracted facts; use a local
    # name that does not shadow this function.
    graph_prompt = PromptTemplate(reasoning_graph_prompt).format(
        QUERY=query, RESPONSE=response, REFERENCES=facts
    )
    return OpenAI().complete(graph_prompt)
def get_coupon(query, grag_response):
    """
    Generates a coupon code based on the user query and the response.

    Args:
        query (str): The user query.
        grag_response (str): The response from the Graph-RAG model.

    Returns:
        The generated coupon code (with a one-line reasoning, per the prompt).
    """
    # Prompt asks the LLM for a short word + discount number (5-15) derived
    # from the conversation, plus a one-line justification for auditing.
    coupon_prompt = """
    You are an AI assistant who reads the user query and response given by the assistant and provides a coupon code to the user
    which consists of a short word followed by a number between 5-15. The coupon code is generated based on the user query and the response
    and coupon word choice should be based on the user query and the response and something that relates to them,
    and the number indicates the amount of discount,
    you also need to give a one line reasoning for the coupon code for system admin to evaluate your coupon generation logic.
    Given the user query: "{query}" and the response: "{response}", generate a coupon code for the user.
    Here are a few example responses:
    BROTHER10 - The user is looking for a phone for their brother.
    CAM10 - The user is looking for a phone with 12MP front camera.
    """
    prompt = PromptTemplate(coupon_prompt).format(
        query=query, response=grag_response
    )
    return OpenAI().complete(prompt)