# RAG-Evaluator1 / ragas_code.py
import os
import pandas as pd
from datasets import Dataset
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from ragas import evaluate
from ragas.metrics import (
    context_precision,
    answer_relevancy,
    faithfulness,
    context_recall,
    context_relevancy,
)
from ragas.metrics._answer_correctness import answer_correctness
from ragas.metrics._answer_similarity import answer_similarity
# Ragas Evaluation
def ragas_eval(metrics, openai_api_key, df):
    """Run the selected Ragas metrics over `df` and append their scores as new columns."""
    os.environ["OPENAI_API_KEY"] = openai_api_key
    llm = ChatOpenAI()
    embeddings = OpenAIEmbeddings()
    # Ragas expects a "contexts" column holding a list of strings per row and a
    # "ground_truth" column; note that rename(inplace=True) mutates the caller's df.
    df.rename(columns={"context": "contexts", "ground_truths": "ground_truth"}, inplace=True)
    df["contexts"] = df["contexts"].apply(lambda x: [x])
    eval_data = Dataset.from_pandas(df)
    # Map metric names (as passed in `metrics`) to Ragas metric objects.
    metric_mappings = {
        "answer_correctness": answer_correctness,
        "answer_relevancy": answer_relevancy,
        "faithfulness": faithfulness,
        "context_precision": context_precision,
        "context_recall": context_recall,
        "context_relevancy": context_relevancy,
        "answer_similarity": answer_similarity,
    }
    # Evaluate one metric at a time so a failure in one does not abort the
    # others (raise_exceptions=False yields NaN scores instead of raising).
    for metric in metrics:
        if metric in metric_mappings:
            result = evaluate(
                eval_data,
                metrics=[metric_mappings[metric]],
                llm=llm,
                embeddings=embeddings,
                raise_exceptions=False,
            )
            df2 = result.to_pandas()
            # Append only the newly produced score columns to df.
            new_columns = [col for col in df2.columns if col not in df.columns]
            for col in new_columns:
                df[col] = df2[col]
    return df
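

# A minimal usage sketch, not part of the original file: it assumes an input
# DataFrame with "question", "answer", "context", and "ground_truths" columns
# (the names ragas_eval renames/wraps above). The sample rows and the chosen
# metric list are hypothetical and only illustrate the expected call shape.
if __name__ == "__main__":
    sample_df = pd.DataFrame(
        {
            "question": ["What is Ragas?"],
            "answer": ["Ragas is a framework for evaluating RAG pipelines."],
            "context": ["Ragas provides metrics for retrieval-augmented generation."],
            "ground_truths": ["Ragas is a RAG evaluation framework."],
        }
    )
    scored = ragas_eval(
        metrics=["faithfulness", "answer_relevancy"],
        openai_api_key=os.environ.get("OPENAI_API_KEY", ""),
        df=sample_df,
    )
    # Each selected metric adds one score column to the returned DataFrame.
    print(scored.head())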