Spaces:
Sleeping
Sleeping
import os | |
import ragas | |
import pandas as pd | |
from datasets import Dataset, load_dataset | |
from langchain.chat_models import AzureChatOpenAI, ChatOpenAI | |
from langchain.embeddings import AzureOpenAIEmbeddings, OpenAIEmbeddings | |
from ragas.llms import LangchainLLMWrapper | |
from ragas import evaluate | |
from ragas.metrics.critique import harmfulness | |
from ragas.metrics import ( | |
context_precision, | |
answer_relevancy, | |
faithfulness, | |
context_recall, | |
context_relevancy) | |
from ragas.metrics._answer_correctness import answer_correctness | |
from ragas.metrics._answer_similarity import answer_similarity | |
# Ragas Evaluation | |
def ragas_eval(metrics, openai_api_key, df): | |
os.environ["OPENAI_API_KEY"] = openai_api_key | |
llm = ChatOpenAI() | |
embeddings = OpenAIEmbeddings() | |
df.rename(columns={"context": "contexts", "ground_truths": "ground_truth"}, inplace=True) | |
df["contexts"] = df["contexts"].apply(lambda x: [x]) | |
eval_data = Dataset.from_pandas(df) | |
metric_mappings = { | |
"answer_correctness": answer_correctness, | |
"answer_relevancy": answer_relevancy, | |
"faithfulness": faithfulness, | |
"context_precision": context_precision, | |
"context_recall": context_recall, | |
"context_relevancy": context_relevancy, | |
"answer_similarity": answer_similarity, | |
} | |
for metric in metrics: | |
if metric in metric_mappings: | |
result = evaluate(eval_data, metrics=[metric_mappings[metric]], llm=llm, embeddings=embeddings, raise_exceptions=False) | |
df2 = result.to_pandas() | |
new_columns = [col for col in df2.columns if col not in df.columns] | |
# Append only the new columns to final_df | |
for col in new_columns: | |
df[col] = df2[col] | |
return df |