# embedchain/tests/evaluation/test_answer_relevancy_metric.py
import numpy as np
import pytest
from embedchain.config.evaluation.base import AnswerRelevanceConfig
from embedchain.evaluation.metrics import AnswerRelevance
from embedchain.utils.evaluation import EvalData, EvalMetric


@pytest.fixture
def mock_data():
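    """Return two EvalData samples: one with a single context and one with two."""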
return [
EvalData(
contexts=[
"This is a test context 1.",
],
question="This is a test question 1.",
answer="This is a test answer 1.",
),
EvalData(
contexts=[
"This is a test context 2-1.",
"This is a test context 2-2.",
],
question="This is a test question 2.",
answer="This is a test answer 2.",
),
]


@pytest.fixture
def mock_answer_relevance_metric(monkeypatch):
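    """Return an AnswerRelevance metric configured via a dummy OPENAI_API_KEY env var."""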
monkeypatch.setenv("OPENAI_API_KEY", "test_api_key")
metric = AnswerRelevance()
return metric


def test_answer_relevance_init(monkeypatch):
monkeypatch.setenv("OPENAI_API_KEY", "test_api_key")
metric = AnswerRelevance()
assert metric.name == EvalMetric.ANSWER_RELEVANCY.value
assert metric.config.model == "gpt-4"
assert metric.config.embedder == "text-embedding-ada-002"
assert metric.config.api_key is None
assert metric.config.num_gen_questions == 1
monkeypatch.delenv("OPENAI_API_KEY")


def test_answer_relevance_init_with_config():
metric = AnswerRelevance(config=AnswerRelevanceConfig(api_key="test_api_key"))
assert metric.name == EvalMetric.ANSWER_RELEVANCY.value
assert metric.config.model == "gpt-4"
assert metric.config.embedder == "text-embedding-ada-002"
assert metric.config.api_key == "test_api_key"
assert metric.config.num_gen_questions == 1


def test_answer_relevance_init_without_api_key(monkeypatch):
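    # Without an API key in the environment or the config, construction should fail.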
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
with pytest.raises(ValueError):
AnswerRelevance()


def test_generate_prompt(mock_answer_relevance_metric, mock_data):
prompt = mock_answer_relevance_metric._generate_prompt(mock_data[0])
assert "This is a test answer 1." in prompt
prompt = mock_answer_relevance_metric._generate_prompt(mock_data[1])
assert "This is a test answer 2." in prompt


def test_generate_questions(mock_answer_relevance_metric, mock_data, monkeypatch):
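    # Stub chat.completions.create so the "model" returns a single generated question.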
monkeypatch.setattr(
mock_answer_relevance_metric.client.chat.completions,
"create",
lambda model, messages: type(
"obj",
(object,),
{
"choices": [
type(
"obj",
(object,),
{"message": type("obj", (object,), {"content": "This is a test question response.\n"})},
)
]
},
)(),
)
prompt = mock_answer_relevance_metric._generate_prompt(mock_data[0])
questions = mock_answer_relevance_metric._generate_questions(prompt)
assert len(questions) == 1
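    # A response with two newline-separated questions should be parsed into two entries.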
monkeypatch.setattr(
mock_answer_relevance_metric.client.chat.completions,
"create",
lambda model, messages: type(
"obj",
(object,),
{
"choices": [
type("obj", (object,), {"message": type("obj", (object,), {"content": "question 1?\nquestion2?"})})
]
},
)(),
)
prompt = mock_answer_relevance_metric._generate_prompt(mock_data[1])
questions = mock_answer_relevance_metric._generate_questions(prompt)
assert len(questions) == 2


def test_generate_embedding(mock_answer_relevance_metric, mock_data, monkeypatch):
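    # Stub embeddings.create to return a fixed three-dimensional embedding.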
monkeypatch.setattr(
mock_answer_relevance_metric.client.embeddings,
"create",
lambda input, model: type("obj", (object,), {"data": [type("obj", (object,), {"embedding": [1, 2, 3]})]})(),
)
embedding = mock_answer_relevance_metric._generate_embedding("This is a test question.")
assert len(embedding) == 3


def test_compute_similarity(mock_answer_relevance_metric, mock_data):
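    # Identical original and generated vectors should each yield a similarity of 1.0.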
original = np.array([1, 2, 3])
generated = np.array([[1, 2, 3], [1, 2, 3]])
similarity = mock_answer_relevance_metric._compute_similarity(original, generated)
assert len(similarity) == 2
assert similarity[0] == 1.0
assert similarity[1] == 1.0


def test_compute_score(mock_answer_relevance_metric, mock_data, monkeypatch):
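    # Stub both OpenAI endpoints so every question embeds to the same vector,
    # which makes the relevancy score exactly 1.0.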
monkeypatch.setattr(
mock_answer_relevance_metric.client.chat.completions,
"create",
lambda model, messages: type(
"obj",
(object,),
{
"choices": [
type(
"obj",
(object,),
{"message": type("obj", (object,), {"content": "This is a test question response.\n"})},
)
]
},
)(),
)
monkeypatch.setattr(
mock_answer_relevance_metric.client.embeddings,
"create",
lambda input, model: type("obj", (object,), {"data": [type("obj", (object,), {"embedding": [1, 2, 3]})]})(),
)
score = mock_answer_relevance_metric._compute_score(mock_data[0])
assert score == 1.0
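    # Same stubs, but the mocked chat response now yields two generated questions.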
monkeypatch.setattr(
mock_answer_relevance_metric.client.chat.completions,
"create",
lambda model, messages: type(
"obj",
(object,),
{
"choices": [
type("obj", (object,), {"message": type("obj", (object,), {"content": "question 1?\nquestion2?"})})
]
},
)(),
)
monkeypatch.setattr(
mock_answer_relevance_metric.client.embeddings,
"create",
lambda input, model: type("obj", (object,), {"data": [type("obj", (object,), {"embedding": [1, 2, 3]})]})(),
)
score = mock_answer_relevance_metric._compute_score(mock_data[1])
assert score == 1.0


def test_evaluate(mock_answer_relevance_metric, mock_data, monkeypatch):
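    # With both endpoints stubbed to identical embeddings, the overall score is 1.0.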
monkeypatch.setattr(
mock_answer_relevance_metric.client.chat.completions,
"create",
lambda model, messages: type(
"obj",
(object,),
{
"choices": [
type(
"obj",
(object,),
{"message": type("obj", (object,), {"content": "This is a test question response.\n"})},
)
]
},
)(),
)
monkeypatch.setattr(
mock_answer_relevance_metric.client.embeddings,
"create",
lambda input, model: type("obj", (object,), {"data": [type("obj", (object,), {"embedding": [1, 2, 3]})]})(),
)
score = mock_answer_relevance_metric.evaluate(mock_data)
assert score == 1.0
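    # Re-stub with a two-question response; the overall score should still be 1.0.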
monkeypatch.setattr(
mock_answer_relevance_metric.client.chat.completions,
"create",
lambda model, messages: type(
"obj",
(object,),
{
"choices": [
type("obj", (object,), {"message": type("obj", (object,), {"content": "question 1?\nquestion2?"})})
]
},
)(),
)
monkeypatch.setattr(
mock_answer_relevance_metric.client.embeddings,
"create",
lambda input, model: type("obj", (object,), {"data": [type("obj", (object,), {"embedding": [1, 2, 3]})]})(),
)
score = mock_answer_relevance_metric.evaluate(mock_data)
assert score == 1.0