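"""Tests for the AnswerRelevance evaluation metric.

All OpenAI chat-completion and embedding calls are monkeypatched with
lightweight stand-in objects, so the suite runs offline and deterministically.
"""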
import numpy as np
import pytest

from embedchain.config.evaluation.base import AnswerRelevanceConfig
from embedchain.evaluation.metrics import AnswerRelevance
from embedchain.utils.evaluation import EvalData, EvalMetric
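

# Shared fixture: two EvalData samples, one with a single context and one
# with two contexts.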
@pytest.fixture
def mock_data():
    return [
        EvalData(
            contexts=[
                "This is a test context 1.",
            ],
            question="This is a test question 1.",
            answer="This is a test answer 1.",
        ),
        EvalData(
            contexts=[
                "This is a test context 2-1.",
                "This is a test context 2-2.",
            ],
            question="This is a test question 2.",
            answer="This is a test answer 2.",
        ),
    ]
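

# Shared fixture: builds an AnswerRelevance metric with a dummy
# OPENAI_API_KEY in the environment so the underlying OpenAI client can be
# constructed without a real key (no network calls are made in these tests).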
@pytest.fixture
def mock_answer_relevance_metric(monkeypatch):
    monkeypatch.setenv("OPENAI_API_KEY", "test_api_key")
    metric = AnswerRelevance()
    return metric


def test_answer_relevance_init(monkeypatch):
    monkeypatch.setenv("OPENAI_API_KEY", "test_api_key")
    metric = AnswerRelevance()
    assert metric.name == EvalMetric.ANSWER_RELEVANCY.value
    assert metric.config.model == "gpt-4"
    assert metric.config.embedder == "text-embedding-ada-002"
    assert metric.config.api_key is None
    assert metric.config.num_gen_questions == 1
    monkeypatch.delenv("OPENAI_API_KEY")


def test_answer_relevance_init_with_config():
    metric = AnswerRelevance(config=AnswerRelevanceConfig(api_key="test_api_key"))
    assert metric.name == EvalMetric.ANSWER_RELEVANCY.value
    assert metric.config.model == "gpt-4"
    assert metric.config.embedder == "text-embedding-ada-002"
    assert metric.config.api_key == "test_api_key"
    assert metric.config.num_gen_questions == 1


def test_answer_relevance_init_without_api_key(monkeypatch):
    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
    with pytest.raises(ValueError):
        AnswerRelevance()
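

# The generated prompt is expected to embed the answer under evaluation.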
def test_generate_prompt(mock_answer_relevance_metric, mock_data):
    prompt = mock_answer_relevance_metric._generate_prompt(mock_data[0])
    assert "This is a test answer 1." in prompt
    prompt = mock_answer_relevance_metric._generate_prompt(mock_data[1])
    assert "This is a test answer 2." in prompt
def test_generate_questions(mock_answer_relevance_metric, mock_data, monkeypatch):
    monkeypatch.setattr(
        mock_answer_relevance_metric.client.chat.completions,
        "create",
        lambda model, messages: type(
            "obj",
            (object,),
            {
                "choices": [
                    type(
                        "obj",
                        (object,),
                        {"message": type("obj", (object,), {"content": "This is a test question response.\n"})},
                    )
                ]
            },
        )(),
    )
    prompt = mock_answer_relevance_metric._generate_prompt(mock_data[0])
    questions = mock_answer_relevance_metric._generate_questions(prompt)
    assert len(questions) == 1
    monkeypatch.setattr(
        mock_answer_relevance_metric.client.chat.completions,
        "create",
        lambda model, messages: type(
            "obj",
            (object,),
            {
                "choices": [
                    type("obj", (object,), {"message": type("obj", (object,), {"content": "question 1?\nquestion2?"})})
                ]
            },
        )(),
    )
    prompt = mock_answer_relevance_metric._generate_prompt(mock_data[1])
    questions = mock_answer_relevance_metric._generate_questions(prompt)
    assert len(questions) == 2
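

# embeddings.create is stubbed to return a fixed 3-dimensional vector in the
# OpenAI response shape (response.data[0].embedding).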
def test_generate_embedding(mock_answer_relevance_metric, mock_data, monkeypatch):
    monkeypatch.setattr(
        mock_answer_relevance_metric.client.embeddings,
        "create",
        lambda input, model: type("obj", (object,), {"data": [type("obj", (object,), {"embedding": [1, 2, 3]})]})(),
    )
    embedding = mock_answer_relevance_metric._generate_embedding("This is a test question.")
    assert len(embedding) == 3
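

# Presumably cosine similarity: a vector compared against identical copies
# of itself should score exactly 1.0 for every row.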
def test_compute_similarity(mock_answer_relevance_metric, mock_data):
    original = np.array([1, 2, 3])
    generated = np.array([[1, 2, 3], [1, 2, 3]])
    similarity = mock_answer_relevance_metric._compute_similarity(original, generated)
    assert len(similarity) == 2
    assert similarity[0] == 1.0
    assert similarity[1] == 1.0
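

# With both the chat and embedding clients stubbed, every generated question
# embeds to the same vector as the original question, so the score is
# expected to be exactly 1.0 (seemingly the mean pairwise similarity).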
def test_compute_score(mock_answer_relevance_metric, mock_data, monkeypatch):
    monkeypatch.setattr(
        mock_answer_relevance_metric.client.chat.completions,
        "create",
        lambda model, messages: type(
            "obj",
            (object,),
            {
                "choices": [
                    type(
                        "obj",
                        (object,),
                        {"message": type("obj", (object,), {"content": "This is a test question response.\n"})},
                    )
                ]
            },
        )(),
    )
    monkeypatch.setattr(
        mock_answer_relevance_metric.client.embeddings,
        "create",
        lambda input, model: type("obj", (object,), {"data": [type("obj", (object,), {"embedding": [1, 2, 3]})]})(),
    )
    score = mock_answer_relevance_metric._compute_score(mock_data[0])
    assert score == 1.0
    monkeypatch.setattr(
        mock_answer_relevance_metric.client.chat.completions,
        "create",
        lambda model, messages: type(
            "obj",
            (object,),
            {
                "choices": [
                    type("obj", (object,), {"message": type("obj", (object,), {"content": "question 1?\nquestion2?"})})
                ]
            },
        )(),
    )
    monkeypatch.setattr(
        mock_answer_relevance_metric.client.embeddings,
        "create",
        lambda input, model: type("obj", (object,), {"data": [type("obj", (object,), {"embedding": [1, 2, 3]})]})(),
    )
    score = mock_answer_relevance_metric._compute_score(mock_data[1])
    assert score == 1.0
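

# End-to-end: evaluate() aggregates _compute_score over all EvalData items;
# with identical stub embeddings everywhere, the overall score is 1.0.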
def test_evaluate(mock_answer_relevance_metric, mock_data, monkeypatch):
    monkeypatch.setattr(
        mock_answer_relevance_metric.client.chat.completions,
        "create",
        lambda model, messages: type(
            "obj",
            (object,),
            {
                "choices": [
                    type(
                        "obj",
                        (object,),
                        {"message": type("obj", (object,), {"content": "This is a test question response.\n"})},
                    )
                ]
            },
        )(),
    )
    monkeypatch.setattr(
        mock_answer_relevance_metric.client.embeddings,
        "create",
        lambda input, model: type("obj", (object,), {"data": [type("obj", (object,), {"embedding": [1, 2, 3]})]})(),
    )
    score = mock_answer_relevance_metric.evaluate(mock_data)
    assert score == 1.0
    monkeypatch.setattr(
        mock_answer_relevance_metric.client.chat.completions,
        "create",
        lambda model, messages: type(
            "obj",
            (object,),
            {
                "choices": [
                    type("obj", (object,), {"message": type("obj", (object,), {"content": "question 1?\nquestion2?"})})
                ]
            },
        )(),
    )
    monkeypatch.setattr(
        mock_answer_relevance_metric.client.embeddings,
        "create",
        lambda input, model: type("obj", (object,), {"data": [type("obj", (object,), {"embedding": [1, 2, 3]})]})(),
    )
    score = mock_answer_relevance_metric.evaluate(mock_data)
    assert score == 1.0