from types import SimpleNamespace

import numpy as np
import pytest

from embedchain.config.evaluation.base import GroundednessConfig
from embedchain.evaluation.metrics import Groundedness
from embedchain.utils.evaluation import EvalData, EvalMetric
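

def _mock_chat_response(content: str) -> SimpleNamespace:
    # Test-local helper (an addition for readability, not part of embedchain):
    # builds the minimal shape of an OpenAI chat-completion response that the
    # metric reads, i.e. response.choices[0].message.content.
    return SimpleNamespace(choices=[SimpleNamespace(message=SimpleNamespace(content=content))])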


@pytest.fixture
def mock_data():
    # Two EvalData samples: one with a single context and one with multiple
    # contexts, so the prompt-building tests cover both shapes.
    return [
        EvalData(
            contexts=[
                "This is a test context 1.",
            ],
            question="This is a test question 1.",
            answer="This is a test answer 1.",
        ),
        EvalData(
            contexts=[
                "This is a test context 2-1.",
                "This is a test context 2-2.",
            ],
            question="This is a test question 2.",
            answer="This is a test answer 2.",
        ),
    ]
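

# Fixture: a Groundedness metric built with a dummy OPENAI_API_KEY so the
# underlying OpenAI client can be constructed without real credentials.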
@pytest.fixture
def mock_groundedness_metric(monkeypatch):
    monkeypatch.setenv("OPENAI_API_KEY", "test_api_key")
    return Groundedness()
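

# Constructor behaviour: the metric should default to gpt-4 and read the API
# key from the environment rather than storing it on the config.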
def test_groundedness_init(monkeypatch):
    monkeypatch.setenv("OPENAI_API_KEY", "test_api_key")
    metric = Groundedness()
    assert metric.name == EvalMetric.GROUNDEDNESS.value
    assert metric.config.model == "gpt-4"
    assert metric.config.api_key is None
    monkeypatch.delenv("OPENAI_API_KEY")


def test_groundedness_init_with_config():
    metric = Groundedness(config=GroundednessConfig(api_key="test_api_key"))
    assert metric.name == EvalMetric.GROUNDEDNESS.value
    assert metric.config.model == "gpt-4"
    assert metric.config.api_key == "test_api_key"


def test_groundedness_init_without_api_key(monkeypatch):
    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
    # Without a key in either the config or the environment, construction
    # should fail fast.
    with pytest.raises(ValueError):
        Groundedness()


def test_generate_answer_claim_prompt(mock_groundedness_metric, mock_data):
    prompt = mock_groundedness_metric._generate_answer_claim_prompt(data=mock_data[0])
    assert "This is a test question 1." in prompt
    assert "This is a test answer 1." in prompt


def test_get_claim_statements(mock_groundedness_metric, mock_data, monkeypatch):
    # Stub out the OpenAI call: the fake completion returns one claim per line.
    monkeypatch.setattr(
        mock_groundedness_metric.client.chat.completions,
        "create",
        lambda *args, **kwargs: _mock_chat_response(
            "This is a test answer 1.\nThis is a test answer 2.\nThis is a test answer 3."
        ),
    )
    prompt = mock_groundedness_metric._generate_answer_claim_prompt(data=mock_data[0])
    claim_statements = mock_groundedness_metric._get_claim_statements(prompt=prompt)
    assert len(claim_statements) == 3
    assert "This is a test answer 1." in claim_statements


def test_generate_claim_inference_prompt(mock_groundedness_metric, mock_data):
    claim_statements = [
        "This is a test claim 1.",
        "This is a test claim 2.",
    ]
    prompt = mock_groundedness_metric._generate_claim_inference_prompt(
        data=mock_data[0], claim_statements=claim_statements
    )
    assert "This is a test context 1." in prompt
    assert "This is a test claim 1." in prompt


def test_get_claim_verdict_scores(mock_groundedness_metric, mock_data, monkeypatch):
    # Stub the OpenAI call to return one verdict per claim line.
    monkeypatch.setattr(
        mock_groundedness_metric.client.chat.completions,
        "create",
        lambda *args, **kwargs: _mock_chat_response("1\n0\n-1"),
    )
    prompt = mock_groundedness_metric._generate_answer_claim_prompt(data=mock_data[0])
    claim_statements = mock_groundedness_metric._get_claim_statements(prompt=prompt)
    prompt = mock_groundedness_metric._generate_claim_inference_prompt(
        data=mock_data[0], claim_statements=claim_statements
    )
    claim_verdict_scores = mock_groundedness_metric._get_claim_verdict_scores(prompt=prompt)
    assert len(claim_verdict_scores) == 3
    assert claim_verdict_scores[0] == 1
    assert claim_verdict_scores[1] == 0
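

# _compute_score should come out as the fraction of claims judged grounded:
# here one of two stubbed claims gets a verdict of 1, so the expected score
# is 1/2.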
def test_compute_score(mock_groundedness_metric, mock_data, monkeypatch):
    monkeypatch.setattr(
        mock_groundedness_metric,
        "_get_claim_statements",
        lambda *args, **kwargs: np.array(
            [
                "This is a test claim 1.",
                "This is a test claim 2.",
            ]
        ),
    )
    monkeypatch.setattr(
        mock_groundedness_metric,
        "_get_claim_verdict_scores",
        lambda *args, **kwargs: np.array([1, 0]),
    )
    score = mock_groundedness_metric._compute_score(data=mock_data[0])
    assert score == 0.5
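

# With _compute_score stubbed to a constant 0.5 for every item, the aggregate
# over the dataset must also be 0.5, whatever averaging evaluate() applies.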
def test_evaluate(mock_groundedness_metric, mock_data, monkeypatch):
    monkeypatch.setattr(mock_groundedness_metric, "_compute_score", lambda *args, **kwargs: 0.5)
    score = mock_groundedness_metric.evaluate(dataset=mock_data)
    assert score == 0.5