embedchain / tests /evaluation /test_groundedness_metric.py
rajesh1501's picture
Upload folder using huggingface_hub
a85c9b8 verified
import numpy as np
import pytest
from embedchain.config.evaluation.base import GroundednessConfig
from embedchain.evaluation.metrics import Groundedness
from embedchain.utils.evaluation import EvalData, EvalMetric
@pytest.fixture
def mock_data():
return [
EvalData(
contexts=[
"This is a test context 1.",
],
question="This is a test question 1.",
answer="This is a test answer 1.",
),
EvalData(
contexts=[
"This is a test context 2-1.",
"This is a test context 2-2.",
],
question="This is a test question 2.",
answer="This is a test answer 2.",
),
]
@pytest.fixture
def mock_groundedness_metric(monkeypatch):
monkeypatch.setenv("OPENAI_API_KEY", "test_api_key")
metric = Groundedness()
return metric
def test_groundedness_init(monkeypatch):
monkeypatch.setenv("OPENAI_API_KEY", "test_api_key")
metric = Groundedness()
assert metric.name == EvalMetric.GROUNDEDNESS.value
assert metric.config.model == "gpt-4"
assert metric.config.api_key is None
monkeypatch.delenv("OPENAI_API_KEY")
def test_groundedness_init_with_config():
metric = Groundedness(config=GroundednessConfig(api_key="test_api_key"))
assert metric.name == EvalMetric.GROUNDEDNESS.value
assert metric.config.model == "gpt-4"
assert metric.config.api_key == "test_api_key"
def test_groundedness_init_without_api_key(monkeypatch):
monkeypatch.delenv("OPENAI_API_KEY", raising=False)
with pytest.raises(ValueError):
Groundedness()
def test_generate_answer_claim_prompt(mock_groundedness_metric, mock_data):
prompt = mock_groundedness_metric._generate_answer_claim_prompt(data=mock_data[0])
assert "This is a test question 1." in prompt
assert "This is a test answer 1." in prompt
def test_get_claim_statements(mock_groundedness_metric, mock_data, monkeypatch):
monkeypatch.setattr(
mock_groundedness_metric.client.chat.completions,
"create",
lambda *args, **kwargs: type(
"obj",
(object,),
{
"choices": [
type(
"obj",
(object,),
{
"message": type(
"obj",
(object,),
{
"content": """This is a test answer 1.
This is a test answer 2.
This is a test answer 3."""
},
)
},
)
]
},
)(),
)
prompt = mock_groundedness_metric._generate_answer_claim_prompt(data=mock_data[0])
claim_statements = mock_groundedness_metric._get_claim_statements(prompt=prompt)
assert len(claim_statements) == 3
assert "This is a test answer 1." in claim_statements
def test_generate_claim_inference_prompt(mock_groundedness_metric, mock_data):
prompt = mock_groundedness_metric._generate_answer_claim_prompt(data=mock_data[0])
claim_statements = [
"This is a test claim 1.",
"This is a test claim 2.",
]
prompt = mock_groundedness_metric._generate_claim_inference_prompt(
data=mock_data[0], claim_statements=claim_statements
)
assert "This is a test context 1." in prompt
assert "This is a test claim 1." in prompt
def test_get_claim_verdict_scores(mock_groundedness_metric, mock_data, monkeypatch):
monkeypatch.setattr(
mock_groundedness_metric.client.chat.completions,
"create",
lambda *args, **kwargs: type(
"obj",
(object,),
{"choices": [type("obj", (object,), {"message": type("obj", (object,), {"content": "1\n0\n-1"})})]},
)(),
)
prompt = mock_groundedness_metric._generate_answer_claim_prompt(data=mock_data[0])
claim_statements = mock_groundedness_metric._get_claim_statements(prompt=prompt)
prompt = mock_groundedness_metric._generate_claim_inference_prompt(
data=mock_data[0], claim_statements=claim_statements
)
claim_verdict_scores = mock_groundedness_metric._get_claim_verdict_scores(prompt=prompt)
assert len(claim_verdict_scores) == 3
assert claim_verdict_scores[0] == 1
assert claim_verdict_scores[1] == 0
def test_compute_score(mock_groundedness_metric, mock_data, monkeypatch):
monkeypatch.setattr(
mock_groundedness_metric,
"_get_claim_statements",
lambda *args, **kwargs: np.array(
[
"This is a test claim 1.",
"This is a test claim 2.",
]
),
)
monkeypatch.setattr(mock_groundedness_metric, "_get_claim_verdict_scores", lambda *args, **kwargs: np.array([1, 0]))
score = mock_groundedness_metric._compute_score(data=mock_data[0])
assert score == 0.5
def test_evaluate(mock_groundedness_metric, mock_data, monkeypatch):
monkeypatch.setattr(mock_groundedness_metric, "_compute_score", lambda *args, **kwargs: 0.5)
score = mock_groundedness_metric.evaluate(dataset=mock_data)
assert score == 0.5