from types import SimpleNamespace

import numpy as np
import pytest

from embedchain.config.evaluation.base import GroundednessConfig
from embedchain.evaluation.metrics import Groundedness
from embedchain.utils.evaluation import EvalData, EvalMetric
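

def _mock_chat_response(content: str) -> SimpleNamespace:
    # Test-local helper (an addition for readability, not part of embedchain):
    # builds the minimal shape of an OpenAI chat-completion response that the
    # metric reads, i.e. response.choices[0].message.content.
    return SimpleNamespace(choices=[SimpleNamespace(message=SimpleNamespace(content=content))])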


@pytest.fixture
def mock_data():
    # Two EvalData samples: one with a single context and one with multiple
    # contexts, so the prompt-building tests cover both shapes.
    return [
        EvalData(
            contexts=[
                "This is a test context 1.",
            ],
            question="This is a test question 1.",
            answer="This is a test answer 1.",
        ),
        EvalData(
            contexts=[
                "This is a test context 2-1.",
                "This is a test context 2-2.",
            ],
            question="This is a test question 2.",
            answer="This is a test answer 2.",
        ),
    ]
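

# Fixture: a Groundedness metric built with a dummy OPENAI_API_KEY so the
# underlying OpenAI client can be constructed without real credentials.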
@pytest.fixture
def mock_groundedness_metric(monkeypatch):
    monkeypatch.setenv("OPENAI_API_KEY", "test_api_key")
    return Groundedness()
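

# Constructor behaviour: the metric should default to gpt-4 and read the API
# key from the environment rather than storing it on the config.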
def test_groundedness_init(monkeypatch):
    monkeypatch.setenv("OPENAI_API_KEY", "test_api_key")
    metric = Groundedness()
    assert metric.name == EvalMetric.GROUNDEDNESS.value
    assert metric.config.model == "gpt-4"
    assert metric.config.api_key is None
    monkeypatch.delenv("OPENAI_API_KEY")


def test_groundedness_init_with_config():
    metric = Groundedness(config=GroundednessConfig(api_key="test_api_key"))
    assert metric.name == EvalMetric.GROUNDEDNESS.value
    assert metric.config.model == "gpt-4"
    assert metric.config.api_key == "test_api_key"


def test_groundedness_init_without_api_key(monkeypatch):
    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
    # Without a key in either the config or the environment, construction
    # should fail fast.
    with pytest.raises(ValueError):
        Groundedness()


def test_generate_answer_claim_prompt(mock_groundedness_metric, mock_data):
    prompt = mock_groundedness_metric._generate_answer_claim_prompt(data=mock_data[0])
    assert "This is a test question 1." in prompt
    assert "This is a test answer 1." in prompt


def test_get_claim_statements(mock_groundedness_metric, mock_data, monkeypatch):
    # Stub out the OpenAI call: the fake completion returns one claim per line.
    monkeypatch.setattr(
        mock_groundedness_metric.client.chat.completions,
        "create",
        lambda *args, **kwargs: _mock_chat_response(
            "This is a test answer 1.\nThis is a test answer 2.\nThis is a test answer 3."
        ),
    )
    prompt = mock_groundedness_metric._generate_answer_claim_prompt(data=mock_data[0])
    claim_statements = mock_groundedness_metric._get_claim_statements(prompt=prompt)
    assert len(claim_statements) == 3
    assert "This is a test answer 1." in claim_statements


def test_generate_claim_inference_prompt(mock_groundedness_metric, mock_data):
    claim_statements = [
        "This is a test claim 1.",
        "This is a test claim 2.",
    ]
    prompt = mock_groundedness_metric._generate_claim_inference_prompt(
        data=mock_data[0], claim_statements=claim_statements
    )
    assert "This is a test context 1." in prompt
    assert "This is a test claim 1." in prompt


def test_get_claim_verdict_scores(mock_groundedness_metric, mock_data, monkeypatch):
    # Stub the OpenAI call to return one verdict per claim line.
    monkeypatch.setattr(
        mock_groundedness_metric.client.chat.completions,
        "create",
        lambda *args, **kwargs: _mock_chat_response("1\n0\n-1"),
    )
    prompt = mock_groundedness_metric._generate_answer_claim_prompt(data=mock_data[0])
    claim_statements = mock_groundedness_metric._get_claim_statements(prompt=prompt)
    prompt = mock_groundedness_metric._generate_claim_inference_prompt(
        data=mock_data[0], claim_statements=claim_statements
    )
    claim_verdict_scores = mock_groundedness_metric._get_claim_verdict_scores(prompt=prompt)
    assert len(claim_verdict_scores) == 3
    assert claim_verdict_scores[0] == 1
    assert claim_verdict_scores[1] == 0
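

# _compute_score should come out as the fraction of claims judged grounded:
# here one of two stubbed claims gets a verdict of 1, so the expected score
# is 1/2.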
def test_compute_score(mock_groundedness_metric, mock_data, monkeypatch):
    monkeypatch.setattr(
        mock_groundedness_metric,
        "_get_claim_statements",
        lambda *args, **kwargs: np.array(
            [
                "This is a test claim 1.",
                "This is a test claim 2.",
            ]
        ),
    )
    monkeypatch.setattr(
        mock_groundedness_metric,
        "_get_claim_verdict_scores",
        lambda *args, **kwargs: np.array([1, 0]),
    )
    score = mock_groundedness_metric._compute_score(data=mock_data[0])
    assert score == 0.5
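

# With _compute_score stubbed to a constant 0.5 for every item, the aggregate
# over the dataset must also be 0.5, whatever averaging evaluate() applies.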
def test_evaluate(mock_groundedness_metric, mock_data, monkeypatch):
    monkeypatch.setattr(mock_groundedness_metric, "_compute_score", lambda *args, **kwargs: 0.5)
    score = mock_groundedness_metric.evaluate(dataset=mock_data)
    assert score == 0.5