Spaces:

Rom89823974978
/

RAG_Eval

Sleeping

RAG_Eval / evaluation /metrics /composite.py

Resolved issues

4dc151e 4 months ago

1.48 kB

	"""Composite RAG metrics combining retrieval + generation (single‐mapping version)."""

	from __future__ import annotations
	from typing import Mapping
	import math


	def harmonic_mean(scores: Mapping[str, float]) -> float:
	    """Return the harmonic mean of the values held in ``scores``.

	    The keys are not used; only the values contribute.

	    Returns ``0.0`` when the mapping is empty, when any value is zero or
	    negative, or when the reciprocals of the values add up to zero.
	    """
	    values = list(scores.values())
	    if not values:
	        return 0.0

	    # A single non-positive entry makes the harmonic mean undefined, so the
	    # whole result collapses to zero.
	    for value in values:
	        if value <= 0:
	            return 0.0

	    reciprocal_total = sum(1.0 / value for value in values)
	    if reciprocal_total == 0:
	        return 0.0
	    return len(values) / reciprocal_total


	def rag_score(
	    scores: Mapping[str, float],
	    *,
	    alpha: float = 0.5,
	) -> float:
	    """Combine retrieval and generation sub-scores via a weighted harmonic mean.

	    Keys of ``scores`` starting with ``retrieval_`` or ``generation_`` are
	    grouped by side; every other key is ignored. Each side is reduced with
	    ``harmonic_mean`` and the two results are combined as
	    ``1 / (alpha / retrieval + (1 - alpha) / generation)``.

	    Args:
	        scores: Flat mapping of prefixed metric name to value. Sub-scores are
	            expected to lie in the 0-1 range.
	        alpha: Weight given to the retrieval side; the generation side gets
	            ``1 - alpha``. Must lie in ``[0, 1]``.

	    Returns:
	        The combined score, or ``0.0`` when either side has no metrics or
	        reduces to zero.

	    Raises:
	        ValueError: If ``alpha`` is outside ``[0, 1]`` (NaN included).
	    """
	    # Outside [0, 1] one of the two weights is negative, so the denominator
	    # below can become negative or exactly zero (ZeroDivisionError).
	    # The negated chained comparison also rejects NaN.
	    if not 0.0 <= alpha <= 1.0:
	        raise ValueError(f"alpha must be within [0, 1], got {alpha!r}")

	    retr_prefix = "retrieval_"
	    gen_prefix = "generation_"

	    # Split the flat mapping into per-side maps, dropping the prefix.
	    retr_map: dict[str, float] = {
	        key[len(retr_prefix):]: value
	        for key, value in scores.items()
	        if key.startswith(retr_prefix)
	    }
	    gen_map: dict[str, float] = {
	        key[len(gen_prefix):]: value
	        for key, value in scores.items()
	        if key.startswith(gen_prefix)
	    }

	    # Without at least one metric on each side there is nothing to combine.
	    if not retr_map or not gen_map:
	        return 0.0

	    retr_score = harmonic_mean(retr_map)
	    gen_score = harmonic_mean(gen_map)

	    # A zero on either side would divide by zero below; the combined score
	    # is defined as zero in that case.
	    if retr_score == 0 or gen_score == 0:
	        return 0.0

	    return 1.0 / (alpha / retr_score + (1 - alpha) / gen_score)