|
"""Evaluators for parsing strings.""" |
|
|
|
import json |
|
from operator import eq |
|
from typing import Any, Callable, Optional, Union, cast |
|
|
|
from langchain_core.utils.json import parse_json_markdown |
|
|
|
from langchain.evaluation.schema import StringEvaluator |
|
|
|
|
|
class JsonValidityEvaluator(StringEvaluator):
    """Score a prediction by whether it can be parsed as JSON.

    The prediction is handed to ``parse_json_markdown`` with ``json.loads``
    as the parser. Neither an input nor a reference string is needed.

    Attributes:
        requires_input (bool): Whether this evaluator requires an input
            string. Always False.
        requires_reference (bool): Whether this evaluator requires a
            reference string. Always False.
        evaluation_name (str): The name of the evaluation metric.
            Always "json_validity".

    Examples:
        >>> evaluator = JsonValidityEvaluator()
        >>> prediction = '{"name": "John", "age": 30, "city": "New York"}'
        >>> evaluator.evaluate_strings(prediction=prediction)
        {'score': 1}

        A prediction that fails to parse (for instance one with a trailing
        comma) scores 0, and the text of the raised exception is returned
        under the "reasoning" key.
    """

    def __init__(self, **kwargs: Any) -> None:
        # Keyword arguments are accepted but not forwarded to the base class.
        super().__init__()

    @property
    def requires_input(self) -> bool:
        """Return False: no input string is consulted."""
        return False

    @property
    def requires_reference(self) -> bool:
        """Return False: no reference string is consulted."""
        return False

    @property
    def evaluation_name(self) -> str:
        """Return the metric name, "json_validity"."""
        return "json_validity"

    def _evaluate_strings(
        self,
        prediction: str,
        input: Optional[str] = None,
        reference: Optional[str] = None,
        **kwargs: Any,
    ) -> dict:
        """Check whether ``prediction`` parses as JSON.

        Args:
            prediction (str): The string to validate.
            input (str, optional): Ignored by this evaluator.
            reference (str, optional): Ignored by this evaluator.

        Returns:
            dict: ``{"score": 1}`` when parsing succeeds. When parsing raises,
                ``{"score": 0, "reasoning": <message>}`` where the message is
                ``str()`` of the raised exception.
        """
        try:
            parse_json_markdown(prediction, parser=json.loads)
        except Exception as err:
            # Any failure during parsing is reported as "invalid", not raised.
            return {"score": 0, "reasoning": str(err)}
        return {"score": 1}
|
|
|
|
|
class JsonEqualityEvaluator(StringEvaluator):
    """Evaluate whether the prediction equals the reference once both are
    parsed as JSON.

    String arguments are parsed with ``parse_json_markdown``; non-string
    arguments are compared as given. When the parsed reference is a list,
    both lists are sorted by the ``str()`` of their elements before being
    compared, so the order of top-level list items does not matter. No input
    string is required.

    Attributes:
        operator (Callable): Two-argument callable applied to
            ``(parsed_prediction, parsed_reference)`` to produce the score.
            Defaults to ``operator.eq``.
        requires_input (bool): Whether this evaluator requires an
            input string. Always False.
        requires_reference (bool): Whether this evaluator requires
            a reference string. Always True.
        evaluation_name (str): The name of the evaluation metric.
            Always "json_equality".

    Examples:
        >>> evaluator = JsonEqualityEvaluator()
        >>> evaluator.evaluate_strings(prediction='{"a": 1}', reference='{"a": 1}')
        {'score': True}
        >>> evaluator.evaluate_strings(prediction='{"a": 1}', reference='{"a": 2}')
        {'score': False}

        >>> evaluator = JsonEqualityEvaluator(operator=lambda x, y: x['a'] == y['a'])
        >>> evaluator.evaluate_strings(prediction='{"a": 1}', reference='{"a": 1}')
        {'score': True}
        >>> evaluator.evaluate_strings(prediction='{"a": 1}', reference='{"a": 2}')
        {'score': False}
    """

    def __init__(self, operator: Optional[Callable] = None, **kwargs: Any) -> None:
        """Create the evaluator.

        Args:
            operator (Callable, optional): Comparison callable taking the
                parsed prediction and parsed reference. A falsy value selects
                ``operator.eq``.
            **kwargs: Accepted but not forwarded to the base class.
        """
        super().__init__()
        self.operator = operator or eq

    @property
    def requires_input(self) -> bool:
        """Return False: no input string is consulted."""
        return False

    @property
    def requires_reference(self) -> bool:
        """Return True: a reference is needed for the comparison."""
        return True

    @property
    def evaluation_name(self) -> str:
        """Return the metric name, "json_equality"."""
        return "json_equality"

    def _parse_json(
        self,
        string: Any,
    ) -> Union[dict, list, None, float, bool, int, str]:
        """Parse ``string`` with ``parse_json_markdown`` if it is a ``str``.

        Any other value is returned unchanged, so already-parsed objects can
        be passed straight through.
        """
        if isinstance(string, str):
            return parse_json_markdown(string)
        return string

    def _evaluate_strings(
        self,
        prediction: str,
        input: Optional[str] = None,
        reference: Optional[str] = None,
        **kwargs: Any,
    ) -> dict:
        """Compare the parsed prediction with the parsed reference.

        Args:
            prediction (str): The prediction string to evaluate.
            input (str, optional): Not used in this evaluator.
            reference (str): The reference string to compare against.

        Returns:
            dict: ``{"score": <result>}`` where the result is whatever
                ``self.operator`` returns, or ``False`` when the reference
                parses to a list and the prediction does not.
        """
        parsed = self._parse_json(prediction)
        label = self._parse_json(cast(str, reference))
        if isinstance(label, list):
            if not isinstance(parsed, list):
                # A list reference cannot match a non-list prediction. Report
                # a bool so the score type agrees with the default operator
                # (False still compares equal to the previous value, 0).
                return {"score": False}
            # Sort both sides by string form so top-level order is ignored.
            parsed = sorted(parsed, key=str)
            label = sorted(label, key=str)
        return {"score": self.operator(parsed, label)}
|
|