Spaces:
Runtime error
Runtime error
File size: 5,215 Bytes
129cd69 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 |
"""Evaluators for parsing strings."""
import json
from operator import eq
from typing import Any, Callable, Optional, Union, cast

from langchain.evaluation.schema import StringEvaluator
from langchain.output_parsers.json import parse_json_markdown
class JsonValidityEvaluator(StringEvaluator):
    """Score whether a prediction can be parsed as JSON.

    The prediction is handed to ``parse_json_markdown``; when that call
    completes without raising, the prediction is considered valid. Neither an
    input string nor a reference string is needed.

    Attributes:
        requires_input (bool): Whether an input string is required.
            Always False.
        requires_reference (bool): Whether a reference string is required.
            Always False.
        evaluation_name (str): Name of the evaluation metric.
            Always "json_validity".

    Examples:
        >>> evaluator = JsonValidityEvaluator()
        >>> prediction = '{"name": "John", "age": 30, "city": "New York"}'
        >>> evaluator.evaluate_strings(prediction=prediction)
        {'score': 1}

        A prediction the parser rejects produces a result of the form
        ``{'score': 0, 'reasoning': '<message of the raised exception>'}``.
    """

    def __init__(self, **kwargs: Any) -> None:
        """Create the evaluator; extra keyword arguments are accepted and ignored."""
        super().__init__()

    @property
    def requires_input(self) -> bool:
        """This evaluator never needs an input string."""
        return False

    @property
    def requires_reference(self) -> bool:
        """This evaluator never needs a reference string."""
        return False

    @property
    def evaluation_name(self) -> str:
        """Name under which the score is reported."""
        return "json_validity"

    def _evaluate_strings(
        self,
        prediction: str,
        input: Optional[str] = None,
        reference: Optional[str] = None,
        **kwargs: Any,
    ) -> dict:
        """Check whether ``prediction`` parses as JSON.

        Args:
            prediction (str): The text to validate.
            input (str, optional): Unused. Defaults to None.
            reference (str, optional): Unused. Defaults to None.
            **kwargs: Unused.

        Returns:
            dict: ``{"score": 1}`` when parsing succeeds. When parsing raises,
            ``{"score": 0, "reasoning": <str of the exception>}``.
        """
        try:
            parse_json_markdown(prediction)
        except Exception as parse_error:
            # Any parser failure is reported as an invalid prediction rather
            # than propagated, so one bad output does not abort an evaluation.
            return {"score": 0, "reasoning": str(parse_error)}
        return {"score": 1}
class JsonEqualityEvaluator(StringEvaluator):
    """Score whether the prediction equals the reference after JSON parsing.

    Both the prediction and the reference are parsed with
    ``parse_json_markdown`` (values that are not strings are used as-is) and
    then compared with ``operator``. When the reference is a list, the
    top-level elements of both sides are sorted first, so element order at the
    top level does not affect the comparison. No input string is required.

    Attributes:
        requires_input (bool): Whether an input string is required.
            Always False.
        requires_reference (bool): Whether a reference string is required.
            Always True.
        evaluation_name (str): Name of the evaluation metric.
            Always "json_equality".
        operator (Callable): Two-argument callable applied to
            ``(parsed_prediction, parsed_reference)``; its return value is the
            score. Defaults to ``operator.eq``.

    Examples:
        >>> evaluator = JsonEqualityEvaluator()
        >>> evaluator.evaluate_strings(prediction='{"a": 1}', reference='{"a": 1}')
        {'score': True}
        >>> evaluator.evaluate_strings(prediction='{"a": 1}', reference='{"a": 2}')
        {'score': False}
        >>> prediction = '[{"a": 1, "z": 2}, {"m": 3}]'
        >>> reference = '[{"m": 3}, {"z": 2, "a": 1}]'
        >>> evaluator.evaluate_strings(prediction=prediction, reference=reference)
        {'score': True}
        >>> evaluator = JsonEqualityEvaluator(operator=lambda x, y: x['a'] == y['a'])
        >>> evaluator.evaluate_strings(prediction='{"a": 1}', reference='{"a": 1}')
        {'score': True}
        >>> evaluator.evaluate_strings(prediction='{"a": 1}', reference='{"a": 2}')
        {'score': False}
    """

    def __init__(self, operator: Optional[Callable] = None, **kwargs: Any) -> None:
        """Create the evaluator.

        Args:
            operator (Callable, optional): Comparison applied to the parsed
                prediction and reference. Falls back to ``operator.eq`` when
                omitted (or falsy).
            **kwargs: Accepted and ignored.
        """
        super().__init__()
        self.operator = operator or eq

    @property
    def requires_input(self) -> bool:
        """This evaluator never needs an input string."""
        return False

    @property
    def requires_reference(self) -> bool:
        """This evaluator always needs a reference to compare against."""
        return True

    @property
    def evaluation_name(self) -> str:
        """Name under which the score is reported."""
        return "json_equality"

    def _parse_json(
        self,
        string: Any,
    ) -> Union[dict, list, None, float, bool, int, str]:
        """Parse ``string`` as JSON when it is a ``str``; otherwise return it unchanged."""
        if isinstance(string, str):
            return parse_json_markdown(string)
        return string

    @staticmethod
    def _canonical_sort_key(item: Any) -> str:
        """Return a sort key that is identical for JSON-equal values.

        ``str()`` of a dict follows key insertion order, so two equal objects
        written with different key order would sort differently. Serializing
        with ``sort_keys=True`` removes that dependency and also keeps the
        string ``"1"`` distinct from the number ``1``.
        """
        try:
            return json.dumps(item, sort_keys=True, default=str)
        except (TypeError, ValueError):
            # Values that cannot be serialized canonically (e.g. dicts with
            # non-comparable keys, circular structures) fall back to str().
            return str(item)

    def _evaluate_strings(
        self,
        prediction: str,
        input: Optional[str] = None,
        reference: Optional[str] = None,
        **kwargs: Any,
    ) -> dict:
        """Compare the parsed prediction with the parsed reference.

        Args:
            prediction (str): The prediction to evaluate.
            input (str, optional): Unused.
            reference (str): The reference to compare against.
            **kwargs: Unused.

        Returns:
            dict: ``{"score": <result of self.operator>}``. When the reference
            parses to a list but the prediction does not, ``{"score": 0}`` is
            returned without calling the operator.
        """
        parsed = self._parse_json(prediction)
        label = self._parse_json(cast(str, reference))
        if isinstance(label, list):
            if not isinstance(parsed, list):
                return {"score": 0}
            # Sort both sides with the same canonical key so top-level element
            # order (and dict key order inside elements) cannot cause a
            # spurious mismatch.
            parsed = sorted(parsed, key=self._canonical_sort_key)
            label = sorted(label, key=self._canonical_sort_key)
        return {"score": self.operator(parsed, label)}
|