"""A chain for comparing the output of two models using embeddings."""
from enum import Enum
from typing import Any, Dict, List, Optional

import numpy as np
from langchain_core.embeddings import Embeddings
from langchain_core.pydantic_v1 import Field, root_validator

from langchain.callbacks.manager import (
    AsyncCallbackManagerForChainRun,
    CallbackManagerForChainRun,
    Callbacks,
)
from langchain.chains.base import Chain
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.evaluation.schema import PairwiseStringEvaluator, StringEvaluator
from langchain.schema import RUN_KEY
from langchain.utils.math import cosine_similarity


class EmbeddingDistance(str, Enum):
    """Embedding Distance Metric.

    Attributes:
        COSINE: Cosine distance metric.
        EUCLIDEAN: Euclidean distance metric.
        MANHATTAN: Manhattan distance metric.
        CHEBYSHEV: Chebyshev distance metric.
        HAMMING: Hamming distance metric.
    """

    COSINE = "cosine"
    EUCLIDEAN = "euclidean"
    MANHATTAN = "manhattan"
    CHEBYSHEV = "chebyshev"
    HAMMING = "hamming"
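
# A quick sanity check of what each metric computes (an illustrative sketch,
# not part of the library; the vectors and numbers below are assumptions):
#
#     import numpy as np
#     a = np.array([1.0, 0.0, 1.0, 1.0])
#     b = np.array([1.0, 1.0, 0.0, 1.0])
#     # cosine    -> 1 - (a @ b) / (np.linalg.norm(a) * np.linalg.norm(b))  ≈ 0.333
#     # euclidean -> np.linalg.norm(a - b)                                  ≈ 1.414
#     # manhattan -> np.sum(np.abs(a - b))                                  = 2.0
#     # chebyshev -> np.max(np.abs(a - b))                                  = 1.0
#     # hamming   -> np.mean(a != b)                                        = 0.5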


class _EmbeddingDistanceChainMixin(Chain):
    """Shared functionality for embedding distance evaluators.

    Attributes:
        embeddings (Embeddings): The embeddings object used to vectorize the outputs.
        distance_metric (EmbeddingDistance): The distance metric to use
            for comparing the embeddings.
    """

    embeddings: Embeddings = Field(default_factory=OpenAIEmbeddings)
    distance_metric: EmbeddingDistance = Field(default=EmbeddingDistance.COSINE)

    @root_validator(pre=False)
    def _validate_tiktoken_installed(cls, values: Dict[str, Any]) -> Dict[str, Any]:
        """Validate that the tiktoken library is installed.

        Args:
            values (Dict[str, Any]): The values to validate.

        Returns:
            Dict[str, Any]: The validated values.
        """
        embeddings = values.get("embeddings")
        if isinstance(embeddings, OpenAIEmbeddings):
            try:
                import tiktoken  # noqa: F401
            except ImportError:
                raise ImportError(
                    "The tiktoken library is required to use the default "
                    "OpenAI embeddings with embedding distance evaluators."
                    " Please either manually select a different Embeddings object"
                    " or install tiktoken using `pip install tiktoken`."
                )
        return values
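
    # A minimal sketch of sidestepping the tiktoken requirement by supplying a
    # non-OpenAI Embeddings implementation (HuggingFaceEmbeddings is just one
    # assumption; any object satisfying the Embeddings interface works):
    #
    #     from langchain.embeddings import HuggingFaceEmbeddings
    #     chain = EmbeddingDistanceEvalChain(embeddings=HuggingFaceEmbeddings())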

    class Config:
        """Permit embeddings to go unvalidated."""

        arbitrary_types_allowed: bool = True

    @property
    def output_keys(self) -> List[str]:
        """Return the output keys of the chain.

        Returns:
            List[str]: The output keys.
        """
        return ["score"]

    def _prepare_output(self, result: dict) -> dict:
        parsed = {"score": result["score"]}
        if RUN_KEY in result:
            parsed[RUN_KEY] = result[RUN_KEY]
        return parsed

    def _get_metric(self, metric: EmbeddingDistance) -> Any:
        """Get the metric function for the given metric name.

        Args:
            metric (EmbeddingDistance): The metric name.

        Returns:
            Any: The metric function.
        """
        metrics = {
            EmbeddingDistance.COSINE: self._cosine_distance,
            EmbeddingDistance.EUCLIDEAN: self._euclidean_distance,
            EmbeddingDistance.MANHATTAN: self._manhattan_distance,
            EmbeddingDistance.CHEBYSHEV: self._chebyshev_distance,
            EmbeddingDistance.HAMMING: self._hamming_distance,
        }
        if metric in metrics:
            return metrics[metric]
        else:
            raise ValueError(f"Invalid metric: {metric}")
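
    # Dispatch note (illustrative, not part of the original code):
    # self._get_metric(EmbeddingDistance.COSINE) returns the _cosine_distance
    # helper below; any value outside the enum raises ValueError.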

    @staticmethod
    def _cosine_distance(a: np.ndarray, b: np.ndarray) -> np.ndarray:
        """Compute the cosine distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.ndarray: The cosine distance.
        """
        return 1.0 - cosine_similarity(a, b)

    @staticmethod
    def _euclidean_distance(a: np.ndarray, b: np.ndarray) -> np.floating:
        """Compute the Euclidean distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Euclidean distance.
        """
        return np.linalg.norm(a - b)

    @staticmethod
    def _manhattan_distance(a: np.ndarray, b: np.ndarray) -> np.floating:
        """Compute the Manhattan distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Manhattan distance.
        """
        return np.sum(np.abs(a - b))

    @staticmethod
    def _chebyshev_distance(a: np.ndarray, b: np.ndarray) -> np.floating:
        """Compute the Chebyshev distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Chebyshev distance.
        """
        return np.max(np.abs(a - b))

    @staticmethod
    def _hamming_distance(a: np.ndarray, b: np.ndarray) -> np.floating:
        """Compute the Hamming distance between two vectors.

        Args:
            a (np.ndarray): The first vector.
            b (np.ndarray): The second vector.

        Returns:
            np.floating: The Hamming distance.
        """
        return np.mean(a != b)
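
    # These helpers can be exercised directly (illustrative sketch; the inputs
    # are made up). _cosine_distance is fed 2-D row vectors because
    # langchain.utils.math.cosine_similarity is matrix-based; the other
    # metrics also accept plain 1-D arrays:
    #
    #     a, b = np.array([[1.0, 0.0]]), np.array([[0.0, 1.0]])
    #     _EmbeddingDistanceChainMixin._cosine_distance(a, b)     # [[1.0]]
    #     _EmbeddingDistanceChainMixin._euclidean_distance(a, b)  # ≈ 1.414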

    def _compute_score(self, vectors: np.ndarray) -> float:
        """Compute the score based on the distance metric.

        Args:
            vectors (np.ndarray): The input vectors.

        Returns:
            float: The computed score.
        """
        metric = self._get_metric(self.distance_metric)
        score = metric(vectors[0].reshape(1, -1), vectors[1].reshape(1, -1)).item()
        return score
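
    # Shape bookkeeping (an explanatory note, not original code): the two
    # embeddings arrive as rows of a (2, d) array; reshape(1, -1) turns each
    # row into a (1, d) matrix so the matrix-based cosine path works, and
    # .item() collapses the scalar (or the (1, 1) cosine matrix) to a float.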


class EmbeddingDistanceEvalChain(_EmbeddingDistanceChainMixin, StringEvaluator):
    """Use embedding distances to score semantic difference between
    a prediction and reference.

    Examples:
        >>> chain = EmbeddingDistanceEvalChain()
        >>> result = chain.evaluate_strings(prediction="Hello", reference="Hi")
        >>> print(result)
        {'score': 0.5}
    """

    @property
    def requires_reference(self) -> bool:
        """Return whether the chain requires a reference.

        Returns:
            bool: True if a reference is required, False otherwise.
        """
        return True

    @property
    def evaluation_name(self) -> str:
        return f"embedding_{self.distance_metric.value}_distance"

    @property
    def input_keys(self) -> List[str]:
        """Return the input keys of the chain.

        Returns:
            List[str]: The input keys.
        """
        return ["prediction", "reference"]

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, Any]:
        """Compute the score for a prediction and reference.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (Optional[CallbackManagerForChainRun], optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        """
        vectors = np.array(
            self.embeddings.embed_documents([inputs["prediction"], inputs["reference"]])
        )
        score = self._compute_score(vectors)
        return {"score": score}

    async def _acall(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[AsyncCallbackManagerForChainRun] = None,
    ) -> Dict[str, Any]:
        """Asynchronously compute the score for a prediction and reference.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (AsyncCallbackManagerForChainRun, optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        """
        embedded = await self.embeddings.aembed_documents(
            [inputs["prediction"], inputs["reference"]]
        )
        vectors = np.array(embedded)
        score = self._compute_score(vectors)
        return {"score": score}

    def _evaluate_strings(
        self,
        *,
        prediction: str,
        reference: Optional[str] = None,
        callbacks: Callbacks = None,
        tags: Optional[List[str]] = None,
        metadata: Optional[Dict[str, Any]] = None,
        include_run_info: bool = False,
        **kwargs: Any,
    ) -> dict:
        """Evaluate the embedding distance between a prediction and
        reference.

        Args:
            prediction (str): The output string from the first model.
            reference (str): The reference string (required).
            callbacks (Callbacks, optional): The callbacks to use.
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the prediction
                    and reference.
        """
        result = self(
            inputs={"prediction": prediction, "reference": reference},
            callbacks=callbacks,
            tags=tags,
            metadata=metadata,
            include_run_info=include_run_info,
        )
        return self._prepare_output(result)

    async def _aevaluate_strings(
        self,
        *,
        prediction: str,
        reference: Optional[str] = None,
        callbacks: Callbacks = None,
        tags: Optional[List[str]] = None,
        metadata: Optional[Dict[str, Any]] = None,
        include_run_info: bool = False,
        **kwargs: Any,
    ) -> dict:
        """Asynchronously evaluate the embedding distance between
        a prediction and reference.

        Args:
            prediction (str): The output string from the first model.
            reference (str): The reference string (required).
            callbacks (Callbacks, optional): The callbacks to use.
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the prediction
                    and reference.
        """
        result = await self.acall(
            inputs={"prediction": prediction, "reference": reference},
            callbacks=callbacks,
            tags=tags,
            metadata=metadata,
            include_run_info=include_run_info,
        )
        return self._prepare_output(result)
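

# A minimal usage sketch for EmbeddingDistanceEvalChain (assumes OpenAI
# credentials are configured in the environment; swap in another Embeddings
# object otherwise):
#
#     chain = EmbeddingDistanceEvalChain(distance_metric=EmbeddingDistance.EUCLIDEAN)
#     result = chain.evaluate_strings(prediction="Hello", reference="Hi")
#     result["score"]  # a non-negative float; smaller means more similar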


class PairwiseEmbeddingDistanceEvalChain(
    _EmbeddingDistanceChainMixin, PairwiseStringEvaluator
):
    """Use embedding distances to score semantic difference between two predictions.

    Examples:
        >>> chain = PairwiseEmbeddingDistanceEvalChain()
        >>> result = chain.evaluate_string_pairs(prediction="Hello", prediction_b="Hi")
        >>> print(result)
        {'score': 0.5}
    """

    @property
    def input_keys(self) -> List[str]:
        """Return the input keys of the chain.

        Returns:
            List[str]: The input keys.
        """
        return ["prediction", "prediction_b"]

    @property
    def evaluation_name(self) -> str:
        return f"pairwise_embedding_{self.distance_metric.value}_distance"

    def _call(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[CallbackManagerForChainRun] = None,
    ) -> Dict[str, Any]:
        """Compute the score for two predictions.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (CallbackManagerForChainRun, optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        """
        vectors = np.array(
            self.embeddings.embed_documents(
                [inputs["prediction"], inputs["prediction_b"]]
            )
        )
        score = self._compute_score(vectors)
        return {"score": score}

    async def _acall(
        self,
        inputs: Dict[str, Any],
        run_manager: Optional[AsyncCallbackManagerForChainRun] = None,
    ) -> Dict[str, Any]:
        """Asynchronously compute the score for two predictions.

        Args:
            inputs (Dict[str, Any]): The input data.
            run_manager (AsyncCallbackManagerForChainRun, optional):
                The callback manager.

        Returns:
            Dict[str, Any]: The computed score.
        """
        embedded = await self.embeddings.aembed_documents(
            [inputs["prediction"], inputs["prediction_b"]]
        )
        vectors = np.array(embedded)
        score = self._compute_score(vectors)
        return {"score": score}

    def _evaluate_string_pairs(
        self,
        *,
        prediction: str,
        prediction_b: str,
        callbacks: Callbacks = None,
        tags: Optional[List[str]] = None,
        metadata: Optional[Dict[str, Any]] = None,
        include_run_info: bool = False,
        **kwargs: Any,
    ) -> dict:
        """Evaluate the embedding distance between two predictions.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            callbacks (Callbacks, optional): The callbacks to use.
            tags (List[str], optional): Tags to apply to traces.
            metadata (Dict[str, Any], optional): Metadata to apply to traces.
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        """
        result = self(
            inputs={"prediction": prediction, "prediction_b": prediction_b},
            callbacks=callbacks,
            tags=tags,
            metadata=metadata,
            include_run_info=include_run_info,
        )
        return self._prepare_output(result)

    async def _aevaluate_string_pairs(
        self,
        *,
        prediction: str,
        prediction_b: str,
        callbacks: Callbacks = None,
        tags: Optional[List[str]] = None,
        metadata: Optional[Dict[str, Any]] = None,
        include_run_info: bool = False,
        **kwargs: Any,
    ) -> dict:
        """Asynchronously evaluate the embedding distance
        between two predictions.

        Args:
            prediction (str): The output string from the first model.
            prediction_b (str): The output string from the second model.
            callbacks (Callbacks, optional): The callbacks to use.
            tags (List[str], optional): Tags to apply to traces.
            metadata (Dict[str, Any], optional): Metadata to apply to traces.
            **kwargs (Any): Additional keyword arguments.

        Returns:
            dict: A dictionary containing:
                - score: The embedding distance between the two
                    predictions.
        """
        result = await self.acall(
            inputs={"prediction": prediction, "prediction_b": prediction_b},
            callbacks=callbacks,
            tags=tags,
            metadata=metadata,
            include_run_info=include_run_info,
        )
        return self._prepare_output(result)
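

# A minimal usage sketch for PairwiseEmbeddingDistanceEvalChain (same
# assumptions as above; the example strings are illustrative):
#
#     chain = PairwiseEmbeddingDistanceEvalChain()
#     result = chain.evaluate_string_pairs(
#         prediction="Seattle is sunny today",
#         prediction_b="It is raining in Seattle",
#     )
#     result["score"]  # cosine distance by default, in [0, 2]; 0 means identical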