Feature Extraction
sentence-transformers
ONNX
Safetensors
multilingual
bidirectional_pplx_qwen3
sentence-similarity
mteb
custom_code
text-embeddings-inference
Instructions to use perplexity-ai/pplx-embed-v1-4b with libraries, inference providers, notebooks, and local apps. Follow these links to get started.
- Libraries
- sentence-transformers
How to use perplexity-ai/pplx-embed-v1-4b with sentence-transformers:
from sentence_transformers import SentenceTransformer model = SentenceTransformer("perplexity-ai/pplx-embed-v1-4b", trust_remote_code=True) sentences = [ "The weather is lovely today.", "It's so sunny outside!", "He drove to the stadium." ] embeddings = model.encode(sentences) similarities = model.similarity(embeddings, embeddings) print(similarities.shape) # [3, 3] - Notebooks
- Google Colab
- Kaggle
| import torch | |
| from typing import Literal | |
| from sentence_transformers.models import Module | |
| class Quantizer(torch.nn.Module): | |
| def __init__(self, hard: bool = True): | |
| """ | |
| Args: | |
| hard: Whether to use hard or soft quantization. Defaults to True. | |
| """ | |
| super().__init__() | |
| self._hard = hard | |
| def _hard_quantize(self, x, *args, **kwargs) -> torch.Tensor: | |
| raise NotImplementedError | |
| def _soft_quantize(self, x, *args, **kwargs) -> torch.Tensor: | |
| raise NotImplementedError | |
| def forward(self, x, *args, **kwargs) -> torch.Tensor: | |
| soft = self._soft_quantize(x, *args, **kwargs) | |
| if not self._hard: | |
| result = soft | |
| else: | |
| result = ( | |
| self._hard_quantize(x, *args, **kwargs).detach() + soft - soft.detach() | |
| ) | |
| return result | |
| class Int8TanhQuantizer(Quantizer): | |
| def __init__( | |
| self, | |
| hard: bool = True, | |
| ): | |
| super().__init__(hard=hard) | |
| self.qmin = -128 | |
| self.qmax = 127 | |
| def _soft_quantize(self, x, *args, **kwargs): | |
| return torch.tanh(x) | |
| def _hard_quantize(self, x, *args, **kwargs): | |
| soft = self._soft_quantize(x) | |
| int_x = torch.round(soft * self.qmax) | |
| int_x = torch.clamp(int_x, self.qmin, self.qmax) | |
| return int_x | |
| class BinaryTanhQuantizer(Quantizer): | |
| def __init__( | |
| self, | |
| hard: bool = True, | |
| scale: float = 1.0, | |
| ): | |
| super().__init__(hard) | |
| self._scale = scale | |
| def _soft_quantize(self, x, *args, **kwargs): | |
| return torch.tanh(self._scale * x) | |
| def _hard_quantize(self, x, *args, **kwargs): | |
| return torch.where(x >= 0, 1.0, -1.0) | |
| class FlexibleQuantizer(Module): | |
| def __init__(self): | |
| super().__init__() | |
| self._int8_quantizer = Int8TanhQuantizer() | |
| self._binary_quantizer = BinaryTanhQuantizer() | |
| def forward( | |
| self, | |
| features: dict[str, torch.Tensor], | |
| quantization: Literal["binary", "int8"] = "int8", | |
| **kwargs | |
| ) -> dict[str, torch.Tensor]: | |
| if quantization == "int8": | |
| features["sentence_embedding"] = self._int8_quantizer( | |
| features["sentence_embedding"] | |
| ) | |
| elif quantization == "binary": | |
| features["sentence_embedding"] = self._binary_quantizer( | |
| features["sentence_embedding"] | |
| ) | |
| else: | |
| raise ValueError( | |
| f"Invalid quantization type: {quantization}. Must be 'binary' or 'int8'." | |
| ) | |
| return features | |
| def load( | |
| cls, | |
| model_name_or_path: str, | |
| subfolder: str = "", | |
| token: bool | str | None = None, | |
| cache_folder: str | None = None, | |
| revision: str | None = None, | |
| local_files_only: bool = False, | |
| **kwargs, | |
| ): | |
| return cls() | |
| def save(self, output_path: str, *args, **kwargs) -> None: | |
| return | |