Spaces:
Runtime error
Runtime error
File size: 6,692 Bytes
58d33f0 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 |
"""Wrapper around HuggingFace embedding models for self-hosted remote hardware."""
import importlib
import logging
from typing import Any, Callable, List, Optional
from pydantic import BaseModel
from langchain.embeddings.self_hosted import SelfHostedEmbeddings
DEFAULT_MODEL_NAME = "sentence-transformers/all-mpnet-base-v2"
DEFAULT_INSTRUCT_MODEL = "hkunlp/instructor-large"
DEFAULT_EMBED_INSTRUCTION = "Represent the document for retrieval: "
DEFAULT_QUERY_INSTRUCTION = (
"Represent the question for retrieving supporting documents: "
)
logger = logging.getLogger(__name__)
def _embed_documents(client: Any, *args: Any, **kwargs: Any) -> List[List[float]]:
"""Inference function to send to the remote hardware.
Accepts a sentence_transformer model_id and
returns a list of embeddings for each document in the batch.
"""
return client.encode(*args, **kwargs)
def load_embedding_model(model_id: str, instruct: bool = False, device: int = 0) -> Any:
"""Load the embedding model."""
if not instruct:
import sentence_transformers
client = sentence_transformers.SentenceTransformer(model_id)
else:
from InstructorEmbedding import INSTRUCTOR
client = INSTRUCTOR(model_id)
if importlib.util.find_spec("torch") is not None:
import torch
cuda_device_count = torch.cuda.device_count()
if device < -1 or (device >= cuda_device_count):
raise ValueError(
f"Got device=={device}, "
f"device is required to be within [-1, {cuda_device_count})"
)
if device < 0 and cuda_device_count > 0:
logger.warning(
"Device has %d GPUs available. "
"Provide device={deviceId} to `from_model_id` to use available"
"GPUs for execution. deviceId is -1 for CPU and "
"can be a positive integer associated with CUDA device id.",
cuda_device_count,
)
client = client.to(device)
return client
class SelfHostedHuggingFaceEmbeddings(SelfHostedEmbeddings, BaseModel):
"""Runs sentence_transformers embedding models on self-hosted remote hardware.
Supported hardware includes auto-launched instances on AWS, GCP, Azure,
and Lambda, as well as servers specified
by IP address and SSH credentials (such as on-prem, or another cloud
like Paperspace, Coreweave, etc.).
To use, you should have the ``runhouse`` python package installed.
Example:
.. code-block:: python
from langchain.embeddings import SelfHostedHuggingFaceEmbeddings
import runhouse as rh
model_name = "sentence-transformers/all-mpnet-base-v2"
gpu = rh.cluster(name="rh-a10x", instance_type="A100:1")
hf = SelfHostedHuggingFaceEmbeddings(model_name=model_name, hardware=gpu)
"""
client: Any #: :meta private:
model_id: str = DEFAULT_MODEL_NAME
"""Model name to use."""
model_reqs: List[str] = ["./", "sentence_transformers", "torch"]
"""Requirements to install on hardware to inference the model."""
hardware: Any
"""Remote hardware to send the inference function to."""
model_load_fn: Callable = load_embedding_model
"""Function to load the model remotely on the server."""
load_fn_kwargs: Optional[dict] = None
"""Key word arguments to pass to the model load function."""
inference_fn: Callable = _embed_documents
"""Inference function to extract the embeddings."""
def __init__(self, **kwargs: Any):
"""Initialize the remote inference function."""
load_fn_kwargs = kwargs.pop("load_fn_kwargs", {})
load_fn_kwargs["model_id"] = load_fn_kwargs.get("model_id", DEFAULT_MODEL_NAME)
load_fn_kwargs["instruct"] = load_fn_kwargs.get("instruct", False)
load_fn_kwargs["device"] = load_fn_kwargs.get("device", 0)
super().__init__(load_fn_kwargs=load_fn_kwargs, **kwargs)
class SelfHostedHuggingFaceInstructEmbeddings(SelfHostedHuggingFaceEmbeddings):
"""Runs InstructorEmbedding embedding models on self-hosted remote hardware.
Supported hardware includes auto-launched instances on AWS, GCP, Azure,
and Lambda, as well as servers specified
by IP address and SSH credentials (such as on-prem, or another
cloud like Paperspace, Coreweave, etc.).
To use, you should have the ``runhouse`` python package installed.
Example:
.. code-block:: python
from langchain.embeddings import SelfHostedHuggingFaceInstructEmbeddings
import runhouse as rh
model_name = "hkunlp/instructor-large"
gpu = rh.cluster(name='rh-a10x', instance_type='A100:1')
hf = SelfHostedHuggingFaceInstructEmbeddings(
model_name=model_name, hardware=gpu)
"""
model_id: str = DEFAULT_INSTRUCT_MODEL
"""Model name to use."""
embed_instruction: str = DEFAULT_EMBED_INSTRUCTION
"""Instruction to use for embedding documents."""
query_instruction: str = DEFAULT_QUERY_INSTRUCTION
"""Instruction to use for embedding query."""
model_reqs: List[str] = ["./", "InstructorEmbedding", "torch"]
"""Requirements to install on hardware to inference the model."""
def __init__(self, **kwargs: Any):
"""Initialize the remote inference function."""
load_fn_kwargs = kwargs.pop("load_fn_kwargs", {})
load_fn_kwargs["model_id"] = load_fn_kwargs.get(
"model_id", DEFAULT_INSTRUCT_MODEL
)
load_fn_kwargs["instruct"] = load_fn_kwargs.get("instruct", True)
load_fn_kwargs["device"] = load_fn_kwargs.get("device", 0)
super().__init__(load_fn_kwargs=load_fn_kwargs, **kwargs)
def embed_documents(self, texts: List[str]) -> List[List[float]]:
"""Compute doc embeddings using a HuggingFace instruct model.
Args:
texts: The list of texts to embed.
Returns:
List of embeddings, one for each text.
"""
instruction_pairs = []
for text in texts:
instruction_pairs.append([self.embed_instruction, text])
embeddings = self.client(self.pipeline_ref, instruction_pairs)
return embeddings.tolist()
def embed_query(self, text: str) -> List[float]:
"""Compute query embeddings using a HuggingFace instruct model.
Args:
text: The text to embed.
Returns:
Embeddings for the text.
"""
instruction_pair = [self.query_instruction, text]
embedding = self.client(self.pipeline_ref, [instruction_pair])[0]
return embedding.tolist()
|